pax_global_header00006660000000000000000000000064133266612700014520gustar00rootroot0000000000000052 comment=e787742f75b50623b20728fcef20d9d696256aaa vowpal-wabbit-8.6.1.dfsg1/000077500000000000000000000000001332666127000153155ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/.editorconfig000066400000000000000000000020011332666127000177630ustar00rootroot00000000000000# See http://EditorConfig.org # Please, either use an editor supporting editorconfig # or set up your editor style according to this file manually # or use `astyle --lineend=linux --indent=spaces=2 *.cc *.h` # (see http://astyle.sourceforge.net/). # This is the top-most EditorConfig file root = true # Unix-style newlines with a newline ending every file [*] charset = utf-8 end_of_line = lf insert_final_newline = true trim_trailing_whitespace = true # C++ files [*.{cc,h}] indent_style = space indent_size = 2 indent_brace_style = Allman # Makefiles need tab indentation [Makefile] indent_style = tab [*.md] trim_trailing_whitespace = false # If not set this breaks class feature blocks [*.tt] insert_final_newline = false [*.cs] end_of_line = crlf indent_style = space indent_size = 4 insert_final_newline = false # The following line results in warning/error: # # { /// # # Does reverting to Allman [vw_clr/*.{cpp,h}] end_of_line = crlf indent_style = space indent_size = 4 indent_brace_style = Allman vowpal-wabbit-8.6.1.dfsg1/.gitignore000066400000000000000000000041221332666127000173040ustar00rootroot00000000000000Makefile.in cluster/Makefile.in cluster/Makefile acinclude.d/libtool.m4 acinclude.d/ltoptions.m4 acinclude.d/ltsugar.m4 acinclude.d/ltversion.m4 acinclude.d/lt~obsolete.m4 aclocal.m4 config.guess config.sub configure depcomp install-sh libtool ltmain.sh missing vowpalwabbit/Makefile.in vowpalwabbit/config.h.in config.h *.la *.lo vowpalwabbit/stamp-* config.status test/*.predict test/*.cmp *.log *.o *.a t_* bin vw .* vw.1 *.cache *.tgz test/*.tmp test/models test/topk.model .libs .deps 
autom4te.cache/ vowpalwabbit/active_interactor vowpalwabbit/vw cluster/spanning_tree *.d *.prev library/ezexample_predict library/ezexample_train library/library_example library/recommend library/gd_mf_weights library/test_search library/search_generate test/RunTests.last.times *.dSYM *~ *.pc *.pyc .DS_Store # build folders **/x64 **/obj **/Debug **/Release **/dll **/ipch **/target # VS files *.opensdf *.suo *.sdf *.vcxproj.user *.csproj.user *.so demo/mnist/infimnist demo/ocr/letter.* demo/ocr/run.* **/TestResults # IntelliJ *.idea *.iml # Ignore NuGet Packages *.nupkg # Ignore the packages folder **/packages # except build/, which is used as an MSBuild target. !**/packages/build/ # Uncomment if necessary however generally it will be regenerated when needed !**/packages/repositories.config # QT VW.config VW.creator VW.creator.user VW.files VW.includes # Python packaging python/.cache/** python/.eggs/** python/.tox/** python/build/** python/dist/** python/src/** python/vowpalwabbit.egg-info/** python/.coverage /COPYING /README /compile /cs/Serializer/Visitors/VowpalWabbitNativeVisitorExt.cs /cs/Serializer/VowpalWabbitDefaultMarshallerExt.cs /cs/unittest/RunTests.cs vowpalwabbit/vw.VC.VC.opendb vowpalwabbit/vw.VC.db /cs/cs/Serializer/VowpalWabbitDefaultMarshallerExt.cs /python/pylibvw.pyd /python/pylibvw.cp35-win_amd64.pyd decision_service/ds.VC.VC.opendb decision_service/ds.VC.db decision_service/unit_test/dsclient-test.out decision_service/rl.VC.VC.opendb decision_service/rl.VC.db index.html packages-microsoft-prod.deb decision_service/unit_test/rlclient-test.out cs/azure_service/azure_service.ccproj.user vowpal-wabbit-8.6.1.dfsg1/.gitkeep000066400000000000000000000000001332666127000167340ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/.travis.yml000066400000000000000000000041351332666127000174310ustar00rootroot00000000000000language: cpp compiler: - gcc addons: apt: sources: - george-edison55-precise-backports # cmake 3.2.3 / doxygen 1.8.3 packages: - 
cmake - cmake-data before_install: # Remove cmake ppa - sudo add-apt-repository -y --remove "ppa:george-edison55/precise-backports" #### begin g++ 4.9 # ppa for g++ 4.9 (first version that supports complete c++11. i.e. ) - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test - sudo apt-get update - sudo apt-get install gcc-4.9 g++-4.9 - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.9 60 --slave /usr/bin/g++ g++ /usr/bin/g++-4.9 - sudo add-apt-repository -y --remove "ubuntu-toolchain-r-test" #### end g++ 4.9 - sudo apt-get update -qq - sudo apt-get install -qq libboost-all-dev - sudo apt-get install maven #### begin cpprest - sudo apt-get install g++ git make zlib1g-dev libboost-all-dev libssl-dev - git clone https://github.com/Microsoft/cpprestsdk.git casablanca - cd casablanca/Release - mkdir build - cd build - cmake .. - make - sudo make install - cd ../../../ - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib #### end cpprest - sudo pip install cpp-coveralls wheel # use miniconda for python package testing - wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh; - bash miniconda.sh -b -p $HOME/miniconda - export PATH="$HOME/miniconda/bin:$PATH" - hash -r - conda config --set always_yes yes --set changeps1 no - conda update -q conda - conda create -q -n test-python27 python=2.7 nomkl numpy scipy scikit-learn script: - make all - make python - mvn clean test -f java/pom.xml - make test - make rl_clientlib_test - cd test - ./test_race_condition.sh - cd .. - make test_gcov --always-make - cd python - source activate test-python27 - pip install pytest readme_renderer pandas - python setup.py check -mrs - python setup.py install - py.test tests - source deactivate - cd .. after_success: - coveralls --exclude lib --exclude tests --gcov-options '\-lp' vowpal-wabbit-8.6.1.dfsg1/AUTHORS000066400000000000000000000025131332666127000163660ustar00rootroot00000000000000John Langford ... and many others. 
The git log includes at least: Alekh Agarwal Ales Tamchyna Alex Grubb Alexander K. Hudek Alexander Trufanov Alexey Rodriguez Yakushev Anna Choromanska Ariel Faigon B. Scott Michel Ben Gimpert Ben Wing Bharath Krishnan Brian Muller Chris Quirk Dan Melamed Daniel Erenrich Daniel Hsu Doug Shore Eric Whyne Francesco Orabona Gabriel Synnaeve Gordon Rios Hal Daume III Harry Robertson HaveF Hui Gao Ivan Dyedov Jacob Hofman Jaimyoung Kwon Jake Hofman Jan Szumiec Jarrod R Parker John Langford Jon Morra Kai-Wei Chang Kristian Holsheimer Lihong Li Luis Pedro Coelho Lukas Zilka Luong Hoang Marko Asplund Marcel Laverdet Martin Popel Martin Thomas Matthew Hoffman Michael Brundage Michelangelo D'Agostino Monami Sharma Nick Nussbaum Nikos Karampatziakis Nirupama Chandrasekaran Olek Zendel Olivier Chapelle Parag Agrawal Paul Mineiro Paulius Klyvis Priska Herger Qiao Mu Rob Zinkov Roy Frostig Rukshan Batuwitage Sam Lendle Sam Steingold Selim Raboudi Sergei Matusevich Scott Graham Sharat Chikkerur Shravan M Narayanamurthy Siddhartha Sen Stephane Ross Stephen Merity Tim Cowlishaw Tom De Coninck Vaclav Petricek Vladimir Vladimirov Werner Fenchel (not really) Xavier Daull Yaroslav Halchenko Zach Mayer Zeno Gantner Zhen Qin vowpal-wabbit-8.6.1.dfsg1/INSTALL000066400000000000000000000224511332666127000163520ustar00rootroot00000000000000When working off the master branch try just typing 'make'. For releases do the below. Installation Instructions ************************* Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, 2006 Free Software Foundation, Inc. This file is free documentation; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. Basic Installation ================== Briefly, the shell commands `./configure; make; make install' should configure, build, and install this package. The following more-detailed instructions are generic; see the `README.md' file for instructions specific to this package. 
The `configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses those values to create a `Makefile' in each directory of the package. It may also create one or more `.h' files containing system-dependent definitions. Finally, it creates a shell script `config.status' that you can run in the future to recreate the current configuration, and a file `config.log' containing compiler output (useful mainly for debugging `configure'). It can also use an optional file (typically called `config.cache' and enabled with `--cache-file=config.cache' or simply `-C') that saves the results of its tests to speed up reconfiguring. Caching is disabled by default to prevent problems with accidental use of stale cache files. If you need to do unusual things to compile the package, please try to figure out how `configure' could check whether to do them, and mail diffs or instructions to the address given in the `README.md' so they can be considered for the next release. If you are using the cache, and at some point `config.cache' contains results you don't want to keep, you may remove or edit it. The file `configure.ac' (or `configure.in') is used to create `configure' by a program called `autoconf'. You need `configure.ac' if you want to change it or regenerate `configure' using a newer version of `autoconf'. The simplest way to compile this package is: 1. `cd' to the directory containing the package's source code and type `./configure' to configure the package for your system. Running `configure' might take a while. While running, it prints some messages telling which features it is checking for. 2. Type `make' to compile the package. 3. Optionally, type `make check' to run any self-tests that come with the package. 4. Type `make install' to install the programs and any data files and documentation. 5. You can remove the program binaries and object files from the source code directory by typing `make clean'. 
To also remove the files that `configure' created (so you can compile the package for a different kind of computer), type `make distclean'. There is also a `make maintainer-clean' target, but that is intended mainly for the package's developers. If you use it, you may have to get all sorts of other programs in order to regenerate files that came with the distribution. Compilers and Options ===================== Some systems require unusual options for compilation or linking that the `configure' script does not know about. Run `./configure --help' for details on some of the pertinent environment variables. You can give `configure' initial values for configuration parameters by setting variables in the command line or in the environment. Here is an example: ./configure CC=c99 CFLAGS=-g LIBS=-lposix *Note Defining Variables::, for more details. Compiling For Multiple Architectures ==================================== You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their own directory. To do this, you can use GNU `make'. `cd' to the directory where you want the object files and executables to go and run the `configure' script. `configure' automatically checks for the source code in the directory that `configure' is in and in `..'. With a non-GNU `make', it is safer to compile the package for one architecture at a time in the source code directory. After you have installed the package for one architecture, use `make distclean' before reconfiguring for another architecture. Installation Names ================== By default, `make install' installs the package's commands under `/usr/local/bin', include files under `/usr/local/include', etc. You can specify an installation prefix other than `/usr/local' by giving `configure' the option `--prefix=PREFIX'. You can specify separate installation prefixes for architecture-specific files and architecture-independent files. 
If you pass the option `--exec-prefix=PREFIX' to `configure', the package uses PREFIX as the prefix for installing programs and libraries. Documentation and other data files still use the regular prefix. In addition, if you use an unusual directory layout you can give options like `--bindir=DIR' to specify different values for particular kinds of files. Run `configure --help' for a list of the directories you can set and what kinds of files go in them. If the package supports it, you can cause programs to be installed with an extra prefix or suffix on their names by giving `configure' the option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. Optional Features ================= Some packages pay attention to `--enable-FEATURE' options to `configure', where FEATURE indicates an optional part of the package. They may also pay attention to `--with-PACKAGE' options, where PACKAGE is something like `gnu-as' or `x' (for the X Window System). The `README.md' should mention any `--enable-' and `--with-' options that the package recognizes. For packages that use the X Window System, `configure' can usually find the X include and library files automatically, but if it doesn't, you can use the `configure' options `--x-includes=DIR' and `--x-libraries=DIR' to specify their locations. Specifying the System Type ========================== There may be some features `configure' cannot figure out automatically, but needs to determine by the type of machine the package will run on. Usually, assuming the package is built to be run on the _same_ architectures, `configure' can figure that out, but if it prints a message saying it cannot guess the machine type, give it the `--build=TYPE' option. TYPE can either be a short name for the system type, such as `sun4', or a canonical name which has the form: CPU-COMPANY-SYSTEM where SYSTEM can have one of these forms: OS KERNEL-OS See the file `config.sub' for the possible values of each field. 
If `config.sub' isn't included in this package, then this package doesn't need to know the machine type. If you are _building_ compiler tools for cross-compiling, you should use the option `--target=TYPE' to select the type of system they will produce code for. If you want to _use_ a cross compiler, that generates code for a platform different from the build platform, you should specify the "host" platform (i.e., that on which the generated programs will eventually be run) with `--host=TYPE'. Sharing Defaults ================ If you want to set default values for `configure' scripts to share, you can create a site shell script called `config.site' that gives default values for variables like `CC', `cache_file', and `prefix'. `configure' looks for `PREFIX/share/config.site' if it exists, then `PREFIX/etc/config.site' if it exists. Or, you can set the `CONFIG_SITE' environment variable to the location of the site script. A warning: not all `configure' scripts look for a site script. Defining Variables ================== Variables not defined in a site shell script can be set in the environment passed to `configure'. However, some packages may run configure again during the build, and the customized values of these variables may be lost. In order to avoid this problem, you should set them in the `configure' command line, using `VAR=value'. For example: ./configure CC=/usr/local2/bin/gcc causes the specified `gcc' to be used as the C compiler (unless it is overridden in the site shell script). Unfortunately, this technique does not work for `CONFIG_SHELL' due to an Autoconf bug. Until the bug is fixed you can use this workaround: CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash `configure' Invocation ====================== `configure' recognizes the following options to control how it operates. `--help' `-h' Print a summary of the options to `configure', and exit. 
`--version' `-V' Print the version of Autoconf used to generate the `configure' script, and exit. `--cache-file=FILE' Enable the cache: use and save the results of the tests in FILE, traditionally `config.cache'. FILE defaults to `/dev/null' to disable caching. `--config-cache' `-C' Alias for `--cache-file=config.cache'. `--quiet' `--silent' `-q' Do not print messages saying which checks are being made. To suppress all normal output, redirect it to `/dev/null' (any error messages will still be shown). `--srcdir=DIR' Look for the package's source code in directory DIR. Usually `configure' can determine that directory automatically. `configure' also accepts some other, not widely useful, options. Run `configure --help' for more details. vowpal-wabbit-8.6.1.dfsg1/LICENSE000066400000000000000000000030601332666127000163210ustar00rootroot00000000000000Copyright © Microsoft Corp 2012-2014, Yahoo! Inc. 2007-2012, and many individual contributors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the Microsoft Corp nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vowpal-wabbit-8.6.1.dfsg1/Makefile000066400000000000000000000141271332666127000167620ustar00rootroot00000000000000CXX ?= $(shell which g++) # -- if you want to test 32-bit use this instead, # it sometimes reveals type portability issues # CXX = $(shell which g++) -m32 ifneq ($(CXX),) #$(warning Using clang: "$(CXX)") ARCH = -D__extern_always_inline=inline else CXX = clang++ $(warning Using clang++) endif #ARCH = $(shell test `$CXX -v 2>&1 | tail -1 | cut -d ' ' -f 3 | cut -d '.' -f 1,2` \< 4.3 && echo -march=nocona || echo -march=native) ifeq ($(CXX),) $(warning No compiler found) exit 1 endif UNAME := $(shell uname) ARCH_UNAME := $(shell uname -m) LIBS = -l boost_program_options -l pthread -l z BOOST_INCLUDE = -I /usr/local/include/boost -I /usr/include BOOST_LIBRARY = -L /usr/local/lib -L /usr/lib NPROCS := 1 ifeq ($(UNAME), Linux) BOOST_LIBRARY += -L /usr/lib/x86_64-linux-gnu NPROCS:=$(shell grep -c ^processor /proc/cpuinfo) endif ifeq ($(UNAME), FreeBSD) LIBS = -l boost_program_options -l pthread -l z -l compat BOOST_INCLUDE = -I /usr/local/include NPROCS:=$(shell grep -c ^processor /proc/cpuinfo) endif ifeq "CYGWIN" "$(findstring CYGWIN,$(UNAME))" LIBS = -l boost_program_options-mt -l pthread -l z BOOST_INCLUDE = -I /usr/include NPROCS:=$(shell grep -c ^processor /proc/cpuinfo) endif ifeq ($(UNAME), Darwin) LIBS = -lboost_program_options-mt -lboost_serialization-mt -l pthread -l z # On Macs, the location isn't always clear # brew uses /usr/local # but /opt/local seems to be preferred by some 
users # so we try them both ifneq (,$(wildcard /usr/local/include)) BOOST_INCLUDE = -I /usr/local/include BOOST_LIBRARY = -L /usr/local/lib endif ifneq (,$(wildcard /opt/local/include)) BOOST_INCLUDE = -I /opt/local/include BOOST_LIBRARY = -L /opt/local/lib endif NPROCS:=$(shell sysctl -n hw.ncpu) endif ifneq ($(USER_BOOST_INCLUDE),) BOOST_INCLUDE = $(USER_BOOST_INCLUDE) endif ifneq ($(USER_BOOST_LIBRARY),) BOOST_LIBRARY = $(USER_BOOST_LIBRARY) endif JSON_INCLUDE = -I ../rapidjson/include #LIBS = -l boost_program_options-gcc34 -l pthread -l z ifeq ($(ARCH_UNAME), ppc64le) OPTIM_FLAGS ?= -DNDEBUG -O3 -fomit-frame-pointer -fno-strict-aliasing #-msse2 is not supported on power else OPTIM_FLAGS ?= -DNDEBUG -O3 -fomit-frame-pointer -fno-strict-aliasing -msse2 -mfpmath=sse #-ffast-math #uncomment for speed, comment for testability endif ifeq ($(UNAME), FreeBSD) WARN_FLAGS = -Wall else WARN_FLAGS = -Wall -pedantic endif # for normal fast execution. FLAGS = -std=c++11 $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) $(OPTIM_FLAGS) -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -fPIC #-DVW_LDA_NO_SSE # for profiling -- note that it needs to be gcc #FLAGS = -std=c++11 $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -O2 -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -pg -fPIC #CXX = g++ # for valgrind / gdb debugging #FLAGS = -std=c++11 $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -g -O0 -fPIC # for valgrind profiling: run 'valgrind --tool=callgrind PROGRAM' then 'callgrind_annotate --tree=both --inclusive=yes' #FLAGS = -std=c++11 $(CFLAGS) $(LDFLAGS) -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -g -fomit-frame-pointer -ffast-math -fno-strict-aliasing -fPIC FLAGS += -I ../rapidjson/include -I ../explore BINARIES = vw active_interactor MANPAGES = vw.1 default: vw all: vw library_example java spanning_tree rl_clientlib %.1: % help2man --no-info --name="Vowpal 
Wabbit -- fast online learning tool" ./$< > $@ export rl_clientlib: cd reinforcement_learning/rlclientlib; $(MAKE) -j $(NPROCS) things rl_clientlib_test: vw rl_clientlib cd reinforcement_learning/unit_test; $(MAKE) -j $(NPROCS) things (cd reinforcement_learning/unit_test && ./rlclient-test.out) rl_example: vw rl_clientlib cd reinforcement_learning/examples/basic_usage_cpp; $(MAKE) -j $(NPROCS) things cd reinforcement_learning/examples/rl_sim_cpp; $(MAKE) -j $(NPROCS) things spanning_tree: cd cluster; $(MAKE) vw: cd vowpalwabbit; $(MAKE) -j $(NPROCS) things #Target-specific flags for a profiling build. (Copied from line 70) vw_gcov: FLAGS = -std=c++11 $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -g -O0 -fprofile-arcs -ftest-coverage -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -I ../explore -pg -fPIC #-DVW_LDA_NO_S vw_gcov: CXX = g++ vw_gcov: cd vowpalwabbit && env LDFLAGS="-fprofile-arcs -ftest-coverage -lgcov"; $(MAKE) -j $(NPROCS) things active_interactor: cd vowpalwabbit; $(MAKE) library_example: vw cd library; $(MAKE) -j $(NPROCS) things #Target-specific flags for a profiling build. (Copied from line 70) library_example_gcov: FLAGS = -std=c++11 $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -g -O0 -fprofile-arcs -ftest-coverage -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -I ../explore -pg -fPIC #-DVW_LDA_NO_S library_example_gcov: CXX = g++ library_example_gcov: vw_gcov cd library && env LDFLAGS="-fprofile-arcs -ftest-coverage -lgcov"; $(MAKE) things python: vw cd python; $(MAKE) things java: vw cd java; $(MAKE) things .FORCE: test: .FORCE vw library_example @echo "vw running test-suite..." (cd test && ./RunTests -d -fe -E 0.001 -O --onethread ../vowpalwabbit/vw) (cd test && ./RunTests -d -fe -E 0.001 ../vowpalwabbit/vw) cd test && python save_resume_test.py --verbose_on_fail test_gcov: .FORCE vw_gcov library_example_gcov @echo "vw running test-suite..." 
(cd test && ./RunTests -d -fe -E 0.001 ../vowpalwabbit/vw ../vowpalwabbit/vw) bigtests: .FORCE vw (cd big_tests && $(MAKE) $(MAKEFLAGS)) install: $(BINARIES) cd vowpalwabbit; cp $(BINARIES) /usr/local/bin; cd ../cluster; $(MAKE) install; cd ../java; $(MAKE) install; doc: (cd doc && doxygen Doxyfile) clean: cd vowpalwabbit && $(MAKE) clean cd cluster && $(MAKE) clean cd library && $(MAKE) clean cd python && $(MAKE) clean cd java && $(MAKE) clean cd reinforcement_learning/rlclientlib && $(MAKE) clean cd reinforcement_learning/unit_test; $(MAKE) clean cd reinforcement_learning/examples/basic_usage_cpp; $(MAKE) clean cd reinforcement_learning/examples/rl_sim_cpp; $(MAKE) clean .PHONY: all clean install doc vowpal-wabbit-8.6.1.dfsg1/Makefile.am000066400000000000000000000050041332666127000173500ustar00rootroot00000000000000SUBDIRS = vowpalwabbit cluster library nobase_include_HEADERS = vowpalwabbit/allreduce.h \ vowpalwabbit/comp_io.h \ vowpalwabbit/config.h \ vowpalwabbit/example.h \ vowpalwabbit/action_score.h \ vowpalwabbit/feature_group.h \ vowpalwabbit/cb_explore.h \ vowpalwabbit/crossplat_compat.h \ vowpalwabbit/parse_example.h \ vowpalwabbit/floatbits.h \ vowpalwabbit/global_data.h \ vowpalwabbit/io_buf.h \ vowpalwabbit/learner.h \ vowpalwabbit/loss_functions.h \ vowpalwabbit/parse_primitives.h \ vowpalwabbit/parser.h \ vowpalwabbit/simple_label.h \ vowpalwabbit/v_array.h \ vowpalwabbit/vw.h \ vowpalwabbit/vwdll.h \ vowpalwabbit/label_parser.h \ vowpalwabbit/multiclass.h \ vowpalwabbit/cost_sensitive.h \ vowpalwabbit/cb.h \ vowpalwabbit/v_hashmap.h \ vowpalwabbit/memory.h \ vowpalwabbit/vw_exception.h \ vowpalwabbit/vw_validate.h \ vowpalwabbit/multilabel.h \ vowpalwabbit/constant.h \ vowpalwabbit/ezexample.h noinst_HEADERS = vowpalwabbit/accumulate.h \ vowpalwabbit/autolink.h \ vowpalwabbit/baseline.h \ vowpalwabbit/bfgs.h \ vowpalwabbit/binary.h \ vowpalwabbit/bs.h \ vowpalwabbit/cache.h \ vowpalwabbit/cb.h \ vowpalwabbit/cb_algs.h \ vowpalwabbit/cb_explore.h \ 
vowpalwabbit/cbify.h \ vowpalwabbit/comp_io.h \ vowpalwabbit/constant.h \ vowpalwabbit/cost_sensitive.h \ vowpalwabbit/csoaa.h \ vowpalwabbit/ect.h \ vowpalwabbit/interactions.h \ vowpalwabbit/gen_cs_example.h \ vowpalwabbit/gd.h \ vowpalwabbit/gd_mf.h \ vowpalwabbit/interact.h \ vowpalwabbit/kernel_svm.h \ vowpalwabbit/lda_core.h \ vowpalwabbit/log_multi.h \ vowpalwabbit/lrq.h \ vowpalwabbit/mf.h \ vowpalwabbit/multiclass.h \ vowpalwabbit/network.h \ vowpalwabbit/nn.h \ vowpalwabbit/noop.h \ vowpalwabbit/oaa.h \ vowpalwabbit/boosting.h \ vowpalwabbit/parse_args.h \ vowpalwabbit/parse_example.h \ vowpalwabbit/parse_regressor.h \ vowpalwabbit/print.h \ vowpalwabbit/rand48.h \ vowpalwabbit/recall_tree.h \ vowpalwabbit/reductions.h \ vowpalwabbit/scorer.h \ vowpalwabbit/search.h \ vowpalwabbit/search_sequencetask.h \ vowpalwabbit/search_entityrelationtask.h \ vowpalwabbit/search_hooktask.h \ vowpalwabbit/search_multiclasstask.h \ vowpalwabbit/search_dep_parser.h \ vowpalwabbit/sender.h \ vowpalwabbit/stagewise_poly.h \ vowpalwabbit/topk.h \ vowpalwabbit/unique_sort.h \ vowpalwabbit/v_hashmap.h ACLOCAL_AMFLAGS = -I acinclude.d pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = libvw.pc libvw_c_wrapper.pc test: all cd test && ./RunTests -d -f -E 0.001 ../vowpalwabbit/vw ../vowpalwabbit/vw vowpal-wabbit-8.6.1.dfsg1/R/000077500000000000000000000000001332666127000155165ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/R/example.yaml000066400000000000000000000005531332666127000200400ustar00rootroot00000000000000# YAML FILE ### FORMAT ### #namespaceA: # varName: # - nameVar1 # - nameVar2 # keepSpace: # - false # #namespaceB: # varName: # - nameVar1 # - nameVar2 # keepSpace: # - true sepal: varName: - Sepal.|Length - Sepal.Width keepSpace: - false petal: varName: - Petal.Length - Petal.Width keepSpace: - false 
vowpal-wabbit-8.6.1.dfsg1/R/examples/000077500000000000000000000000001332666127000173345ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/R/examples/vw_example.R000066400000000000000000000105601332666127000216300ustar00rootroot00000000000000rm(list = ls(all = TRUE)); gc() # setwd('rvw_example') # library(ggplot2) # library(data.table) # library(pROC) library(r.vw) # create a folder called data system('mkdir data') #source('dt2vw.R') #source('rvw_example/vw.R') # Function used to select variables for each namespace get_feature_type <- function(X, threshold = 50, verbose = FALSE) { q_levels <- function (x) { if (data.table::is.data.table(x)) { unlist(x[, lapply(.SD, function(x) length(unique(x)))]) } else { apply(x, 2, function(x) length(unique(x))) } } lvs = q_levels(X) fact_vars = names(lvs[lvs < threshold]) num_vars = names(lvs[lvs >= threshold]) if (verbose) { print(data.frame(lvs)) } list(fact_vars = fact_vars, num_vars = num_vars) } # setwd where the data would be setwd('data') dt = diamonds dt = data.table::setDT(dt) target = 'y' data_types = get_feature_type(dt[, setdiff(names(dt), target), with=F], threshold = 50) namespaces = list(n = list(varName = data_types$num_vars, keepSpace=F), c = list(varName = data_types$fact_vars, keepSpace=F)) dt$y = with(dt, ifelse(y < 5.71, 1, -1)) dt2vw(dt, 'diamonds.vw', namespaces, target=target, weight=NULL) system('head -3 diamonds.vw') # prepare dataset for validation system('head -10000 diamonds.vw > X_train.vw ') system('tail -43940 diamonds.vw > X_valid.vw ') write.table(tail(dt$y,43940), file='valid_labels.txt', row.names = F, col.names = F, quote = F) training_data='X_train.vw' validation_data='X_valid.vw' validation_labels = "valid_labels.txt" out_probs = "out.txt" model = "mdl.vw" # AUC using perf - Download at: osmot.cs.cornell.edu/kddcup/software.html # It may not work, so a dependency of an R library has been added. See below. # Commented as could not work. 
# auc = vw(training_data, validation_data, loss = "logistic", # model, b = 25, learning_rate = 0.5, passes = 1, # l1 = NULL, l2 = NULL, early_terminate = NULL, # link_function = "--link=logistic", extra = NULL, out_probs = "out.txt", # validation_labels = validation_labels, verbose = TRUE, do_evaluation = TRUE) # Shows files in the working directory: /data list.files() auc = vw(training_data, validation_data, loss = "logistic", model, b = 25, learning_rate = 0.5, passes = 1, l1 = NULL, l2 = NULL, early_terminate = NULL, link_function = "--link=logistic", extra = NULL, out_probs = "out.txt", validation_labels = validation_labels, verbose = TRUE, do_evaluation = TRUE, use_perf=FALSE, plot_roc=TRUE) print(auc) # [1] 0.9944229 # AUC using pROC - Saving plots to disk ### create a parameter grid grid = expand.grid(l1=c(1e-07, 1e-08), l2=c(1e-07, 1e-08), eta=c(0.1, 0.05), extra=c('--nn 10', '')) cat('Running grid search\n') pdf('ROCs.pdf') aucs <- lapply(1:nrow(grid), function(i){ g = grid[i, ] auc = vw(training_data=training_data, # files relative paths validation_data=validation_data, validation_labels=validation_labels, model=model, # grid options loss='logistic', b=25, learning_rate=g[['eta']], passes=2, l1=g[['l1']], l2=g[['l2']], early_terminate=2, extra=g[['extra']], # ROC-AUC related options use_perf=FALSE, plot_roc=TRUE, do_evaluation = TRUE # If false doesn't compute AUC, use only for prediction ) auc }) dev.off() results = cbind(iter=1:nrow(grid), grid, auc=do.call(rbind, aucs)) print(results) # l1 l2 eta extra auc # 1 1e-07 1e-07 0.10 --nn 10 0.9964736 # 2 1e-08 1e-07 0.10 --nn 10 0.9964945 # 3 1e-07 1e-08 0.10 --nn 10 0.9964736 # 4 1e-08 1e-08 0.10 --nn 10 0.9964946 # 5 1e-07 1e-07 0.05 --nn 10 0.9956487 # 6 1e-08 1e-07 0.05 --nn 10 0.9956629 # 7 1e-07 1e-08 0.05 --nn 10 0.9956487 # 8 1e-08 1e-08 0.05 --nn 10 0.9956629 # 9 1e-07 1e-07 0.10 0.9878654 # 10 1e-08 1e-07 0.10 0.9919489 # 11 1e-07 1e-08 0.10 0.9878646 # 12 1e-08 1e-08 0.10 0.9919487 # 13 1e-07 
1e-07 0.05 0.9883343 # 14 1e-08 1e-07 0.05 0.9915172 # 15 1e-07 1e-08 0.05 0.9883339 # 16 1e-08 1e-08 0.05 0.9915170 p = ggplot(results, aes(iter, auc, color=extra)) + geom_point(size=3) + theme_bw() + labs(list(x='Iteration', y='AUC', title='Logistic regression results')) print(p) ggsave('results_plot.png', plot=p) vowpal-wabbit-8.6.1.dfsg1/R/examples/vw_example_2.R000066400000000000000000000015101332666127000220440ustar00rootroot00000000000000library(r.vw) ## data data("diamonds", package = "ggplot2") dt = diamonds dt$y = with(dt, ifelse(y < 5.71, 1, -1)) ## separate train and validation data ind_train = sample(1:nrow(dt), 40000) dt_train = dt[ind_train,] dt_val = dt[-ind_train,] ## first method: creating the vw data files before training dt2vw(data = dt_train, fileName = "diamond_train.vw", target = "y") dt2vw(data = dt_val, fileName = "diamond_val.vw", target = "y") write.table(x = dt_val$y, file = "valid_labels.txt", row.names = F, col.names = F) auc1 = vw(training_data = "diamond_train.vw", validation_data = "diamond_val.vw", validation_labels = "valid_labels.txt", use_perf = F) ## 2 method: use directly data.frames auc2 = vw(training_data = dt_train, validation_data = dt_val, target = "y", use_perf = F) vowpal-wabbit-8.6.1.dfsg1/R/r.vw/000077500000000000000000000000001332666127000164125ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/R/r.vw/DESCRIPTION000066400000000000000000000005131332666127000201170ustar00rootroot00000000000000Package: r.vw Type: Package Title: interface between R and Vowpal Wabbit Version: 0.1 Date: 2015-10-07 Authors@R: person("First", "Last", email = "first.last@example.com", role = c("aut", "cre")) Description: More about what it does (maybe more than one line) License: BSD LazyData: TRUE Imports: pROC, data.table, ggplot2 vowpal-wabbit-8.6.1.dfsg1/R/r.vw/NAMESPACE000066400000000000000000000001571332666127000176340ustar00rootroot00000000000000# Generated by roxygen2 (4.1.1): do not edit by hand export(dt2vw) export(vw) 
import(data.table) import(pROC) vowpal-wabbit-8.6.1.dfsg1/R/r.vw/R/000077500000000000000000000000001332666127000166135ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/R/r.vw/R/dt2vw.R000066400000000000000000000201121332666127000200000ustar00rootroot00000000000000
#'Create a vw data file from a R data.frame object
#'
#'The input is never modified: all renaming and re-encoding is done on a
#'private copy of \code{data}.
#'
#'@param data [data.table] data.table format (to be transformed)
#'@param fileName [string] file name of the resulting data in VW-friendly format
#'@param namespaces [list / yaml file] name of each namespace and each variable for each namespace
#'can be a R list, or a YAML file example namespace with the IRIS database:
#'namespaces = list(sepal = list(varName = c('Sepal.Length', 'Sepal.Width'), keepSpace=F),
#'petal = list(varName = c('Petal.Length', 'Petal.Width'), keepSpace=F))
#'this creates 2 namespaces (sepal and petal) containing the variables defined by varName.
#'keepSpace allows to keep or remove spaces in categorical variables
#'example: "FERRARI 4Si" ==> "FERRARI_4Si" with keepSpace = F
#'==> "FERRARI 4Si" with keepSpace = T (interpreted
#'by VW as two distinct categorical variables)
#'@param target [string] target of the data (target)
#'@param weight [string] weight of each line of the dataset (importance)
#'@param tag [string] tag of each line of the dataset
#'@param hard_parse [bool] if equals true, parses the data more strictly to avoid feeding VW with false categorical
#'variables like '_', or same variables perceived differently like "_var" and "var"
#'@param append [bool] if TRUE the lines are appended to \code{fileName}, otherwise the file is overwritten
#'@import data.table
#'@export
dt2vw <- function(data, fileName, namespaces = NULL, target, weight = NULL, tag = NULL, hard_parse = FALSE, append = FALSE)
{
  # Work on a private deep copy: setDT(), setnames() and set() below all modify
  # their argument by reference and would otherwise rename columns and rewrite
  # categorical values in the caller's object.
  data = setDT(copy(data))

  # Recode a logical or numeric 0/1 target so that it takes values in {-1, 1}.
  # setequal() avoids the recycled element-wise comparison of factor levels,
  # which misclassified e.g. a 0/1/2 target as binary.
  y = data[[target]]
  if(is.logical(y) || (is.numeric(y) && setequal(unique(y[!is.na(y)]), c(0, 1))))
    set(data, i = NULL, j = target, value = ifelse(y == 1, 1, -1))

  # if namespaces = NULL, define a unique namespace holding every feature column
  if(is.null(namespaces))
  {
    all_vars = colnames(data)[!colnames(data) %in% c(target, weight, tag)]
    namespaces <- list(A = list(varName = all_vars, keepSpace=FALSE))
  }

  # characters that have a meaning in the VW input format
  specChar      = '\\(|\\)|\\||\\:'
  specCharSpace = '\\(|\\)|\\||\\:| '

  # parse variable names: every special character (and space) becomes '_'
  parsingNames <- function(x) Reduce(c, lapply(x, function(X) gsub(specCharSpace,'_', X)))

  # parse categorical values; called from the generated sprintf2() expression
  parsingVar <- function(x, keepSpace, hard_parse)
  {
    # remove leading and trailing spaces, then remove special characters,
    # then remove isolated underscores.
    if(!keepSpace)
      spch = specCharSpace
    else
      spch = specChar

    if(hard_parse)
      gsub('(^_( *|_*)+)|(^_$)|(( *|_*)+_$)|( +_+ +)',' ', gsub(specChar,'_', gsub('(^ +)|( +$)', '',x)))
    else
      gsub(spch, '_', x)
  }

  ### NAMESPACE LOAD WITH A YAML FILE
  # base substr() replaces stringr::str_sub(), which the package never imported
  if(is.character(namespaces) && length(namespaces) == 1 &&
     substr(namespaces, nchar(namespaces) - 3L, nchar(namespaces)) == "yaml")
  {
    print("############### USING YAML FILE FOR LOADING THE NAMESPACES ###############")
    if(!requireNamespace("yaml", quietly = TRUE))
      stop("package 'yaml' is required to load the namespaces from a YAML file")
    namespaces = yaml::yaml.load_file(namespaces)
  }

  ### AVOIDING DATA FORMAT PROBLEMS
  setnames(data, names(data), parsingNames(names(data)))
  names(namespaces) <- parsingNames(names(namespaces))
  for(x in names(namespaces))
    namespaces[[x]]$varName = parsingNames(namespaces[[x]]$varName)
  target = parsingNames(target)
  if(!is.null(tag))
    tag = parsingNames(tag)
  if(!is.null(weight))
    weight = parsingNames(weight)

  ### INITIALIZING THE HEADER AND INDEX
  # Header: for each namespace, the expression producing each variable
  # Index: TRUE if the variable is numerical, FALSE if it is categorical
  Header = list()
  Index = list()

  for(namespaceName in names(namespaces))
  {
    varNames = namespaces[[namespaceName]][['varName']]
    Index[[namespaceName]] = vapply(data[, varNames, with=FALSE], is.numeric, logical(1))
    Header[[namespaceName]] = varNames

    ### ESCAPE THE CATEGORICAL VARIABLES
    keepFlag = if(namespaces[[namespaceName]]$keepSpace) "TRUE" else "FALSE"
    isCat = !Index[[namespaceName]]
    Header[[namespaceName]][isCat] = paste0("eval(parse(text = 'parsingVar(",
                                            Header[[namespaceName]][isCat],
                                            ", keepSpace = ", keepFlag,
                                            ", hard_parse = hard_parse)'))")
  }

  # prepend the name of the variable to its value for each categorical variable
  for(index in Index)
    for(varName in names(index)[!index])
      set(data, i = NULL, j = varName, value = paste0(varName, "_", data[[varName]]))

  ### FIRST PART OF THE VW DATA FORMAT: target, weight, tag
  formatDataVW = ''
  argexpr = character(0)

  ### Label can be null, no training is performed
  if(!is.null(target))
  {
    if(!is.null(weight) && !is.null(tag))
    {
      # Both weight and tag are given
      formatDataVW = '%f %f %s'
      argexpr = paste(target, weight, tag, sep = ', ')
    }
    else if(is.null(weight) && !is.null(tag))
    {
      # Tag only
      formatDataVW = '%f %s'
      argexpr = paste(target, tag, sep = ', ')
    }
    else if(!is.null(weight) && is.null(tag))
    {
      # Weight only
      formatDataVW = '%f %f'
      argexpr = paste(target, weight, sep = ', ')
    }
    else
    {
      # We just output target
      formatDataVW = '%f'
      argexpr = target
    }
  }

  ### ADDING THE FORMAT FOR THE VARIABLES OF EACH NAMESPACE, AND CREATING THE ARGUMENT VECTOR
  for(namespaceName in names(namespaces))
  {
    header = Header[[namespaceName]]
    index = Index[[namespaceName]]
    formatNumeric = paste0(header[index], rep(":%f ", sum(index)), collapse = "")
    formatCategorical = paste0(rep("%s", sum(!index)), collapse = " ")

    formatDataVW = c(formatDataVW, paste0(namespaceName, ' ', formatNumeric, formatCategorical))

    paramexpr = paste0(c(header[index], header[!index] ), collapse=', ')
    argexpr = paste0(c(argexpr, paramexpr), collapse = ', ')
  }

  ### FULL VW DATA STRING (NOT FORMATTED YET) : (%target %weight |A num1:%f %s |B num2:%f %s)
  # with a tag, the first '|' must directly follow the tag (no space)
  if (!is.null(tag))
    formatDataVW = paste0(formatDataVW, collapse = '|')
  else
    formatDataVW = paste0(formatDataVW, collapse = ' |')

  formatDataVW = paste0("sprintf2('", formatDataVW, "',",argexpr, ")")

  ### FORMATTING USING THE DATA.TABLE DYNAMICS TO OBTAIN THE FINAL VW DATA STRING
  temp = data[, eval(parse(text = formatDataVW))]
  temp = paste0(temp, collapse = '\n')

  ### WRITING THE DATA TO A FILE
  con = file(fileName, if(append) "a" else "w")
  # close the connection even if writeLines() fails
  on.exit(close(con), add = TRUE)
  writeLines(temp, con = con)
  invisible(NULL)
}

## Work around the "only 100 arguments are allowed" error
## in base::sprintf(). Only works with 'fmt' of length 1.
## With at most 99 arguments this is plain sprintf() (one string per row);
## with more, the format is cut into chunks of at most 99 conversions, each
## chunk is formatted separately, the pieces are glued back per row and the
## rows are returned as ONE string separated by "\n".
sprintf2 <- function(fmt, ...)
{
  MAX_NVAL <- 99L
  args <- list(...)
  if (length(args) <= MAX_NVAL)
    return(sprintf(fmt, ...))
  stopifnot(length(fmt) == 1L)

  # positions of the conversion specs, ignoring both characters of every "%%"
  not_a_spec_at <- gregexpr("%%", fmt, fixed=TRUE)[[1L]]
  not_a_spec_at <- c(not_a_spec_at, not_a_spec_at + 1L)
  spec_at <- setdiff(gregexpr("%", fmt, fixed=TRUE)[[1L]], not_a_spec_at)
  nspec <- length(spec_at)
  if (length(args) < nspec)
    stop("too few arguments")

  # index of the first spec of every chunk after the first one
  if (nspec <= MAX_NVAL) {
    break_points <- integer(0)
  } else {
    break_points <- seq(MAX_NVAL + 1L, nspec, by=MAX_NVAL)
  }
  break_from <- c(1L, break_points)
  break_to <- c(break_points - 1L, nspec)
  fmt_break_at <- spec_at[break_points]
  fmt_chunks <- substr(rep.int(fmt, length(fmt_break_at) + 1L),
                       c(1L, fmt_break_at),
                       c(fmt_break_at - 1L, nchar(fmt)))

  # SIMPLIFY = FALSE keeps a list whatever the number of rows: the simplified
  # result is a plain vector for single-row input, on which apply() failed.
  ans_chunks <- mapply(
    function(fmt_chunk, from, to)
      do.call(sprintf, c(list(fmt_chunk), args[from:to])),
    fmt_chunks,
    break_from,
    break_to,
    SIMPLIFY = FALSE,
    USE.NAMES = FALSE
  )
  paste(do.call(paste0, ans_chunks), collapse = "\n")
}
vowpal-wabbit-8.6.1.dfsg1/R/r.vw/R/vw.R000066400000000000000000000166731332666127000174070ustar00rootroot00000000000000
#'Trains Vowpal Wabbit models from R.
#'
#'This function is fairly simple and extensible to other problems, so far just supports binary classification.
#'Thought to be used in conjunction with perf in order to compute validation metrics on left out datasets.
#'See osmot.cs.cornell.edu/kddcup/software.html for more info about perf.
#'
#'@param training_data a [data.frame] or path to a vw data file
#'@param validation_data a [data.frame] or path to a vw data file
#'@param model name of the model file
#'@param path_vw_data_train if training_data is a [data.frame], the path to which to save
#'the vw data file. If NULL, the data is stored in a temporary folder and deleted before exiting
#'the function
#'@param path_vw_data_val if validation_data is a [data.frame], the path to which to save
#'the vw data file. If NULL, the data is stored in a temporary folder and deleted before exiting
#'the function
#'@param target if training_data or validation_data is a [data.frame], the name of the variable
#'in the [data.frame] corresponding to the target variable
#'@param namespaces used only if training_data or validation_data is a [data.frame]. See arguments
#' of dt2vw
#'@param weight used only if training_data or validation_data is a [data.frame]. See arguments
#' of dt2vw
#'@param tag used only if training_data or validation_data is a [data.frame]. See arguments
#' of dt2vw
#'@param out_probs path to file where to save the predictions (probabilities). If NULL, the file is
#' stored in a temporary file then deleted.
#'@param loss loss function. By default logistic.
#'@param b number of bits for the weight vector allocation
#'@param learning_rate value passed to vw as --learning_rate
#'@param passes value passed to vw as --passes
#'@param l1 l1 regularization
#'@param l2 l2 regularization
#'@param early_terminate value passed to vw as --early_terminate
#'@param interactions character vector of interaction terms, each passed to vw as
#'--interactions. Can be passed in extra also.
#'@param link_function used to generate predictions
#'@param extra This is where more VW commands can be passed as text
#'@param validation_labels file to look for validation data true labels - to compute auc using perf
#'or roc_auc(), which relies on the R package pROC. If the validation data is a [data.frame] and validation_labels
#'is NULL, the validation labels file is deleted before exiting the function. If validation_labels is not
#'NULL, it indicates the path where validation labels should be stored.
#'@param verbose mostly used to debug but shows AUC and the vw command used to train the model
#'@param keep_preds TRUE (default) to return a vector of the predictions
#'@param do_evaluation TRUE to compute auc on validation_data. Use FALSE, to just score data
#'@param use_perf use perf to compute auc. Otherwise, roc_auc(), which relies on the R package pROC, is used.
#'@param plot_roc TRUE to plot the ROC curve; only used when use_perf is FALSE
#'@return a list with elements \code{auc} (NULL when do_evaluation is FALSE) and
#'\code{preds} (NULL when keep_preds is FALSE)
#'@examples
#'# 1. Create a training set (training_data) and validation set (validation_data) in vw format.
#'# 2. Install perf
#'# 3. Create a vector of true labels for the validation dataset, in the [0, 1] range. This is what perf likes.
#'# 4. Run one model with the present code
#'
#'\dontrun{
#' auc = vw(training_data='X_train.vw', validation_data='X_valid.vw',
#'          loss='logistic', model='mdl.vw', b=25, learning_rate=0.5,
#'          passes=20, l1=1e-08, l2=1e-08, early_terminate=2,
#'          interactions=NULL, extra='--stage_poly')
#'}
#'@import pROC
#'@export
vw <- function(training_data, validation_data, model='mdl.vw',
               path_vw_data_train = NULL, path_vw_data_val = NULL,
               target = NULL, namespaces = NULL, weight = NULL, tag = NULL,
               out_probs= NULL, validation_labels= NULL,
               loss='logistic', b=25, learning_rate=0.5, passes=1,
               l1=NULL, l2=NULL, early_terminate=NULL,
               link_function='--link=logistic', extra=NULL,
               keep_preds = TRUE, do_evaluation=TRUE,
               use_perf=TRUE, plot_roc=TRUE, verbose=TRUE,
               interactions=NULL){

  ## this should not create an unnecessary copy of the arguments
  data_args = list(train = training_data, val = validation_data)
  path_data_args = list(path_vw_data_train, path_vw_data_val)
  ## TRUE for the vw data files created here in tempdir(); only those are
  ## removed at the end, files written to a user-supplied path are kept
  is_temp_data = c(FALSE, FALSE)

  for(i in seq_along(data_args))
  {
    if(inherits(data_args[[i]], "data.frame"))
    {
      if(is.null(target))
        stop(sprintf("%s data argument: input argument is a data.frame, argument 'target' should be specified",
                     names(data_args)[i]))
      if(!is.character(path_data_args[[i]]))
      {
        path_data_args[[i]] = file.path(tempdir(), paste0(names(data_args)[i], ".vw"))
        is_temp_data[i] = TRUE
      }
      dt2vw(data = data_args[[i]], fileName = path_data_args[[i]],
            namespaces = namespaces, target = target, weight = weight, tag = tag)
    }
    else
    {
      path_data_args[[i]] = data_args[[i]]
    }
  }

  ## training command; paths are shell-quoted so that spaces do not split them
  training_data = path_data_args[[1]]
  cmd = sprintf('vw -d %s --loss_function %s -f %s', shQuote(training_data), loss, shQuote(model))
  cmd = sprintf('%s --learning_rate=%s --passes %s -c', cmd, learning_rate, passes)
  if(!is.null(l1)) cmd = sprintf('%s --l1 %s', cmd, l1)
  if(!is.null(l2)) cmd = sprintf('%s --l2 %s', cmd, l2)
  if(!is.null(b)) cmd = sprintf('%s -b %s', cmd, b)
  if(!is.null(early_terminate)) cmd = sprintf('%s --early_terminate %s', cmd, early_terminate)
  for(inter in interactions) cmd = sprintf('%s --interactions %s', cmd, inter)
  if(!is.null(extra)) cmd = sprintf('%s %s', cmd, extra)

  cat('Model parameters\n')
  cat(cmd)
  cat('\n')
  if(system(cmd) != 0)
    warning("the vw training command returned a non-zero exit status")

  ## prediction command
  del_prob = is.null(out_probs)
  if(del_prob)
    out_probs = file.path(tempdir(), "preds.vw")

  validation_data = path_data_args[[2]]
  predict = sprintf('vw -t -i %s -p %s %s -d %s',
                    shQuote(model), shQuote(out_probs), link_function, shQuote(validation_data))
  if(system(predict) != 0)
    warning("the vw prediction command returned a non-zero exit status")

  ## evaluation; auc stays NULL when no evaluation is requested (left unset,
  ## the name 'auc' would resolve to the pROC function of the same name)
  auc = NULL
  del_val = FALSE
  if(do_evaluation)
  {
    if(inherits(data_args[[2]], "data.frame"))
    {
      if(is.null(validation_labels))
      {
        del_val = TRUE
        validation_labels = file.path(tempdir(), "val_labs.vw")
      }
      write.table(x = data_args[[2]][[target]], file = validation_labels,
                  row.names = FALSE, col.names = FALSE)
    }
    else if(is.null(validation_labels))
    {
      stop("'validation_labels' should be specified when validation_data is a file and do_evaluation is TRUE")
    }

    if(use_perf)
    {
      # compute auc using perf
      eval_model = sprintf("perf -ROC -files %s %s | cut -c8-14",
                           shQuote(validation_labels), shQuote(out_probs))
      auc = system(eval_model, intern = TRUE)
    }
    else
    {
      auc = roc_auc(out_probs, validation_labels, plot_roc, cmd)
    }
  }

  if(verbose && do_evaluation)
  {
    cat('Model Parameters\n')
    cat(cmd)
    verbose_log = sprintf('AUC: %s', auc)
    print(verbose_log)
  }

  probs = NULL
  if(keep_preds)
    probs = fread(out_probs)[['V1']]

  ## delete temporary files
  for(i in seq_along(data_args))
    if(is_temp_data[i])
      file.remove(path_data_args[[i]])
  if(del_prob)
    file.remove(out_probs)
  if(del_val)
    file.remove(validation_labels)

  return(list(auc=auc, preds=probs))
}

# Reads labels file (from the validation dataset)
# and probabilities (out_file) from vowpal wabbit,
# returns the AUC as a numeric and optionally plots the ROC curve
# (cmd is used as the title of the plot)
roc_auc <- function(out_probs, validation_labels, plot_roc, cmd, ...){
  probs = fread(out_probs)[['V1']]
  labels = fread(validation_labels)[['V1']]

  if(!identical(length(probs), length(labels)))
    stop('The length of the probabilities and labels is different')

  # Fix cmd for adding it in title: break the line at '-f' and at '-c'
  cmd = vapply(strsplit(cmd, '-f'), function(x) paste0(x, collapse='\n'), character(1))
  cmd = vapply(strsplit(cmd, '-c'), function(x) paste0(x, collapse='\n'), character(1))

  # Compute the ROC curve, plot it if requested and return the AUC
  roc_obj = roc(labels, probs, auc=TRUE, print.auc=TRUE, print.thres=TRUE)
  if(plot_roc){
    print(plot.roc(roc_obj, main=cmd, cex.main = 0.5, ...))
  }
  auc_value = as.numeric(roc_obj$auc[[1]])
  return(auc_value)
}
vowpal-wabbit-8.6.1.dfsg1/R/r.vw/man/000077500000000000000000000000001332666127000171655ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/R/r.vw/man/dt2vw.Rd000066400000000000000000000030061332666127000205210ustar00rootroot00000000000000% Generated by roxygen2 (4.1.1): do not edit by hand % Please edit documentation in R/dt2vw.R \name{dt2vw} \alias{dt2vw} \title{Create a vw data file from a R data.frame object} \usage{ dt2vw(data, fileName, namespaces = NULL, target, weight = NULL, tag = NULL, hard_parse = F, append = F) } \arguments{ \item{data}{[data.table] data.table format (to be transformed)} \item{fileName}{[string] file name of the resulting data in VW-friendly format} \item{namespaces}{[list / yaml file] name of each namespace and each variable for each namespace can be a R list, or a YAML file example namespace with the IRIS database: namespaces = list(sepal = list(varName = c('Sepal.Length', 'Sepal.Width'), keepSpace=F), petal = list(varName = c('Petal.Length', 'Petal.Width'), keepSpace=F)) this creates 2
namespaces (sepal and petal) containing the variables defined by varName. keepSpace allows to keep or remove spaces in categorical variables example: "FERRARI 4Si" ==> "FERRARI_4Si" with keepSpace = F ==> "FERRARI 4Si" with keepSpace = T (interpreted by VW as two distinct categorical variables)} \item{target}{[string] target of the data (target)} \item{weight}{[string] weight of each line of the dataset (importance)} \item{tag}{[string] tag of each line of the dataset} \item{hard_parse}{[bool] if equals true, parses the data more strictly to avoid feeding VW with false categorical variables like '_', or same variables perceived differently like "_var" and "var"} } \description{ Create a vw data file from a R data.frame object } vowpal-wabbit-8.6.1.dfsg1/R/r.vw/man/vw.Rd000066400000000000000000000073651332666127000201230ustar00rootroot00000000000000% Generated by roxygen2 (4.1.1): do not edit by hand % Please edit documentation in R/vw.R \name{vw} \alias{vw} \title{Trains Vowpal Wabbit models from R.} \usage{ vw(training_data, validation_data, model = "mdl.vw", path_vw_data_train = NULL, path_vw_data_val = NULL, target = NULL, namespaces = NULL, weight = NULL, tag = NULL, out_probs = NULL, validation_labels = NULL, loss = "logistic", b = 25, learning_rate = 0.5, passes = 1, l1 = NULL, l2 = NULL, early_terminate = NULL, link_function = "--link=logistic", extra = NULL, do_evaluation = TRUE, use_perf = TRUE, plot_roc = TRUE, verbose = TRUE) } \arguments{ \item{training_data}{a [data.frame] or path to a vw data file} \item{validation_data}{a [data.frame] or path to a vw data file} \item{model}{name of the model file} \item{path_vw_data_train}{if training_data is a [data.frame], the path to which to save the vw data file. If NULL, the data is stored in a temporary folder and deleted before exiting the function} \item{path_vw_data_val}{if validation_data is a [data.frame], the path to which to save the vw data file. 
If NULL, the data is stored in a temporary folder and deleted before exiting the function} \item{target}{if training_data or validation_data is a [data.frame], the name of the variable in the [data.frame] corresponding to the target variable} \item{namespaces}{used only if training_data or validation_data is a [data.frame]. See arguments of dt2vw} \item{weight}{used only if training_data or validation_data is a [data.frame]. See arguments of dt2vw} \item{tag}{used only if training_data or validation_data is a [data.frame]. See arguments of dt2vw} \item{out_probs}{path to file where to save the predictions. If NULL, the file is stored in a temporary file then deleted.} \item{validation_labels}{file to look for validation data true labels - to compute auc using perf or roc_auc() from the R package pROC. If the validation data is a [data.frame] and validation_labels is NULL, the validation labels file is deleted before exiting the function. If validation_labels is not NULL, it indicates the path where validation labels should be stored.} \item{loss}{loss function. By default logistic.} \item{b}{number of bits for the weight vector allocation} \item{learning_rate}{} \item{passes}{} \item{l1}{l1 regularization} \item{l2}{l2 regularization} \item{early_terminate}{} \item{link_function}{used to generate predictions} \item{extra}{These is where more VW commands can be passed as text} \item{keep_preds}{TRUE (default) to return a vector of the predictions} \item{do_evaluation}{TRUE to compute auc on validation_data. Use FALSE, to just score data} \item{use_perf}{use perf to compute auc. Otherwise, auc_roc() from the R package pROC is used.} \item{verbose}{mostly used to debug but shows AUC and the vw command used to train the model} \item{interactions}{Add interaction terms. 
Can be passed in extra also.} \item{out_probs}{filename to write probabilities} } \description{ This function is fairly simple and extensible to other problems, so far just supports binary classification. Thought to be used in conjuction to perf in order to compute validation metrics on left out datasets. See osmot.cs.cornell.edu/kddcup/software.html for more info about perf. } \examples{ # 1. Create a training set (training_data) and validation set (validation_data) in vw format. # 2. Install perf # 3. Create a vector of true labels for the validation dataset, in the [0, 1] range. This is what perf likes. # 4. Run one model with the present code \dontrun{ auc = vw(training_data='X_train.vw', validation_data='X_valid.vw', loss='logistic', model='mdl.vw', b=25, learning_rate=0.5, passes=20, l1=1e-08, l2=1e-08, early_terminate=2, interactions=NULL, extra='--stage_poly') } } vowpal-wabbit-8.6.1.dfsg1/R/rscripts_readme.md000066400000000000000000000015631332666127000212330ustar00rootroot00000000000000### R scripts for vowpal wabbit ### download the r.vw package r.vw contains two functions: dt2vw and vw installation requires devtools: ``` install.packages("devtools") devtools::install_github("JohnLangford/vowpal_wabbit", subdir = "R/r.vw") ``` ### Convert a data.table to vowpal wabbit format: dt2vw() Allows to convert the data.table in chunks using the append=TRUE option. Make sure to define the correct data type before using the function. The function handles different data types as expected from R, so these should be defined already in the data.table object. ### Call vowpal wabbit from R: vw() Follow the example in R/examples/vw_example.R and R/examples/vw_example_2.R. It uses the vw.R function to run VW using system commands, so it is simple to adapt to different models. It also computes the AUC on a validation test set and plots the ROC curve if needed. 
vowpal-wabbit-8.6.1.dfsg1/README.deploy.txt000066400000000000000000000031411332666127000203050ustar00rootroot00000000000000Deploying Vowpal Wabbit Nick Nussbaum 9/7/14 The deploy_vw project makes a folder containing vw.exe,spanning tree.exe together with the Redistributable Visual Studio 2013 c++ runtime. You can put the folder on a machine which does not have Visual Studio 2013 installed and run the vw.exe without installing anything. If your environment has Visual Studio 2013 installed, there's no point in doing this since the runtime files are already installed on the system. The deploy_vw projects are by default unchecked in the build configuration manager so they aren't built every time the solution is compiled. Normally you'd give people the x86 or x64 release versions of the deployment folder. If you're debugging clusters with a remote debugger, you may want to build the debug versions. These include the pdb files as well as the debug versions of the runtime. These debug runtime versions are not redistributable. You can use them in your machines that have a Visual Studio license but can't distribute them to machines without a Visual Studio license. Check the Microsoft documentation for details on this restriction. To make the deployment In the Build > Configuration Manager menu set the Active Configuration and Active Platform. X64 and release would be a common choice. Select the deploy_vw project in Solution Explorer select Build Menu, Clean deploy_vw select Build Menu, Rebuild deploy_vw This will trigger builds of vw and spanning tree if needed and then create the folder vowpal_wabbit\vowpalwabbit\deploy\x64\Release. This folder can be copied to other Windows machines in order to run Vowpal Wabbit. vowpal-wabbit-8.6.1.dfsg1/README.md000066400000000000000000000202061332666127000165740ustar00rootroot00000000000000``` /* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved.
Released under a BSD (revised) license as described in the file LICENSE. */ ``` Vowpal Wabbit [![Build Status](https://travis-ci.org/JohnLangford/vowpal_wabbit.png)](https://travis-ci.org/JohnLangford/vowpal_wabbit) [![Windows Build Status](https://ci.appveyor.com/api/projects/status/github/JohnLangford/vowpal_wabbit?branch=master&svg=true)](https://ci.appveyor.com/project/JohnLangford/vowpal-wabbit) [![Coverage Status](https://coveralls.io/repos/JohnLangford/vowpal_wabbit/badge.svg)](https://coveralls.io/r/JohnLangford/vowpal_wabbit) [![Total Alerts](https://img.shields.io/lgtm/alerts/g/JohnLangford/vowpal_wabbit.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/JohnLangford/vowpal_wabbit/alerts/) This is the *vowpal wabbit* fast online learning code. For Windows, look at [README.windows.txt](./README.windows.txt) ## Prerequisite software These prerequisites are usually pre-installed on many platforms. However, you may need to consult your favorite package manager (*yum*, *apt*, *MacPorts*, *brew*, ...) to install missing software. - [Boost](http://www.boost.org) library, with the `Boost::Program_Options` library option enabled. - The zlib compression library + headers. In linux distros: package `zlib-devel` (Red Hat/CentOS), or `zlib1g-dev` (Ubuntu/Debian) - lsb-release (RedHat/CentOS: redhat-lsb-core, Debian: lsb-release, Ubuntu: you're all set, OSX: not required) - GNU *autotools*: *autoconf*, *automake*, *libtool*, *autoheader*, et. al. This is not a strict prereq. On many systems (notably Ubuntu with `libboost-program-options-dev` installed), the provided `Makefile` works fine. - (optional) [git](http://git-scm.com) if you want to check out the latest version of *vowpal wabbit*, work on the code, or even contribute code to the main project. ## Getting the code You can download the latest version from [here](https://github.com/JohnLangford/vowpal_wabbit/wiki/Download). 
The very latest version is always available via 'github' by invoking one of the following: ``` ## For the traditional git-protocol based Git interaction: $ git clone git://github.com/JohnLangford/vowpal_wabbit.git ## You can also try the following SSH URL: $ git clone git@github.com:JohnLangford/vowpal_wabbit.git ## For HTTP-based Git interaction $ git clone https://github.com/JohnLangford/vowpal_wabbit.git ``` ## Compiling You should be able to build the *vowpal wabbit* on most systems with: ``` $ make $ make test # (optional) ``` If that fails, try: ``` $ ./autogen.sh $ make $ make test # (optional) $ make install ``` Note that `./autogen.sh` requires *automake* (see the prerequisites above). `./autogen.sh`'s command line arguments are passed directly to `configure` as if they were `configure` arguments and flags. Note that `./autogen.sh` will overwrite the supplied `Makefile`, including the `Makefile`s in sub-directories, so keeping a copy of the `Makefile`s may be a good idea before running `autogen.sh`. If your original `Makefile`s were overwritten by `autogen.sh` calling `automake`, you may always get the originals back from git using: ``` git checkout Makefile */Makefile ``` Be sure to read the wiki: https://github.com/JohnLangford/vowpal_wabbit/wiki for the tutorial, command line options, etc. The 'cluster' directory has its own documentation for cluster parallel use, and the examples at the end of test/RunTests give some example flags. ## C++ Optimization The default C++ compiler optimization flags are very aggressive.
If you should run into a problem, consider creating and running `configure` with the `--enable-debug` option, e.g.: ``` $ ./configure --enable-debug ``` or passing your own compiler flags via the `OPTIM_FLAGS` make variable: ``` $ make OPTIM_FLAGS="-O0 -g" ``` ## Ubuntu/Debian specific info On Ubuntu/Debian/Mint and similar the following sequence should work for building the latest from github: ``` # -- Get libboost program-options and zlib: apt-get install libboost-program-options-dev zlib1g-dev # -- Get the python libboost bindings (python subdir) - optional: apt-get install libboost-python-dev # -- Get the vw source: git clone git://github.com/JohnLangford/vowpal_wabbit.git # -- Build: cd vowpal_wabbit make make test # (optional) make install ``` ### Ubuntu advanced build options (clang and static) If you prefer building with `clang` instead of `gcc` (much faster build and slightly faster executable), install `clang` and change the `make` step slightly: ``` apt-get install clang make CXX=clang++ ``` A statically linked `vw` executable that is not sensitive to boost version upgrades and can be safely copied between different Linux versions (e.g.
even from Ubuntu to Red-Hat) can be built and tested with: ``` make CXX='clang++ -static' clean vw test # ignore warnings ``` ## Debian Python 3 Binding Ensure boost-library and c-compiler are installed: ``` apt-get install libboost-program-options-dev zlib1g-dev libboost-python-dev clang make automake ``` Set Python 3.x and its boost-library as default: ``` update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1 update-alternatives --install /usr/bin/python python /usr/bin/python3.x 2 ln -sf /usr/lib/x86_64-linux-gnu/libboost_python-py3x.a /usr/lib/x86_64-linux-gnu/libboost_python.a ln -sf /usr/lib/x86_64-linux-gnu/libboost_python-py3x.so /usr/lib/x86_64-linux-gnu/libboost_python.so ``` Install Vowpal Wabbit via pip: ``` pip3 install vowpalwabbit ``` ## Mac OS X-specific info OSX requires _glibtools_, which is available via the [brew](http://brew.sh) or [MacPorts](https://www.macports.org) package managers. ### Complete brew install of 8.4 ``` brew install vowpal-wabbit ``` [The homebrew formula for VW is located on github](https://github.com/Homebrew/homebrew-core/blob/master/Formula/vowpal-wabbit.rb). ### Manual install of Vowpal Wabbit #### OSX Dependencies (if using Brew): ``` brew install libtool brew install autoconf brew install automake brew install boost brew install boost-python ``` #### OSX Dependencies (if using MacPorts): ``` ## Install glibtool and other GNU autotool friends: $ port install libtool autoconf automake ## Build Boost for Mac OS X 10.8 and below $ port install boost +no_single -no_static +openmpi +python27 configure.cxx_stdlib=libc++ configure.cxx=clang++ ## Build Boost for Mac OS X 10.9 and above $ port install boost +no_single -no_static +openmpi +python27 ``` #### OSX Manual compile: *Mac OS X 10.8 and below*: ``configure.cxx_stdlib=libc++`` and ``configure.cxx=clang++`` ensure that ``clang++`` uses the correct C++11 functionality while building Boost. 
Ordinarily, ``clang++`` relies on the older GNU ``g++`` 4.2 series header files and ``stdc++`` library; ``libc++`` is the ``clang`` replacement that provides newer C++11 functionality. If these flags aren't present, you will likely encounter compilation errors when compiling _vowpalwabbit/cbify.cc_. These error messages generally contain complaints about ``std::to_string`` and ``std::unique_ptr`` types missing. To compile: ``` $ sh autogen.sh --enable-libc++ $ make $ make test # (optional) ``` #### OSX Python Binding installation with Anaconda When using Anaconda as the source for Python the default Boost libraries used in the Makefile need to be adjusted. Below are the steps needed to install the Python bindings for VW. This should work for Python 2 and 3. Adjust the directories to match where anaconda is installed. ``` # create anaconda environment with boost conda create --name vw boost source activate vw git clone https://github.com/JohnLangford/vowpal_wabbit.git cd vowpal_wabbit # edit Makefile # change BOOST_INCLUDE to use anaconda env dir: /anaconda/envs/vw/include # change BOOST_LIBRARY to use anaconda lib dir: /anaconda/envs/vw/lib cd python python setup.py install ``` ## Code Documentation To browse the code more easily, do `make doc` and then point your browser to `doc/html/index.html`. Note that the documentation build generates class diagrams using [Graphviz](https://www.graphviz.org). For best results, ensure that it is installed beforehand.
vowpal-wabbit-8.6.1.dfsg1/README.windows.txt000066400000000000000000000162661332666127000205170ustar00rootroot00000000000000Originally by Chris Quirk ************************************************************************************************************** Notes for building VW under Visual Studio 2013 on Windows 8.1 9/02/2014 Nick Nussbaum nickn@seanet.com Replace source dependencies with Nuget 04/29/2015 Sharat Chikkerur sharat.chikkerur@gmail.com Added ANTLR based unit test 10/2/2015 Markus Cozowicz marcozo@microsoft.com ************************************************************************************************************** (1) Get Tools You'll need a Visual Studio 2013 (or 2015) installed that includes c# and c++ You should install Visual Studio 2013 Update 5: https://www.microsoft.com/en-us/download/details.aspx?id=48129 You'll also need the Windows SDK which you can download from Microsoft at http://msdn.microsoft.com/en-us/windows/desktop/bg162891.aspx You'll need Nuget integration with visual studio http://docs.nuget.org/consume You'll need Java to run unit tests http://www.oracle.com/technetwork/java/javase/downloads/jre8-downloads-2133155.html ************************************************************************************************************** (2) open a copy various command shells (a) Open an x86 command shell: run the Visual Studio 2013 Tools / VS2013 x86 Native Tools Command Prompt or cmd.exe /k "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\vcvarsall.bat" x86 (b) Open an x64 command shell: run the Visual Studio 2013 Tools / VS2013 x64 Cross Tools Command Prompt or cmd.exe /k "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\vcvarsall.bat" x86_amd64 (c) Open the Git bash shell "C:\Program Files (x86)\Git\bin\sh.exe" --login -i or some other bash shell (d) If none of these work, try locating the command line tools based on directions here: 
https://docs.microsoft.com/en-us/dotnet/framework/tools/developer-command-prompt-for-vs ************************************************************************************************************** (3) Setup Directories I use c:\src\vw as my %ROOT% directory; (a) mkdir c:\src (b) mkdir c:\src\vw ************************************************************************************************************** (4) Get Vowpal Wabbit (a) In a command shell to %ROOT% : "cd c:\src\vw" (b) run "git clone https://github.com/JohnLangford/vowpal_wabbit.git" details of the changes are at the bottom of this file. ************************************************************************************************************** (5) Restore nugets (a) In a command shell to %ROOT%\vowpalwabbit\vowpalwabbit : "cd c:\src\vw\vowpalwabbit\vowpalwabbit" (b) run ".nuget\nuget restore vw.sln" This will restore the ANTLR nuget which is needed before Visual Studio loads the solution. ************************************************************************************************************** (6) Build Vowpal Wabbit (a) Using visual studio Open %ROOT%\vowpal_wabbit\vowpalwabbit\vw.sln in Visual Studio 2013 Set startup project as vw (or the test project) Select x64 platform (Configuration Manager \ Active solution platform) Select x64 as test platform (Test \ Test settings \ Default Processor Architecture) run build>rebuild solution or run batch build Binaries will be in one of these four directories, based on whether you built DEBUG or RELEASE bits and whether you are building x64. %ROOT%\vowpal_wabbit\vowpalwabbit\x64\Debug\vw.exe %ROOT%\vowpal_wabbit\vowpalwabbit\x64\Release\vw.exe Missing nugets will be installed during the build. (b) Using command line (available configurations are "Release" and "Debug".
Available platforms are "x64" and "Win32") run>msbuild /p:Configuration="Release" /p:Platform="x64" vw.sln Note: If you failed to do so before opening the solution, the cs_unittest project is in a "not loaded" state. After executing the above you'll have to hit "Reload" (Project / Context Menu) in Visual Studio. ************************************************************************************************************** (8) Test There's a new test batch file that runs a quick test on all four configurations (a) go to a windows command shell (b) cd c:\src\vw\test (c) run test\test_2_winvw.bat ************************************************************************************************************** (9) Appendix: The Gory Details of the patch and VW upgrades (a) misc files adds this content to this file ReadMe.Windows.txt adds the file vowpal_wabbit\zlibpatch.txt a patch for zlib adds the file test\test_2_winvw.bat a simple test of x86 and x64 training and prediction (b) Changes to Zlib This Zlib patch includes the following fixes; Convert to Visual Studio 2013 solution The fix in the prior section to correctly use DLL versions of the runtime for 32bit platforms Changes to use only two fields in zlibvc.def VERSION - VERSION 1.2.8 + VERSION 1.28 since otherwise the compiler will complain about more than 2 fields and ignore them.
add /safeseh to the x86 assembler so Visual Studio will not generate an error "unable to generate SAFESEH image" This is not needed for x64 since it happens by default In the properties sheet for zlibvc The pre build command line for x64 release should be fixed -cd ..\..\contrib\masmx64 +cd ..\..\masmx64 Code generation: Runtime Library for windows release set to Multi-threaded DLL (/MD) not /MT for zlibvc and zlibstat Otherwise VS13 will complain about multiple runtime specification while trying to autolink (c) Change to Boost 1.56.0 (d) Changes to VowpalWabbit changes vw projects and solutions to run under Visual Studio 2013 rather than Visual Studio 2012 change vw projects to redefine $(BoostIncludeDir) to refer to Boost 1.56.0 change vw projects to define $(BoostLibDir) to refer to Boost 1.56.0 vowpalwabbit/vw_static.vcxproj Define $(IncludePath) change $(ZlibDir) to use \contrib\vstudio\vc11 rather than vc10 change x64 version DebugInformationFormat to use "ProgramDatabase" and not the invalid "EditAndContinue" change IntermediateFolderPath to include ProjectName so two projects aren't trying to build in the same folder add searn_multiclasstask.cc to the project change include path to all use macros $(VC_IncludePath);$(WindowsSDK_IncludePath) change additional dependencies to use $(SolutionDir)$(PlatformShortName)\$(Configuration)\vw_static.lib adds a reference to the WindowsSDKDir Include\um change vw_static properties for debug 64bit to /Zi from /Zl to shut up some warnings. change the vw and static_vw to use intermediate directories that append the $(ProjectName). this avoids various conflicts and warnings caused by dumping into the same directory.
change link build copies to use PlatformShortName rather than PlatformName to use x86 rather than Win32 Change the anycpu configuration for problems with cs_test vowpalwabbit/vw.sln change configurations to use Debug|x86 from Debug|AnyCpu c_test/c_test.vcxproj change to VS 12 change configurations to use Debug|x86 from Debug|AnyCpu change cs_test to use x86 and x64 rather than anycpu change test file specs to reference the .../../... test directory vowpal-wabbit-8.6.1.dfsg1/acinclude.d/000077500000000000000000000000001332666127000174665ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/acinclude.d/ax_boost_base.m4000066400000000000000000000241531332666127000225450ustar00rootroot00000000000000# =========================================================================== # http://www.gnu.org/software/autoconf-archive/ax_boost_base.html # =========================================================================== # # SYNOPSIS # # AX_BOOST_BASE([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) # # DESCRIPTION # # Test for the Boost C++ libraries of a particular version (or newer) # # If no path to the installed boost library is given the macro searches # under /usr, /usr/local, /opt and /opt/local and evaluates the # $BOOST_ROOT environment variable. Further documentation is available at # <http://randspringer.de/boost/index.html>. # # This macro calls: # # AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS) # # And sets: # # HAVE_BOOST # # LICENSE # # Copyright (c) 2008 Thomas Porschberg # Copyright (c) 2009 Peter Adolphs # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty.
#serial 20

# AX_BOOST_BASE([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
#
# Looks for Boost headers whose BOOST_VERSION is at least MINIMUM-VERSION
# (1.20.0 when the argument is empty).  Search order:
#   1. the --with-boost=PATH prefix, or /usr /usr/local /opt /opt/local when
#      no prefix is given and we are not cross compiling (system layout);
#   2. versioned include/boost-X_Y directories under the same prefixes;
#   3. a staged build below $BOOST_ROOT.
# --with-boost-libdir=DIR overrides the detected -L directory.
# On success BOOST_CPPFLAGS and BOOST_LDFLAGS are AC_SUBSTed, HAVE_BOOST is
# defined and ACTION-IF-FOUND runs; otherwise ACTION-IF-NOT-FOUND runs.
# CPPFLAGS and LDFLAGS are restored to their saved values before returning.
AC_DEFUN([AX_BOOST_BASE],
[
AC_ARG_WITH([boost],
  [AS_HELP_STRING([--with-boost@<:@=ARG@:>@],
    [use Boost library from a standard location (ARG=yes),
     from the specified location (ARG=<path>),
     or disable it (ARG=no)
     @<:@ARG=yes@:>@ ])],
    [
    if test "$withval" = "no"; then
        want_boost="no"
    elif test "$withval" = "yes"; then
        want_boost="yes"
        ac_boost_path=""
    else
        want_boost="yes"
        ac_boost_path="$withval"
    fi
    ],
    [want_boost="yes"])


AC_ARG_WITH([boost-libdir],
        AS_HELP_STRING([--with-boost-libdir=LIB_DIR],
        [Force given directory for boost libraries. Note that this will override library path detection, so use this parameter only if default library detection fails and you know exactly where your boost libraries are located.]),
        [
        if test -d "$withval"
        then
                ac_boost_lib_path="$withval"
        else
                AC_MSG_ERROR(--with-boost-libdir expected directory name)
        fi
        ],
        [ac_boost_lib_path=""]
)

if test "x$want_boost" = "xyes"; then
    dnl Split the requested version into major/minor/sub-minor and encode it
    dnl the same way BOOST_VERSION is encoded.
    boost_lib_version_req=ifelse([$1], ,1.20.0,$1)
    boost_lib_version_req_shorten=`expr $boost_lib_version_req : '\([[0-9]]*\.[[0-9]]*\)'`
    boost_lib_version_req_major=`expr $boost_lib_version_req : '\([[0-9]]*\)'`
    boost_lib_version_req_minor=`expr $boost_lib_version_req : '[[0-9]]*\.\([[0-9]]*\)'`
    boost_lib_version_req_sub_minor=`expr $boost_lib_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'`
    if test "x$boost_lib_version_req_sub_minor" = "x" ; then
        boost_lib_version_req_sub_minor="0"
    fi
    WANT_BOOST_VERSION=`expr $boost_lib_version_req_major \* 100000 \+ $boost_lib_version_req_minor \* 100 \+ $boost_lib_version_req_sub_minor`
    AC_MSG_CHECKING(for boostlib >= $boost_lib_version_req)
    succeeded=no

    dnl On 64-bit systems check for system libraries in both lib64 and lib.
    dnl The former is specified by FHS, but e.g. Debian does not adhere to
    dnl this (as it raises problems for generic multi-arch support).
    dnl The last entry in the list is chosen by default when no libraries
    dnl are found, e.g. when only header-only libraries are installed!
    libsubdirs="lib"
    ax_arch=`uname -m`
    if test $ax_arch = x86_64 -o $ax_arch = ppc64 -o $ax_arch = s390x -o $ax_arch = sparc64; then
        libsubdirs="lib64 lib lib64"
    fi

    dnl first we check the system location for boost libraries
    dnl this location is chosen if boost libraries are installed with the --layout=system option
    dnl or if you install boost with RPM
    if test "$ac_boost_path" != ""; then
        BOOST_CPPFLAGS="-I$ac_boost_path/include"
        for ac_boost_path_tmp in $libsubdirs; do
            if test -d "$ac_boost_path"/"$ac_boost_path_tmp" ; then
                BOOST_LDFLAGS="-L$ac_boost_path/$ac_boost_path_tmp"
                break
            fi
        done
    elif test "$cross_compiling" != yes; then
        for ac_boost_path_tmp in /usr /usr/local /opt /opt/local ; do
            if test -d "$ac_boost_path_tmp/include/boost" && test -r "$ac_boost_path_tmp/include/boost"; then
                for libsubdir in $libsubdirs ; do
                    if ls "$ac_boost_path_tmp/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
                done
                BOOST_LDFLAGS="-L$ac_boost_path_tmp/$libsubdir"
                BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include"
                break;
            fi
        done
    fi

    dnl overwrite ld flags if we have required special directory with
    dnl --with-boost-libdir parameter
    if test "$ac_boost_lib_path" != ""; then
       BOOST_LDFLAGS="-L$ac_boost_lib_path"
    fi

    CPPFLAGS_SAVED="$CPPFLAGS"
    CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
    export CPPFLAGS

    LDFLAGS_SAVED="$LDFLAGS"
    LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
    export LDFLAGS

    dnl The probe must include boost/version.hpp: that header is what
    dnl defines BOOST_VERSION, which the preprocessor test below compares.
    AC_REQUIRE([AC_PROG_CXX])
    AC_LANG_PUSH(C++)
        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
    @%:@include <boost/version.hpp>
    ]], [[
    #if BOOST_VERSION >= $WANT_BOOST_VERSION
    // Everything is okay
    #else
    #  error Boost version is too old
    #endif
    ]])],[
        AC_MSG_RESULT(yes)
    succeeded=yes
    found_system=yes
        ],[
        ])
    AC_LANG_POP([C++])



    dnl if we found no boost with system layout we search for boost libraries
    dnl built and installed without the --layout=system option or for a staged(not installed) version
    if test "x$succeeded" != "xyes"; then
        _version=0
        if test "$ac_boost_path" != ""; then
            if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then
                for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do
                    _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'`
                    V_CHECK=`expr $_version_tmp \> $_version`
                    if test "$V_CHECK" = "1" ; then
                        _version=$_version_tmp
                    fi
                    VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
                    BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE"
                done
            fi
        else
            if test "$cross_compiling" != yes; then
                for ac_boost_path in /usr /usr/local /opt /opt/local ; do
                    if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then
                        for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do
                            _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'`
                            V_CHECK=`expr $_version_tmp \> $_version`
                            if test "$V_CHECK" = "1" ; then
                                _version=$_version_tmp
                                best_path=$ac_boost_path
                            fi
                        done
                    fi
                done

                VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
                BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE"
                if test "$ac_boost_lib_path" = ""; then
                    for libsubdir in $libsubdirs ; do
                        if ls "$best_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
                    done
                    BOOST_LDFLAGS="-L$best_path/$libsubdir"
                fi
            fi

            if test "x$BOOST_ROOT" != "x"; then
                for libsubdir in $libsubdirs ; do
                    if ls "$BOOST_ROOT/stage/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
                done
                if test -d "$BOOST_ROOT" && test -r "$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/$libsubdir" && test -r "$BOOST_ROOT/stage/$libsubdir"; then
                    version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'`
                    stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'`
                    stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'`
                    V_CHECK=`expr $stage_version_shorten \>\= $_version`
                    if test "$V_CHECK" = "1" -a "$ac_boost_lib_path" = "" ; then
                        AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT)
                        BOOST_CPPFLAGS="-I$BOOST_ROOT"
                        BOOST_LDFLAGS="-L$BOOST_ROOT/stage/$libsubdir"
                    fi
                fi
            fi
        fi

        CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
        export CPPFLAGS
        LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
        export LDFLAGS

        AC_LANG_PUSH(C++)
            AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
        @%:@include <boost/version.hpp>
        ]], [[
        #if BOOST_VERSION >= $WANT_BOOST_VERSION
        // Everything is okay
        #else
        #  error Boost version is too old
        #endif
        ]])],[
            AC_MSG_RESULT(yes)
        succeeded=yes
        found_system=yes
            ],[
            ])
        AC_LANG_POP([C++])
    fi

    if test "$succeeded" != "yes" ; then
        if test "$_version" = "0" ; then
            AC_MSG_NOTICE([[We could not detect the boost libraries (version $boost_lib_version_req_shorten or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option. If you are sure you have boost installed, then check your version number looking in <boost/version.hpp>. See http://randspringer.de/boost for more documentation.]])
        else
            AC_MSG_NOTICE([Your boost libraries seems to old (version $_version).])
        fi
        # execute ACTION-IF-NOT-FOUND (if present):
        ifelse([$3], , :, [$3])
    else
        AC_SUBST(BOOST_CPPFLAGS)
        AC_SUBST(BOOST_LDFLAGS)
        AC_DEFINE(HAVE_BOOST,,[define if the Boost library is available])
        # execute ACTION-IF-FOUND (if present):
        ifelse([$2], , :, [$2])
    fi

    CPPFLAGS="$CPPFLAGS_SAVED"
    LDFLAGS="$LDFLAGS_SAVED"
fi

])
vowpal-wabbit-8.6.1.dfsg1/acinclude.d/ax_boost_program_options.m4000066400000000000000000000113111332666127000250450ustar00rootroot00000000000000# ============================================================================ # http://www.gnu.org/software/autoconf-archive/ax_boost_program_options.html # ============================================================================ # # SYNOPSIS # # AX_BOOST_PROGRAM_OPTIONS # # DESCRIPTION # # Test for program options library from the Boost C++ libraries. The macro # requires a preceding call to AX_BOOST_BASE. Further documentation is # available at .
# # This macro calls: # # AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) # # And sets: # # HAVE_BOOST_PROGRAM_OPTIONS # # LICENSE # # Copyright (c) 2009 Thomas Porschberg # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty.

#serial 20

# AX_BOOST_PROGRAM_OPTIONS
#
# Checks that a program using boost::program_options compiles with
# BOOST_CPPFLAGS, then searches the directory named in BOOST_LDFLAGS for a
# matching library (.so / .a, then .dll / .a without the lib prefix) and
# verifies it with AC_CHECK_LIB.  A library name given through
# --with-boost-program-options=LIB is tried as-is and with a
# boost_program_options- prefix.  On success BOOST_PROGRAM_OPTIONS_LIB is
# AC_SUBSTed and HAVE_BOOST_PROGRAM_OPTIONS is defined; if no library can be
# found or linked the macro aborts with AC_MSG_ERROR.
AC_DEFUN([AX_BOOST_PROGRAM_OPTIONS],
[
    AC_ARG_WITH([boost-program-options],
        AS_HELP_STRING([--with-boost-program-options@<:@=special-lib@:>@],
                       [use the program options library from boost - it is possible to specify a certain library for the linker
                        e.g. --with-boost-program-options=boost_program_options-gcc-mt-1_33_1 ]),
        [
        if test "$withval" = "no"; then
            want_boost="no"
        elif test "$withval" = "yes"; then
            want_boost="yes"
            ax_boost_user_program_options_lib=""
        else
            want_boost="yes"
            ax_boost_user_program_options_lib="$withval"
        fi
        ],
        [want_boost="yes"]
    )

    if test "x$want_boost" = "xyes"; then
        AC_REQUIRE([AC_PROG_CC])
        export want_boost
        CPPFLAGS_SAVED="$CPPFLAGS"
        CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
        export CPPFLAGS
        LDFLAGS_SAVED="$LDFLAGS"
        LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
        export LDFLAGS
        dnl The probe needs the program_options header, otherwise the
        dnl options_description type used below is undeclared.
        AC_CACHE_CHECK([whether the Boost::Program_Options library is available],
                       ax_cv_boost_program_options,
                       [AC_LANG_PUSH(C++)
                        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/program_options.hpp>
                                                           ]],
                                           [[boost::program_options::options_description generic("Generic options");
                                             return 0;]])],
                                          ax_cv_boost_program_options=yes, ax_cv_boost_program_options=no)
                        AC_LANG_POP([C++])
        ])
        if test "$ax_cv_boost_program_options" = yes; then
            AC_DEFINE(HAVE_BOOST_PROGRAM_OPTIONS,,[define if the Boost::PROGRAM_OPTIONS library is available])
            BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'`
            if test "x$ax_boost_user_program_options_lib" = "x"; then
                for libextension in `ls $BOOSTLIBDIR/libboost_program_options*.so* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_program_options.*\)\.so.*$;\1;'` `ls $BOOSTLIBDIR/libboost_program_options*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_program_options.*\)\.a*$;\1;'` ; do
                    ax_lib=${libextension}
                    AC_CHECK_LIB($ax_lib, exit,
                                 [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break],
                                 [link_program_options="no"])
                done
                if test "x$link_program_options" != "xyes"; then
                    for libextension in `ls $BOOSTLIBDIR/boost_program_options*.dll* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_program_options.*\)\.dll.*$;\1;'` `ls $BOOSTLIBDIR/boost_program_options*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_program_options.*\)\.a*$;\1;'` ; do
                        ax_lib=${libextension}
                        AC_CHECK_LIB($ax_lib, exit,
                                     [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break],
                                     [link_program_options="no"])
                    done
                fi
            else
                for ax_lib in $ax_boost_user_program_options_lib boost_program_options-$ax_boost_user_program_options_lib; do
                    AC_CHECK_LIB($ax_lib, main,
                                 [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break],
                                 [link_program_options="no"])
                done
            fi
            if test "x$ax_lib" = "x"; then
                AC_MSG_ERROR(Could not find a version of the library!)
            fi
            if test "x$link_program_options" != "xyes"; then
                AC_MSG_ERROR([Could not link against [$ax_lib] !])
            fi
        fi
        CPPFLAGS="$CPPFLAGS_SAVED"
        LDFLAGS="$LDFLAGS_SAVED"
    fi
])
vowpal-wabbit-8.6.1.dfsg1/acinclude.d/ax_check_zlib.m4000066400000000000000000000120651332666127000225210ustar00rootroot00000000000000# =========================================================================== # http://www.gnu.org/software/autoconf-archive/ax_check_zlib.html # =========================================================================== # # SYNOPSIS # # AX_CHECK_ZLIB() # # DESCRIPTION # # This macro searches for an installed zlib library. If nothing was # specified when calling configure, it searches first in /usr/local and # then in /usr, /opt/local and /sw.
If the --with-zlib=DIR is specified, # it will try to find it in DIR/include/zlib.h and DIR/lib/libz.a. If # --without-zlib is specified, the library is not searched at all. # # If either the header file (zlib.h) or the library (libz) is not found, # the configuration exits on error, asking for a valid zlib installation # directory or --without-zlib. # # The macro defines the symbol HAVE_LIBZ if the library is found. You # should use autoheader to include a definition for this symbol in a # config.h file. Sample usage in a C/C++ source is as follows: # # #ifdef HAVE_LIBZ # #include <zlib.h> # #endif /* HAVE_LIBZ */ # # LICENSE # # Copyright (c) 2008 Loic Dachary # Copyright (c) 2010 Bastien Chevreux # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation; either version 2 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. If not, see <http://www.gnu.org/licenses/>. # # As a special exception, the respective Autoconf Macro's copyright owner # gives unlimited permission to copy, distribute and modify the configure # scripts that are the output of Autoconf when processing the Macro. You # need not follow the terms of the GNU General Public License when using # or distributing such scripts, even though portions of the text of the # Macro appear in them. The GNU General Public License (GPL) does govern # all other use of the material that constitutes the Autoconf Macro. # # This special exception to the GPL applies to versions of the Autoconf # Macro released by the Autoconf Archive.
When you make and distribute a # modified version of the Autoconf Macro, you may extend this special # exception to the GPL to apply to your modified version as well.

#serial 8

AU_ALIAS([CHECK_ZLIB], [AX_CHECK_ZLIB])

# AX_CHECK_ZLIB
#
# Handles --with-zlib=DIR / --without-zlib, then looks for include/zlib.h
# under each candidate prefix.  For the first prefix that has the header it
# appends -L<prefix>/lib to LDFLAGS and -I<prefix>/include to CPPFLAGS and
# checks libz (inflateEnd) and zlib.h.  If either check fails, LDFLAGS and
# CPPFLAGS are restored from the values saved beforehand and configure
# aborts.
#
# NOTE(review): zlib_places is only assigned inside the --with-zlib handler,
# so when the option is not given at all the search below appears to be
# skipped even though "yes" is reported - confirm whether that is intended.
AC_DEFUN([AX_CHECK_ZLIB],
#
# Handle user hints
#
[AC_MSG_CHECKING(if zlib is wanted)
AC_ARG_WITH(zlib,
[  --with-zlib=DIR         root directory path of zlib installation [defaults to
                          /usr/local or /usr if not found in /usr/local]
  --without-zlib          to disable zlib usage completely],
[if test "$withval" != no ; then
  zlib_places="/usr/local /usr /opt/local /sw"
  AC_MSG_RESULT(yes)
  if test -d "$withval"
  then
    zlib_places="$withval $zlib_places"
  else
    AC_MSG_WARN([Sorry, $withval does not exist, checking usual places])
  fi
else
  AC_MSG_RESULT(no)
fi],
[AC_MSG_RESULT(yes)])

#
# Locate zlib, if wanted
#
if test -n "${zlib_places}"
then
  # check the user supplied or any other more or less 'standard' place:
  #   Most UNIX systems      : /usr/local and /usr
  #   MacPorts / Fink on OSX : /opt/local respectively /sw
  for ZLIB_HOME in ${zlib_places} ; do
    if test -f "${ZLIB_HOME}/include/zlib.h"; then break; fi
    ZLIB_HOME=""
  done

  # if zlib.h was nowhere to be found, give a notice and bail out
  if test ! -n "${ZLIB_HOME}"; then
    AC_MSG_ERROR(No zlib.h in any include directory of ${zlib_places}: either specify a valid zlib installation with --with-zlib=DIR or disable zlib usage with --without-zlib)
  fi

  # Save both flag sets so the failure path below can restore them.
  # CPPFLAGS must be saved from CPPFLAGS, not from LDFLAGS; otherwise the
  # restore would overwrite the preprocessor flags with linker flags.
  ZLIB_OLD_LDFLAGS=$LDFLAGS
  ZLIB_OLD_CPPFLAGS=$CPPFLAGS
  LDFLAGS="$LDFLAGS -L${ZLIB_HOME}/lib"
  CPPFLAGS="$CPPFLAGS -I${ZLIB_HOME}/include"
  AC_LANG_SAVE
  AC_LANG_C
  AC_CHECK_LIB(z, inflateEnd, [zlib_cv_libz=yes], [zlib_cv_libz=no])
  AC_CHECK_HEADER(zlib.h, [zlib_cv_zlib_h=yes], [zlib_cv_zlib_h=no])
  AC_LANG_RESTORE
  if test "$zlib_cv_libz" = "yes" -a "$zlib_cv_zlib_h" = "yes"
  then
    #
    # If both library and header were found, use them
    #
    AC_CHECK_LIB(z, inflateEnd)
    AC_MSG_CHECKING(zlib in ${ZLIB_HOME})
    AC_MSG_RESULT(ok)
  else
    #
    # If either header or library was not found, revert and bomb
    #
    AC_MSG_CHECKING(zlib in ${ZLIB_HOME})
    LDFLAGS="$ZLIB_OLD_LDFLAGS"
    CPPFLAGS="$ZLIB_OLD_CPPFLAGS"
    AC_MSG_RESULT(failed)
    AC_MSG_ERROR(either specify a valid zlib installation with --with-zlib=DIR or disable zlib usage with --without-zlib)
  fi
fi

])
vowpal-wabbit-8.6.1.dfsg1/acinclude.d/ax_cxx_compile_stdcxx_11.m4000066400000000000000000000112751332666127000246360ustar00rootroot00000000000000# ============================================================================ # http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx_11.html # ============================================================================ # # SYNOPSIS # # AX_CXX_COMPILE_STDCXX_11([ext|noext],[mandatory|optional]) # # DESCRIPTION # # Check for baseline language coverage in the compiler for the C++11 # standard; if necessary, add switches to CXXFLAGS to enable support. # # The first argument, if specified, indicates whether you insist on an # extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g. # -std=c++11). If neither is specified, you get whatever works, with # preference for an extended mode.
# # The second argument, if specified 'mandatory' or if left unspecified, # indicates that baseline C++11 support is required and that the macro # should error out if no mode with that support is found. If specified # 'optional', then configuration proceeds regardless, after defining # HAVE_CXX11 if and only if a supporting mode is found. # # LICENSE # # Copyright (c) 2008 Benjamin Kosnik # Copyright (c) 2012 Zack Weinberg # Copyright (c) 2013 Roy Stogner # Copyright (c) 2014 Alexey Sokolov # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty.

#serial 4

# C++ source compiled by every probe below.  It exercises static_assert,
# override, the >> closing of nested template argument lists, decltype,
# rvalue references, auto and lambdas.  The template parameter and argument
# lists are required: without them the program is not valid C++ and every
# probe fails.
m4_define([_AX_CXX_COMPILE_STDCXX_11_testbody], [[
  template <typename T>
    struct check
    {
      static_assert(sizeof(int) <= sizeof(T), "not big enough");
    };

    struct Base {
    virtual void f() {}
    };
    struct Child : public Base {
    virtual void f() override {}
    };

    typedef check<check<bool>> right_angle_brackets;

    int a;
    decltype(a) b;

    typedef check<int> check_type;
    check_type c;
    check_type&& cr = static_cast<check_type&&>(c);

    auto d = a;
    auto l = [](){};
]])

# AX_CXX_COMPILE_STDCXX_11([ext|noext], [mandatory|optional])
#
# Tries the test body with the compiler's default flags, then (unless the
# first argument is noext) with -std=gnu++11 / -std=gnu++0x, then (unless
# the first argument is ext) with -std=c++11 / -std=c++0x.  The first
# switch that works is appended to CXXFLAGS.  In mandatory mode (also the
# default) configure aborts when nothing works; in optional mode HAVE_CXX11
# is set to 0 or 1 and AC_SUBSTed, and defined in the config header on
# success.
AC_DEFUN([AX_CXX_COMPILE_STDCXX_11], [dnl
  m4_if([$1], [], [],
        [$1], [ext], [],
        [$1], [noext], [],
        [m4_fatal([invalid argument `$1' to AX_CXX_COMPILE_STDCXX_11])])dnl
  m4_if([$2], [], [ax_cxx_compile_cxx11_required=true],
        [$2], [mandatory], [ax_cxx_compile_cxx11_required=true],
        [$2], [optional], [ax_cxx_compile_cxx11_required=false],
        [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX_11])])
  AC_LANG_PUSH([C++])dnl
  ac_success=no
  AC_CACHE_CHECK(whether $CXX supports C++11 features by default,
  ax_cv_cxx_compile_cxx11,
  [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])],
    [ax_cv_cxx_compile_cxx11=yes],
    [ax_cv_cxx_compile_cxx11=no])])
  if test x$ax_cv_cxx_compile_cxx11 = xyes; then
    ac_success=yes
  fi

  m4_if([$1], [noext], [], [dnl
  if test x$ac_success = xno; then
    for switch in -std=gnu++11 -std=gnu++0x; do
      cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch])
      AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch,
                     $cachevar,
        [ac_save_CXXFLAGS="$CXXFLAGS"
         CXXFLAGS="$CXXFLAGS $switch"
         AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])],
          [eval $cachevar=yes],
          [eval $cachevar=no])
         CXXFLAGS="$ac_save_CXXFLAGS"])
      if eval test x\$$cachevar = xyes; then
        CXXFLAGS="$CXXFLAGS $switch"
        ac_success=yes
        break
      fi
    done
  fi])

  m4_if([$1], [ext], [], [dnl
  if test x$ac_success = xno; then
    for switch in -std=c++11 -std=c++0x; do
      cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch])
      AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch,
                     $cachevar,
        [ac_save_CXXFLAGS="$CXXFLAGS"
         CXXFLAGS="$CXXFLAGS $switch"
         AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])],
          [eval $cachevar=yes],
          [eval $cachevar=no])
         CXXFLAGS="$ac_save_CXXFLAGS"])
      if eval test x\$$cachevar = xyes; then
        CXXFLAGS="$CXXFLAGS $switch"
        ac_success=yes
        break
      fi
    done
  fi])
  AC_LANG_POP([C++])
  if test x$ax_cxx_compile_cxx11_required = xtrue; then
    if test x$ac_success = xno; then
      AC_MSG_ERROR([*** A compiler with support for C++11 language features is required.])
    fi
  else
    if test x$ac_success = xno; then
      HAVE_CXX11=0
      AC_MSG_NOTICE([No compiler with C++11 support was found])
    else
      HAVE_CXX11=1
      AC_DEFINE(HAVE_CXX11,1,
                [define if the compiler supports basic C++11 syntax])
    fi

    AC_SUBST(HAVE_CXX11)
  fi
])
vowpal-wabbit-8.6.1.dfsg1/acinclude.d/ax_pthread.m4000066400000000000000000000304401332666127000220500ustar00rootroot00000000000000# =========================================================================== # http://www.gnu.org/software/autoconf-archive/ax_pthread.html # =========================================================================== # # SYNOPSIS # # AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) # # DESCRIPTION # # This macro figures out how to build C programs using POSIX threads.
It # sets the PTHREAD_LIBS output variable to the threads library and linker # flags, and the PTHREAD_CFLAGS output variable to any special C compiler # flags that are needed. (The user can also force certain compiler # flags/libs to be tested by setting these environment variables.) # # Also sets PTHREAD_CC to any special C compiler that is needed for # multi-threaded programs (defaults to the value of CC otherwise). (This # is necessary on AIX to use the special cc_r compiler alias.) # # NOTE: You are assumed to not only compile your program with these flags, # but also link it with them as well. e.g. you should link with # $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS # # If you are only building threads programs, you may wish to use these # variables in your default LIBS, CFLAGS, and CC: # # LIBS="$PTHREAD_LIBS $LIBS" # CFLAGS="$CFLAGS $PTHREAD_CFLAGS" # CC="$PTHREAD_CC" # # In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant # has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to that name # (e.g. PTHREAD_CREATE_UNDETACHED on AIX). # # Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the # PTHREAD_PRIO_INHERIT symbol is defined when compiling with # PTHREAD_CFLAGS. # # ACTION-IF-FOUND is a list of shell commands to run if a threads library # is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it # is not found. If ACTION-IF-FOUND is not specified, the default action # will define HAVE_PTHREAD. # # Please let the authors know if this macro fails on any platform, or if # you have any other suggestions or comments. This macro was based on work # by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help # from M. Frigo), as well as ac_pthread and hb_pthread macros posted by # Alejandro Forero Cuervo to the autoconf macro repository. We are also # grateful for the helpful feedback of numerous users. # # Updated for Autoconf 2.68 by Daniel Richard G. 
# # LICENSE # # Copyright (c) 2008 Steven G. Johnson # Copyright (c) 2011 Daniel Richard G. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation, either version 3 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. If not, see . # # As a special exception, the respective Autoconf Macro's copyright owner # gives unlimited permission to copy, distribute and modify the configure # scripts that are the output of Autoconf when processing the Macro. You # need not follow the terms of the GNU General Public License when using # or distributing such scripts, even though portions of the text of the # Macro appear in them. The GNU General Public License (GPL) does govern # all other use of the material that constitutes the Autoconf Macro. # # This special exception to the GPL applies to versions of the Autoconf # Macro released by the Autoconf Archive. When you make and distribute a # modified version of the Autoconf Macro, you may extend this special # exception to the GPL to apply to your modified version as well. #serial 17 AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD]) AC_DEFUN([AX_PTHREAD], [ AC_REQUIRE([AC_CANONICAL_HOST]) AC_LANG_PUSH([C]) ax_pthread_ok=no # We used to check for pthread.h first, but this fails if pthread.h # requires special compiler flags (e.g. on True64 or Sequent). # It gets checked for in the link test anyway. 
# First of all, check if the user has set any of the PTHREAD_LIBS, # etcetera environment variables, and if threads linking works using # them: if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS" save_LIBS="$LIBS" LIBS="$PTHREAD_LIBS $LIBS" AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS]) AC_TRY_LINK_FUNC(pthread_join, ax_pthread_ok=yes) AC_MSG_RESULT($ax_pthread_ok) if test x"$ax_pthread_ok" = xno; then PTHREAD_LIBS="" PTHREAD_CFLAGS="" fi LIBS="$save_LIBS" CFLAGS="$save_CFLAGS" fi # We must check for the threads library under a number of different # names; the ordering is very important because some systems # (e.g. DEC) have both -lpthread and -lpthreads, where one of the # libraries is broken (non-POSIX). # Create a list of thread flags to try. Items starting with a "-" are # C compiler flags, and other items are library names, except for "none" # which indicates that we try without any flags at all, and "pthread-config" # which is a program returning the flags for the Pth emulation library. ax_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" # The ordering *is* (sometimes) important. 
Some notes on the # individual items follow: # pthreads: AIX (must check this before -lpthread) # none: in case threads are in libc; should be tried before -Kthread and # other compiler flags to prevent continual compiler warnings # -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) # lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) # -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) # -pthreads: Solaris/gcc # -mthreads: Mingw32/gcc, Lynx/gcc # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it # doesn't hurt to check since this sometimes defines pthreads too; # also defines -D_REENTRANT) # ... -mt is also the pthreads flag for HP/aCC # pthread: Linux, etcetera # --thread-safe: KAI C++ # pthread-config: use pthread-config program (for GNU Pth library) case "${host_cpu}-${host_os}" in *solaris*) # On Solaris (at least, for some versions), libc contains stubbed # (non-functional) versions of the pthreads routines, so link-based # tests will erroneously succeed. (We need to link with -pthreads/-mt/ # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather # a function called by this macro, so we could check for that, but # who knows whether they'll stub that too in a future libc.) 
So, # we'll just look for -pthreads and -lpthread first: ax_pthread_flags="-pthreads pthread -mt -pthread $ax_pthread_flags" ;; *-darwin*) ax_pthread_flags="-pthread $ax_pthread_flags" ;; esac if test x"$ax_pthread_ok" = xno; then for flag in $ax_pthread_flags; do case $flag in none) AC_MSG_CHECKING([whether pthreads work without any flags]) ;; -*) AC_MSG_CHECKING([whether pthreads work with $flag]) PTHREAD_CFLAGS="$flag" ;; pthread-config) AC_CHECK_PROG(ax_pthread_config, pthread-config, yes, no) if test x"$ax_pthread_config" = xno; then continue; fi PTHREAD_CFLAGS="`pthread-config --cflags`" PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" ;; *) AC_MSG_CHECKING([for the pthreads library -l$flag]) PTHREAD_LIBS="-l$flag" ;; esac save_LIBS="$LIBS" save_CFLAGS="$CFLAGS" LIBS="$PTHREAD_LIBS $LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS" # Check for various functions. We must include pthread.h, # since some functions may be macros. (On the Sequent, we # need a special flag -Kthread to make this header compile.) # We check for pthread_join because it is in -lpthread on IRIX # while pthread_create is in libc. We check for pthread_attr_init # due to DEC craziness with -lpthreads. We check for # pthread_cleanup_push because it is one of the few pthread # functions on Solaris that doesn't have a non-functional libc stub. # We try pthread_create on general principles. 
AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h> static void routine(void *a) { a = 0; } static void *start_routine(void *a) { return a; }], [pthread_t th; pthread_attr_t attr; pthread_create(&th, 0, start_routine, 0); pthread_join(th, 0); pthread_attr_init(&attr); pthread_cleanup_push(routine, 0); pthread_cleanup_pop(0) /* ; */])], [ax_pthread_ok=yes], []) LIBS="$save_LIBS" CFLAGS="$save_CFLAGS" AC_MSG_RESULT($ax_pthread_ok) if test "x$ax_pthread_ok" = xyes; then break; fi PTHREAD_LIBS="" PTHREAD_CFLAGS="" done fi # Various other checks: if test "x$ax_pthread_ok" = xyes; then save_LIBS="$LIBS" LIBS="$PTHREAD_LIBS $LIBS" save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS" # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. AC_MSG_CHECKING([for joinable pthread attribute]) attr_name=unknown for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>], [int attr = $attr; return attr /* ; */])], [attr_name=$attr; break], []) done AC_MSG_RESULT($attr_name) if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then AC_DEFINE_UNQUOTED(PTHREAD_CREATE_JOINABLE, $attr_name, [Define to necessary symbol if this constant uses a non-standard name on your system.]) fi AC_MSG_CHECKING([if more special flags are required for pthreads]) flag=no case "${host_cpu}-${host_os}" in *-aix* | *-freebsd* | *-darwin*) flag="-D_THREAD_SAFE";; *-osf* | *-hpux*) flag="-D_REENTRANT";; *solaris*) if test "$GCC" = "yes"; then flag="-D_REENTRANT" else flag="-mt -D_REENTRANT" fi ;; esac AC_MSG_RESULT(${flag}) if test "x$flag" != xno; then PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" fi AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT], ax_cv_PTHREAD_PRIO_INHERIT, [ AC_LINK_IFELSE([ AC_LANG_PROGRAM([[#include <pthread.h>]], [[int i = PTHREAD_PRIO_INHERIT;]])], [ax_cv_PTHREAD_PRIO_INHERIT=yes], [ax_cv_PTHREAD_PRIO_INHERIT=no]) ]) AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes"], AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], 1, [Have PTHREAD_PRIO_INHERIT.])) LIBS="$save_LIBS"
CFLAGS="$save_CFLAGS" # More AIX lossage: must compile with xlc_r or cc_r if test x"$GCC" != xyes; then AC_CHECK_PROGS(PTHREAD_CC, xlc_r cc_r, ${CC}) else PTHREAD_CC=$CC fi else PTHREAD_CC="$CC" fi AC_SUBST(PTHREAD_LIBS) AC_SUBST(PTHREAD_CFLAGS) AC_SUBST(PTHREAD_CC) # Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: if test x"$ax_pthread_ok" = xyes; then ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1]) : else ax_pthread_ok=no $2 fi AC_LANG_POP ])dnl AX_PTHREAD vowpal-wabbit-8.6.1.dfsg1/appveyor.yml000066400000000000000000000030201332666127000177000ustar00rootroot00000000000000version: 1.0.{build} os: Windows Server 2012 R2 configuration: - Release platform: x64 shallow_clone: true clone_folder: C:\vowpal_wabbit # need to install nuget packages before Visual Studio starts to make ANTLR targets available. build_script: - vcpkg install cpprestsdk:x64-windows - cd c:\vowpal_wabbit - vowpalwabbit\.nuget\nuget install -o vowpalwabbit\packages cs\cs\packages.config - vowpalwabbit\.nuget\nuget install -o vowpalwabbit\packages cs\azure\packages.config - vowpalwabbit\.nuget\nuget install -o vowpalwabbit\packages cs\azure_worker\packages.config - vowpalwabbit\.nuget\nuget install -o vowpalwabbit\packages cs\setup_bundle\packages.config - vowpalwabbit\.nuget\nuget install -o vowpalwabbit\packages cs\unittest\packages.config - vowpalwabbit\.nuget\nuget install -o vowpalwabbit\packages python\windows27\packages.config - vowpalwabbit\.nuget\nuget install -o vowpalwabbit\packages python\windows35\packages.config - vowpalwabbit\.nuget\nuget install -o vowpalwabbit\packages vowpalwabbit\packages.config - '"C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe" "C:\vowpal_wabbit\vowpalwabbit\vw.sln" /m /verbosity:normal /p:Configuration=Release;Platform=x64' test_script: - ps: >- if($env:PLATFORM -eq "x64") { cd c:\vowpal_wabbit vstest.console /Platform:x64 /inIsolation vowpalwabbit\x64\Release\cs_unittest.dll 
/TestCaseFilter:"TestCategory!=NotOnVSO" } on_failure: - ps: Get-ChildItem C:\Users\appveyor\AppData\Local\CrashDumps\\*.dmp | % { Push-AppveyorArtifact $_.FullName -FileName $_.Name } vowpal-wabbit-8.6.1.dfsg1/autogen.sh000077500000000000000000000022141332666127000173150ustar00rootroot00000000000000#!/bin/sh if [ -z "$CXX" ]; then if [ -x "`which g++`" ]; then CXX=g++ elif [ -x "`which clang++`" ]; then CXX=clang++ fi fi case $( uname -s ) in Darwin) alias vwlibtool=glibtoolize if [ -z "$AC_PATH" ]; then if [ -d /opt/local/share ]; then AC_PATH="/opt/local/share" else AC_PATH="/usr/local/share" fi fi ;; Linux) AC_PATH=/usr/share if ! printf '%s\n' "$@" | grep -q '^--with-boost'; then ldconfig="" for p in `echo ${PATH} | sed 's/:/ /g'` /sbin /usr/sbin; do if test -x ${p}/ldconfig; then ldconfig=${p}/ldconfig break fi done if test "x${ldconfig}" = x; then ldconfig=ldconfig fi LIBFILE=`${ldconfig} -p | grep program_options | tail -n 1 | cut -d '>' -f 2` echo "Boost at: $LIBFILE" BOOST_DIR_ARG="--with-boost-libdir=`dirname $LIBFILE`" echo "Using $BOOST_DIR_ARG" else BOOST_DIR_ARG='' fi alias vwlibtool=libtoolize ;; *) alias vwlibtool=libtoolize : "${AC_PATH:=/usr/share}" ;; esac vwlibtool -f -c && aclocal -I ./acinclude.d -I $AC_PATH/aclocal && autoheader && touch README && automake -ac -Woverride && autoconf && ./configure "$@" $BOOST_DIR_ARG CXX="$CXX" vowpal-wabbit-8.6.1.dfsg1/big_tests/000077500000000000000000000000001332666127000173005ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/HOWTO.use_it.txt000066400000000000000000000063011332666127000222300ustar00rootroot00000000000000Help file for using this testing framework ------------------------------------------ If you ever forget what you're currently reading, you can always see it again by typing make help To run all the tests until one fails, type make To run all the tests regardless of failures, type make -i Either way, you would be making the default target, named 'valid'.
(Naming it 'test' would get confusing when you start referring to data train/test splits.) To clean up so that you can start over, type make clean To download and prepare all the relevant data at once, type make prepData To erase all the data, type make eraseData If you change some code so that the output of some tests changes, or you add some new tests, and you want to save the new output so that future outputs will be compared with it, type make expected You can work with individual tests by prepending their name to the make target, as in make 2.valid make t17.clean make myNewTest.expected Of course, you can make multiple targets together, as in make 2.clean 2.valid There are also pre-defined groups of tests that you can run, such as make regression_group If you want to create the correct outputs for a test without actually running it, type make $testName.pretend This can be useful if you want to run test X, which depends on the outputs of test Y, but you can't run test Y for some reason. The "valid" target for each test is actually a "run" followed by a "compare". Thus make 3.run 3.compare is equivalent to make 3.valid and it's possible to invoke either part without the other. If you want to use an executable other than ../vowpalwabbit/vw, you can supply it on the command line like this: make EXEC=myProgram or make ../vowpalwabbit/vw a sym-link to it. Other variables, such as ARCH, DIFF, TIME, WGET, and GREP, can also be set on the command line similarly. If you want to pass extra parameters to all tests, set the EP variable, like this: make EP="--random_seed 13" If you want to check speed in addition to accuracy, invoke with TIMING=y . Due to the many factors that can change execution speed, we do not expect speed results to be portable across machines. Therefore, "expected" speed files are not checked into the repository. 
If you'd like to do regression testing for speed, you should `make TIMING=y; make TIMING=y expected` on your particular machine before making any code changes. In fact, it's a good idea to then `make TIMING=y` a couple more times, and `make TIMING=y expected` again if the speed drops significantly. This can happen even on the same machine, if it's dividing attention between multiple processes. If you want to add or modify tests, read HOWTO.write_new_tests.txt . Working with data archives -------------------------- To save time and bandwidth, you can create and retrieve all the data used in all the tests at once. To create an archive of all the data, type make ARF=<archive file name> archive You can then stash the archive wherever you like, probably at a URL-accessible location in the cloud. Later, to retrieve the data again, you can type make URL=<archive URL> prepData This will cause the data to be downloaded and installed in prepared form, so that you don't need to run the preparation steps. vowpal-wabbit-8.6.1.dfsg1/big_tests/HOWTO.write_new_tests.txt000066400000000000000000000070641332666127000241740ustar00rootroot00000000000000HOWTO write new tests for the big_tests test suite ================================================== There are 3 steps in writing a new test. 1. Add code to dataSets.mk for obtaining and preparing relevant data, if it's not already there for other tests. 2. Add some variable settings to the file testDetails.mk. 3. Create the "expected" files. Note that your new tests WILL NOT EVEN RUN unless at least the 'out' and 'err' expected files exist in the right place. Read on for details. Step 1: dataSets.mk ------------------- The data will typically live under dataSets/ . Look at dataSets.mk for examples of how to add new datasets, and to see what data sets are already there. Whenever you add a new data set, don't forget to add its name to the 'allData' variable at the top of dataSets.mk .
Step 2: testDetails.mk ---------------------- Be sure to use := and not = to set these variables. All variables are optional. For most tests, you will want to set the variables *.params and *.inData, but it's possible to have tests without them. Here's an example of a complete test specification: 1a.inData := $(dataDir)/mnist.dir/train.prep 1a.params := --oaa 10 -d $(1a.inData) -f $(stageDir)/1a.dir/mnist.model -b 24 --adaptive --invariant --holdout_off -l 0.1 --nn 40 --passes 24 -k --compressed --cache_file $(stageDir)/1a.dir/mnist.cache The file that *.inData refers to is always treated as a prerequisite for running a test. So if the file doesn't exist, an attempt will be made to prepare it, which might also involve downloading it. By default, STDOUT will be saved in the file 'out'. STDERR will be filtered through `grep "average loss"`, and the results will be saved in the file 'err'. Other created files that should be compared to "expected" files should be captured by the *.otherOutputs variable. For example: myTest.otherOutputs := 0001_ftrl.model If you want to capture different parts of STDOUT and STDERR, you can set the variables *.STDOUT_COMPARATOR_REGEXP and *.STDERR_COMPARATOR_REGEXP, respectively. For example, to save all of STDERR without filtering, set myTest.STDERR_COMPARATOR_REGEXP := "." To use a non-standard executable (e.g. library_example), give its path to the *.exec variable, relative to the directory of the top level Makefile. For example: myTest.exec := $(TOP_MK_DIR)/../vowpalwabbit/library_example A test can have any set of targets as prerequisites, in addition to the value of *.inData, including those that merely run other tests (*.run targets) and those that check whether another test passed (*.valid targets). List inter-test dependencies in the *.deps variable, like this: myTest.deps := test2.run test4.valid Unfortunately, tracking of inter-test dependencies as above doesn't work for older versions of GNU make (< 4.0), due to bugs in those older versions.
Upgrade if you can. Groups of tests are defined in testGroups.mk. You can add your tests to these groups or create new groups. Step 3: expected files ---------------------- Every test will create files named 'out' and 'err' (even if they are empty), and possibly others. When you run `make $testName.compare` (which is the 2nd part of running `make $testName.valid`) these files are compared to the expected files with the same names in the directory expected/$testName/ . The $testName.compare target WILL NOT RUN unless the expected files exist in that directory. So, the first time you create a test, you should `make $testName.run` and `make $testName.expected`. Only then will you be able to `make $testName.valid`. If you want to share your test with others, don't forget to `git add expected/$testName/`. vowpal-wabbit-8.6.1.dfsg1/big_tests/Makefile000066400000000000000000000207021332666127000207410ustar00rootroot00000000000000# Makefile for big_tests SHELL = bash # suppress built-in rules, to speed things up MAKEFLAGS+=r # disable implicit suffixes, to speed things up .SUFFIXES: # keep all intermediate files .SECONDARY: .SECONDEXPANSION: # .ONESHELL doesn't work with make < 3.82! # .ONESHELL: # comment out for debugging .SILENT: # can't compare before running; can't prep data before downloading; etc.
.NOTPARALLEL: .PHONY: default all stub help valid expected pretend run startTimer clean spotless %.valid %.expected %.run %.clean %.deps %.depsWrapper %.depsStart %.depsEnd %.stub TOP_MK_DIR := $(patsubst %/,%,$(dir $(CURDIR)/$(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))) ifdef TIMING TIMING_SUFFIX = WithTiming MIPS_HELP := $(shell lscpu | grep -i bogomips) MIPS := $(word $(words $(MIPS_HELP)),$(MIPS_HELP)) ARCH ?= $(subst /,_,$(shell uname -o)_$(MIPS)) # can't use the architecture as directory name, cuz that produces different expected files on different platforms # stageDir := $(ARCH) TIME ?= env time -f "%U" TIME_DIFF_TOLERANCE_REL ?= 0.05 TIME_DIFF_TOLERANCE_ABS ?= 1.0 endif stageDir ?= runs dataDir ?= dataSets expDir ?= expected testCodeDir := $(TOP_MK_DIR)/testCode allTests := $(patsubst %/,%,$(shell ls $(expDir))) # allTests := 2 # the ?= allows the user to override the executable from the command line, as in # make EXEC=myProgram all # alas, $(realpath ...) doesn't seem to work here on make 3.81 for some reason EXEC ?= ../vowpalwabbit/vw ACC_DIFF_TOLERANCE_REL ?= 0.001 ACC_DIFF_TOLERANCE_ABS ?= 0.001 DIFF ?= $(testCodeDir)/floatingTolerance.pl GREP ?= grep DEFAULT_STDOUT_COMPARATOR_REGEXP ?= "." 
DEFAULT_STDERR_COMPARATOR_REGEXP ?= "average loss"
ARF ?= vwBigtests.tz2
# Prepared data is downloaded from this cache by default, because dataSets.mk
# switches prepData to getData whenever URL is non-empty.  Pass an empty value
# on the command line (make URL= prepData) to build every data set from its
# original source instead.
URL ?= https://vowpalwabbitdata.blob.core.windows.net/bigtests/vwBigtests.tz2

# default goal: run every test and compare against the expected files
all: valid ;

.DEFAULT:
	@echo "No rules found for $@ in $(MAKEFILE_LIST)"

# details of individual tests and test groups come from here
include dataSets.mk
include testDetails.mk
include testGroups.mk

# for debugging
%.stub:
	echo $($*.deps)
	echo $(stageDir)/$*.dir/done
	echo $(addsuffix .valid,$($*.deps))

# for debugging
stub:
	echo $(TIMING)
	echo $(MAKE_VERSION)
	echo $(ARCH)
	echo $(MAKEFLAGS)
	echo $(EXEC)
	echo $(TOP_MK_DIR)
	echo $(VPATH)

help:
	cat $(TOP_MK_DIR)/README.txt

validTargets := $(addsuffix .valid,$(allTests))
valid: $(validTargets) timingSummary
	@echo "test suite finished"

# use this target to run without doing any diffs
runTargets := $(addsuffix .run,$(allTests))
run: prepData $(runTargets) timingSummary
	@echo "'make run' finished"

# when TIMING=y, add up the per-test run times recorded in <test>.dir/$(ARCH)/rc
timingSummary:
	if [ "$(TIMING)" = "y" ] ; then \
		totalTime=0 ;\
		for dir in $(allTests) ; do \
			timeFile=$(stageDir)/$$dir.dir/$(ARCH)/rc ;\
			currTime=`cat $$timeFile` ;\
			totalTime=`echo "$$totalTime + $$currTime" | bc -l` ;\
		done ;\
		echo "Total test time = $$totalTime" ;\
	fi

# diff existing outputs against the expected files without re-running the tests
# (the variable was previously misspelled "mcompareTargets", which left the
#  'compare' target with no prerequisites)
compareTargets := $(addsuffix .compare,$(allTests))
compare: $(compareTargets)
	@echo "'make compare' finished"

expectedTargets := $(addsuffix .expected,$(allTests))
expected: $(expectedTargets)
	@echo "'make expected' finished"

pretendTargets := $(addsuffix .pretend,$(allTests))
pretend: $(pretendTargets)
	@echo "'make pretend' finished"

clean:
	-rm -r $(stageDir)
	@echo "'make clean' finished"

%.dir/$(ARCH) %.dir:
	@mkdir -p $@

# a test is valid when it has been run and its outputs match the expected files
%.valid: %.run %.compare ;

# dependency tracking doesn't work in make < 4 due to a bug in how SECONDEXPANSION is done
ifeq ($(findstring 4.,$(MAKE_VERSION)),4.)
%.run: %.depsWrapper $(stageDir)/%.dir/done$(TIMING_SUFFIX) ; else %.run: $(stageDir)/%.dir/done$(TIMING_SUFFIX) ; endif $(dataDir) $(stageDir): @mkdir -p $@ %.depsWrapper: %.depsStart $$(%.deps) %.depsEnd ; %.depsStart: @if [ -n "$($*.deps)" ] ; then \ echo "test $* has dependencies: $($*.deps)" ;\ fi %.depsEnd: @if [ -n "$($*.deps)" ] ; then \ echo "dependencies of test $* satisfied" ;\ fi # Don't do like on the following line, because making the dir a dependency of the file that's in it will result in the file always getting remade, cuz the dir will always have a newer timestamp. # $(stageDir)/%.dir/done: $(stageDir)/%.dir $(stageDir)/%.dir/doneWithTiming: $(EXEC) $$(%.inData) @echo "Starting to run test $*" dir=`dirname $@` ;\ mkdir -p $$dir ;\ exec=$(EXEC) ;\ if [ -n "$($*.exec)" ] ; then exec="$($*.exec)" ; fi ;\ roFile=$$dir/raw_out ;\ reFile=$$dir/raw_err ;\ oFile=$$dir/out ;\ eFile=$$dir/err ;\ mkdir -p $$dir/$(ARCH) ;\ timeFile=$$dir/$(ARCH)/rc ;\ echo "$(TIME) -o $$timeFile $$exec $($*.params) $(EP) > $$roFile 2> $$reFile" ;\ $(TIME) -o $$timeFile $$exec $($*.params) $(EP) > $$roFile 2> $$reFile ;\ uTime=`cat $$timeFile` ;\ echo "run time of test $* = $$uTime" ;\ out_compar=$(DEFAULT_STDOUT_COMPARATOR_REGEXP) ;\ if [ -n "$($*.STDOUT_COMPARATOR_REGEXP)" ] ; then out_compar="$($*.STDOUT_COMPARATOR_REGEXP)" ; fi ;\ echo "$(GREP) \"$$out_compar\" $$roFile | tee $$oFile" ;\ $(GREP) "$$out_compar" $$roFile | tee $$oFile ;\ err_compar=$(DEFAULT_STDERR_COMPARATOR_REGEXP) ;\ if [ -n "$($*.STDERR_COMPARATOR_REGEXP)" ] ; then err_compar="$($*.STDERR_COMPARATOR_REGEXP)" ; fi ;\ echo "$(GREP) \"$$err_compar\" $$reFile | tee $$eFile" ;\ $(GREP) "$$err_compar" $$reFile | tee $$eFile ;\ touch $$dir/done touch $@ @echo "Finished running test $*" $(stageDir)/%.dir/done: $(EXEC) $$(%.inData) @echo "Starting to run test $*" dir=`dirname $@` ;\ mkdir -p $$dir ;\ exec=$(EXEC) ;\ if [ -n "$($*.exec)" ] ; then exec="$($*.exec)" ; fi ;\ roFile=$$dir/raw_out ;\ 
reFile=$$dir/raw_err ;\ oFile=$$dir/out ;\ eFile=$$dir/err ;\ echo "$$exec $($*.params) $(EP) > $$roFile 2> $$reFile" ;\ $$exec $($*.params) $(EP) > $$roFile 2> $$reFile ;\ out_compar=$(DEFAULT_STDOUT_COMPARATOR_REGEXP) ;\ if [ -n "$($*.STDOUT_COMPARATOR_REGEXP)" ] ; then out_compar="$($*.STDOUT_COMPARATOR_REGEXP)" ; fi ;\ echo "$(GREP) \"$$out_compar\" $$roFile | tee $$oFile" ;\ $(GREP) "$$out_compar" $$roFile | tee $$oFile ;\ err_compar=$(DEFAULT_STDERR_COMPARATOR_REGEXP) ;\ if [ -n "$($*.STDERR_COMPARATOR_REGEXP)" ] ; then err_compar="$($*.STDERR_COMPARATOR_REGEXP)" ; fi ;\ echo "$(GREP) \"$$err_compar\" $$reFile | tee $$eFile" ;\ $(GREP) "$$err_compar" $$reFile | tee $$eFile ;\ touch $@ @echo "Finished running test $*" # first diff all the results, then set exit status if non-empty %.compare: $(DIFF) $(expDir)/%/out $(expDir)/%/err dir=$(stageDir)/$*.dir ;\ rm -f $$dir/diffs ;\ for FILE in out err $($*.otherOutputs) ; do \ echo "$(DIFF) $(ACC_DIFF_TOLERANCE_REL) $(ACC_DIFF_TOLERANCE_ABS) $$dir/$$FILE $(expDir)/$*/$$FILE" ;\ $(DIFF) $(ACC_DIFF_TOLERANCE_REL) $(ACC_DIFF_TOLERANCE_ABS) $$dir/$$FILE $(expDir)/$*/$$FILE |& tee -a $$dir/diffs ;\ done ;\ if [ -s $$dir/diffs ] ; then \ echo "Test $* failed due to accuracy diffs." ;\ exit 1 ;\ fi ;\ if [ "$(TIMING)" = "y" ] ; then \ timeFile=$$dir/$(ARCH)/rc ;\ uTime=`cat $$timeFile` ;\ echo "run time of test $* = $$uTime" ;\ if [ -s $(expDir)/$*/$(ARCH)/rc ] ; then (\ echo "$(DIFF) $(TIME_DIFF_TOLERANCE_REL) $(TIME_DIFF_TOLERANCE_ABS) $$timeFile $(expDir)/$*/$(ARCH)/rc" ;\ $(DIFF) $(TIME_DIFF_TOLERANCE_REL) $(TIME_DIFF_TOLERANCE_ABS) $$timeFile $(expDir)/$*/$(ARCH)/rc |& tee -a $$dir/diffs ;\ if [ -s $$dir/diffs ] ; then \ echo "Test $* failed due to speed diffs." ;\ exit 1 ;\ fi ;\ ) ;\ else (\ echo "WARNING: Missing expected timing file. You might want to create it with 'make TIMING=y expected'. Timing validation skipped." 
\ ) ; \ fi ;\ fi # update expected files from current output files # expected files always include out and err %.expected: dir=$(stageDir)/$*.dir ;\ mkdir -p $(expDir)/$*/ ;\ for FILE in out err $($*.otherOutputs) ; do \ (echo "copying $$dir/$$FILE to $(expDir)/$*/" ; cp $$dir/$$FILE $(expDir)/$*/ ) ;\ done ;\ if [ "$(TIMING)" = "y" ] ; then \ mkdir -p $(expDir)/$*/$(ARCH) ;\ (echo "copying $$dir/$(ARCH)/rc to $(expDir)/$*/$(ARCH)/rc" ; cp $$dir/$(ARCH)/rc $(expDir)/$*/$(ARCH)/rc ) ;\ fi # copy outputs from expected files, to pretend that the module succeeded %.pretend: $(stageDir)/%.dir $(expDir)/%/out $(expDir)/%/err dir=$(stageDir)/$*.dir ;\ for FILE in out err $($*.otherOutputs) ; do \ echo "copying $(expDir)/$*/$$FILE to $$dir/" ;\ cp $(expDir)/$*/$$FILE $$dir/ ;\ done ;\ touch $$dir/done ;\ if [ "$(TIMING)" = "y" ] ; then \ mkdir -p $$dir/$(ARCH) ;\ echo "copying $(expDir)/$*/$(ARCH)/rc $$dir/$(ARCH)/rc" ;\ cp $(expDir)/$*/$(ARCH)/rc $$dir/$(ARCH)/rc ;\ fi %.clean: -rm -rf $(stageDir)/$*.dir @echo "finished cleaning $(stageDir)/$*.dir" vowpal-wabbit-8.6.1.dfsg1/big_tests/README.txt000077700000000000000000000000001332666127000237212HOWTO.use_it.txtustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/dataSets.mk000066400000000000000000000115651332666127000214110ustar00rootroot00000000000000# Makefile for obtaining and preparing data sets allData := mnist covtype URLRep ER movielens ################ begin generic stuff ######### VPATH+=$(testCodeDir) WGET ?= wget -nv -N --no-use-server-timestamps --no-check-certificate .PHONY: getData prepData archive eraseData # archive file name can be specified on the command line as ARF= archive: cd $(dataDir) find . -name '*prep' -print0 | tar -cjhv --null -f $(ARF) -T - getData: $(dataDir) cd $(dataDir) $(WGET) $(URL) fName=`basename $(URL)` ; tar xjvmf $$fName # If a URL is specified, then simply download all the data # pre-prepped. 
Checked-in check-sums should be used to guard against # something missing or corrupted in the archive. allDataTargets := $(addsuffix .prep,$(allData)) ifdef URL prepData: getData else prepData: $(allDataTargets) endif @echo "finished preparing all data" %.prep: $(dataDir)/%.dir/prep ; # allDataDirs := $(addprefix $(dataDir)/,$(addsuffix .dir,$(allData))) eraseData: -rm -r $(dataDir) @echo "finished erasing all data" ################ end generic stuff ######### #OCR OCR.prep: $(dataDir)/OCR.dir/train.prep $(dataDir)/OCR.dir/test.prep ; $(dataDir)/OCR.dir/train.prep: $(dataDir)/OCR.dir/test.prep ; $(dataDir)/OCR.dir/test.prep: $(dataDir)/OCR.dir/letter.data.gz $(dataDir)/OCR.dir/letter.names dir=$(dir $@) ;\ cd $$dir ;\ $(testCodeDir)/ocr2vw.py letter.data.gz letter.names train.prep test.prep $(dataDir)/OCR.dir/letter.data.gz: dir=$(dir $@) ;\ mkdir -p $$dir ;\ cd $$dir ;\ $(WGET) http://ai.stanford.edu/~btaskar/ocr/letter.data.gz $(dataDir)/OCR.dir/letter.names: dir=$(dir $@) ;\ mkdir -p $$dir ;\ cd $$dir ;\ $(WGET) http://ai.stanford.edu/~btaskar/ocr/letter.names #movielens movielens.prep: $(dataDir)/movielens.dir/train.prep ; $(dataDir)/movielens.dir/train.prep: $(dataDir)/movielens.dir/test.prep ; cd $(dataDir)/movielens.dir/ ;\ perl -ne 'BEGIN { srand 8675309; }; \ 1; print join "\t", rand (), $$_;' \ pre.train.vw | sort -k1 | \ cut -f2- > train.prep $(dataDir)/movielens.dir/test.prep: $(dataDir)/movielens.dir/ml-1m.zip cd $(dataDir)/movielens.dir/ ;\ unzip -ou ml-1m.zip ;\ $(testCodeDir)/movielensRatings2vw.pl pre.train.vw test.prep ml-1m/ratings.dat $(dataDir)/movielens.dir/ml-1m.zip: dir=$(dir $@) ;\ mkdir -p $$dir ;\ cd $$dir ;\ $(WGET) http://files.grouplens.org/datasets/movielens/ml-1m.zip #ER ER.prep: $(dataDir)/ER.dir/train.prep $(dataDir)/ER.dir/test.prep ; $(dataDir)/ER.dir/train.prep: $(dataDir)/ER.dir/ER_train.vw cd $(dataDir)/ER.dir/ ;\ ln -sf ER_train.vw train.prep $(dataDir)/ER.dir/test.prep: $(dataDir)/ER.dir/ER_test.vw cd $(dataDir)/ER.dir/ ;\ 
ln -sf ER_test.vw test.prep $(dataDir)/ER.dir/ER_train.vw: $(dataDir)/ER.dir/ER_test.vw ; touch $@ $(dataDir)/ER.dir/ER_test.vw: $(dataDir)/ER.dir/er.zip cd $(dataDir)/ER.dir/ ;\ unzip -ou er.zip ;\ touch ER_test.vw $(dataDir)/ER.dir/er.zip: dir=$(dir $@) ;\ mkdir -p $$dir ;\ cd $$dir ;\ $(WGET) http://web.engr.illinois.edu/~kchang10/data/er.zip # URLRep $(dataDir)/URLRep.dir/prep: $(dataDir)/URLRep.dir/url_svmlight.tar.gz URLRep.munge.sh export testCodeDir=$(testCodeDir) ;\ cd $(dataDir)/URLRep.dir/ ;\ $(testCodeDir)/URLRep.munge.sh url_svmlight.tar.gz > prep $(dataDir)/URLRep.dir/url_svmlight.tar.gz: dir=$(dir $@) ;\ mkdir -p $$dir ;\ cd $$dir ;\ $(WGET) https://archive.ics.uci.edu/ml/machine-learning-databases/url/url_svmlight.tar.gz # COVERTYPE $(dataDir)/covtype.dir/prep: $(dataDir)/covtype.dir/covtype.data.gz covtype.munge.sh export testCodeDir=$(testCodeDir) ;\ cd $(dataDir)/covtype.dir/ ;\ $(testCodeDir)/covtype.munge.sh covtype.data.gz > prep $(dataDir)/covtype.dir/covtype.data.gz: dir=$(dir $@) ;\ mkdir -p $$dir ;\ cd $$dir ;\ $(WGET) https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.data.gz ## MNIST # override implicit %.prep rule mnist.prep: $(dataDir)/mnist.dir/train.prep $(dataDir)/mnist.dir/test.prep ; $(dataDir)/mnist.dir/train.prep: mnist.extractfeatures mnist.extract-labels.pl shuffle.pl $(dataDir)/mnist.dir/train-labels-idx1-ubyte.gz $(dataDir)/mnist.dir/train-images-idx3-ubyte.gz mnist.munge.sh export testCodeDir=$(testCodeDir) ;\ cd $(dataDir)/mnist.dir/ ;\ $(testCodeDir)/mnist.munge.sh train-labels-idx1-ubyte.gz train-images-idx3-ubyte.gz \ | $(testCodeDir)/shuffle.pl > train.prep $(dataDir)/mnist.dir/test.prep: mnist.munge.sh mnist.extractfeatures mnist.extract-labels.pl $(dataDir)/mnist.dir/t10k-labels-idx1-ubyte.gz $(dataDir)/mnist.dir/t10k-images-idx3-ubyte.gz export testCodeDir=$(testCodeDir) ;\ cd $(dataDir)/mnist.dir/ ;\ $(testCodeDir)/mnist.munge.sh t10k-labels-idx1-ubyte.gz t10k-images-idx3-ubyte.gz > test.prep 
mnist.extractfeatures: mnist.extractfeatures.cpp cd $(testCodeDir)/ ;\ g++ -O3 -Wall $^ -o $@ $(dataDir)/mnist.dir/%.gz: dir=$(dir $@) ;\ mkdir -p $$dir ;\ cd $$dir ;\ fileName=`basename $@` ;\ $(WGET) http://yann.lecun.com/exdb/mnist/$$fileName vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/000077500000000000000000000000001332666127000211015ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/1a/000077500000000000000000000000001332666127000214025ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/1a/err000066400000000000000000000000301332666127000221060ustar00rootroot00000000000000average loss = 0.013496 vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/1a/out000066400000000000000000000000001332666127000221220ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/1b/000077500000000000000000000000001332666127000214035ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/1b/err000066400000000000000000000000301332666127000221070ustar00rootroot00000000000000average loss = 0.021500 vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/1b/out000066400000000000000000000000001332666127000221230ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/2/000077500000000000000000000000001332666127000212425ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/2/err000066400000000000000000000000301332666127000217460ustar00rootroot00000000000000average loss = 0.181223 vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/2/out000066400000000000000000000000001332666127000217620ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/3/000077500000000000000000000000001332666127000212435ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/3/err000066400000000000000000000000301332666127000217470ustar00rootroot00000000000000average loss = 0.009036 
vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/3/out000066400000000000000000000000001332666127000217630ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/4a/000077500000000000000000000000001332666127000214055ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/4a/err000066400000000000000000000000301332666127000221110ustar00rootroot00000000000000average loss = 0.3627 h vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/4a/out000066400000000000000000000000001332666127000221250ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/4b/000077500000000000000000000000001332666127000214065ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/4b/err000066400000000000000000000000271332666127000221200ustar00rootroot00000000000000average loss = 0.38117 vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/4b/out000066400000000000000000000000001332666127000221260ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/5a/000077500000000000000000000000001332666127000214065ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/5a/err000066400000000000000000000000301332666127000221120ustar00rootroot00000000000000average loss = 0.319224 vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/5a/out000066400000000000000000000000001332666127000221260ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/5b/000077500000000000000000000000001332666127000214075ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/5b/err000066400000000000000000000000301332666127000221130ustar00rootroot00000000000000average loss = 0.343701 
vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/5b/out000066400000000000000000000000001332666127000221270ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/6a/000077500000000000000000000000001332666127000214075ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/6a/err000066400000000000000000000000301332666127000221130ustar00rootroot00000000000000average loss = 0.185694 vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/6a/out000066400000000000000000000000001332666127000221270ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/6b/000077500000000000000000000000001332666127000214105ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/6b/err000066400000000000000000000000301332666127000221140ustar00rootroot00000000000000average loss = 0.449856 vowpal-wabbit-8.6.1.dfsg1/big_tests/expected/6b/out000066400000000000000000000000001332666127000221300ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/testCode/000077500000000000000000000000001332666127000210525ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/big_tests/testCode/URLRep.munge.sh000077500000000000000000000001141332666127000236300ustar00rootroot00000000000000#! /bin/bash tar xzf $1 cat url_svmlight/*.svm \ | $testCodeDir/svml2vw.pl vowpal-wabbit-8.6.1.dfsg1/big_tests/testCode/covtype.munge.sh000077500000000000000000000001121332666127000242060ustar00rootroot00000000000000#! /bin/bash gunzip -c $1 \ | perl -pe 's/(.*),(.*)/$2 | $1/; s/,/ /g;' vowpal-wabbit-8.6.1.dfsg1/big_tests/testCode/floatingTolerance.pl000077500000000000000000000040211332666127000250470ustar00rootroot00000000000000#!/usr/bin/env perl #################################################################################################### # Author: I. 
Dan Melamed # Purpose: for each pair of floating point numbers (x,y), +ive relative tolerance r, and +ive absolute tolerance a, fail if # (x > a OR y > a) AND (x / y - 1 > r) AND (x - y > a) # Streams: 2 files of numbers; the numbers that we want to be smaller should come first # N.B.: tolerance is evaluated in only one direction, unless one of the numbers is zero # N.B.2: return code is > 0 iff tolerance test fails #################################################################################################### #check for correct usage if ($#ARGV < 0) { print "usage: $0 []\n"; exit; }; $exitCode = 0; $rtolerance = shift; $atolerance = shift; open(F, $ARGV[0]) || die "\nCouldn't open $ARGV[0]: $!\n"; shift; open(G, $ARGV[0]) || die "\nCouldn't open $ARGV[0]: $!\n"; shift; LINE: while () { @ftok = split; if (eof(G)) { print "1st file has more lines than 2nd.\n"; $exitCode = 11; last; }; $_ = ; @gtok = split; while (@ftok) { if (! @gtok) { print "Different number of tokens on line $.\n"; $exitCode = 13; next LINE; }; $ftok = shift @ftok; $gtok = shift @gtok; if ($ftok <= $atolerance && $gtok <= $atolerance) { next; }; if ( ($ftok != 0 && $gtok == 0) || ($ftok == 0 && $gtok != 0) ) { print "Difference in zeros on line $.: $ftok vs. $gtok .\n"; $exitCode = 15; next LINE; }; $rdiff = $ftok / $gtok - 1.0; $adiff = $ftok - $gtok; if ($rdiff > $rtolerance && $adiff > $atolerance) { print "Differences exceed rel. tolerance of $rtolerance and abs. tolerance of $atolerance on line $.: $ftok vs. $gtok .\n"; $exitCode = 17; next LINE; }; }; if (@gtok) { print "Different number of tokens on line $.\n"; $exitCode = 19; next LINE; }; }; if (not eof(G)) { $exitCode = 21; print "2nd file has more lines than 1st.\n"; }; exit $exitCode; vowpal-wabbit-8.6.1.dfsg1/big_tests/testCode/mnist.extract-labels.pl000077500000000000000000000010571332666127000254600ustar00rootroot00000000000000#! 
/usr/bin/env perl use warnings; use strict; use IO::File; local $SIG{__WARN__} = sub {}; #my $fname = shift @ARGV or die; #my $fh = new IO::File $fname, "r" or die "$fname: $!"; my $fh = \*STDIN; binmode ($fh, ':raw'); $/ = \4; my $magic = unpack ("N", <$fh>); die "wtf $magic" unless $magic == 2049; my $n_items = unpack ("N", <$fh>); warn "n_items = $n_items"; $/ = \1; while (defined ($_ = <$fh>)) { die "wtf @{[length($_)]}" unless length ($_) == 1; my ($label) = unpack ("C", $_); ++$label; print "$label 1 $label|\n"; } vowpal-wabbit-8.6.1.dfsg1/big_tests/testCode/mnist.extractfeatures.cpp000066400000000000000000000021601332666127000261170ustar00rootroot00000000000000#include #include #include #include #include #include int main (void) { using std::cin; using std::cout; using std::endl; using std::setprecision; uint32_t magic; cin.read (reinterpret_cast (&magic), sizeof (uint32_t)); magic = ntohl (magic); assert (magic == 2051); uint32_t n_images; cin.read (reinterpret_cast (&n_images), sizeof (uint32_t)); n_images = ntohl (n_images); uint32_t n_rows; cin.read (reinterpret_cast (&n_rows), sizeof (uint32_t)); n_rows = ntohl (n_rows); uint32_t n_columns; cin.read (reinterpret_cast (&n_columns), sizeof (uint32_t)); n_columns = ntohl (n_columns); uint32_t rc = n_rows * n_columns; unsigned char buf[rc]; for (cin.read (reinterpret_cast (buf), rc); ! 
cin.eof (); cin.read (reinterpret_cast (buf), rc)) { cout << "|p"; for (unsigned int p = 0; p < n_rows * n_columns; ++p) { if (buf[p]) cout << " " << p << ":" << setprecision (8) << static_cast(buf[p])/256.0; } cout << endl; } return 0; } vowpal-wabbit-8.6.1.dfsg1/big_tests/testCode/mnist.extractpixels.cpp000066400000000000000000000021111332666127000256010ustar00rootroot00000000000000#include #include #include #include #include #include int main (void) { using std::cin; using std::cout; using std::endl; using std::setprecision; uint32_t magic; cin.read (reinterpret_cast (&magic), sizeof (uint32_t)); magic = ntohl (magic); assert (magic == 2051); uint32_t n_images; cin.read (reinterpret_cast (&n_images), sizeof (uint32_t)); n_images = ntohl (n_images); uint32_t n_rows; cin.read (reinterpret_cast (&n_rows), sizeof (uint32_t)); n_rows = ntohl (n_rows); uint32_t n_columns; cin.read (reinterpret_cast (&n_columns), sizeof (uint32_t)); n_columns = ntohl (n_columns); uint32_t rc = n_rows * n_columns; unsigned char buf[rc]; for (cin.read (reinterpret_cast (buf), rc); ! cin.eof (); cin.read (reinterpret_cast (buf), rc)) { for (unsigned int p = 0; p < n_rows * n_columns; ++p) { if (buf[p]) cout << " " << p << ":" << static_cast(buf[p]); } cout << endl; } return 0; } vowpal-wabbit-8.6.1.dfsg1/big_tests/testCode/mnist.munge.sh000077500000000000000000000002141332666127000236520ustar00rootroot00000000000000#! /bin/bash paste -d' ' \ <(gunzip -c $1 | $testCodeDir/mnist.extract-labels.pl) \ <(gunzip -c $2 | $testCodeDir/mnist.extractfeatures) vowpal-wabbit-8.6.1.dfsg1/big_tests/testCode/movielensRatings2vw.pl000077500000000000000000000024171332666127000254060ustar00rootroot00000000000000#! 
/usr/bin/env perl use IO::File; use warnings; use strict; srand 69; sub output_user ($$@) { my ($trainfh, $testfh, @rows) = @_; return () unless @rows > 1; my @permrows = map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { [ rand (), $_ ] } @rows; my @testrows = splice @permrows, -1; print $trainfh @permrows; return @testrows; } my $trainfile = shift @ARGV or die; my $testfile = shift @ARGV or die; my $trainfh = new IO::File $trainfile, "w" or die; my $testfh = new IO::File $testfile, "w" or die; my $olduser; my @rows; my @save; my %seen; while (defined ($_ = <>)) { chomp; my ($user, $movie, $rating, undef) = split /::/, $_; if (defined ($olduser) && $user != $olduser) { push @save, output_user ($trainfh, $testfh, @rows); undef @rows; die "input file not collated" if $seen{$olduser}++; } push @rows, "$rating $rating|user $user |movie $movie\n"; $olduser = $user; } push @save, output_user ($trainfh, $testfh, @rows); my @permsave = map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { [ rand (), $_ ] } @save; my @test = splice @permsave, 0, 5000; print $trainfh @permsave; print $testfh @test; vowpal-wabbit-8.6.1.dfsg1/big_tests/testCode/ocr2vw.py000077500000000000000000000043021332666127000226500ustar00rootroot00000000000000#!/usr/bin/env python # convert letter.data to letter.vw def read_letter_names (fn): ret = list() with open(fn) as ins: for line in ins: ret.append(line.rstrip()) print "Read %d names from %s" % (len(ret),fn) return ret def find_pixel_start (names): for i in range(len(names)): if names[i].startswith("p_"): return i raise ValueError("No pixel data",names) def data2vw (ifn, train, test, names): lineno = 0 trainN = 0 testN = 0 if ifn.endswith(".gz"): import gzip iopener = gzip.open else: iopener = open id_pos = names.index("id") letter_pos = names.index("letter") pixel_start = find_pixel_start(names) with iopener(ifn) as ins, open(train,"wb") as trainS, open(test,"wb") as testS: for line in ins: lineno += 1 vals = line.rstrip().split('\t') if len(vals) 
!= len(names): raise ValueError("Bad field count", len(vals),len(names),vals,names) char = vals[letter_pos] if len(char) != 1: raise ValueError("Bad letter",char) if lineno % 10 == 0: testN += 1 outs = testS else: trainN += 1 outs = trainS outs.write("%d 1 %s-%s|Pixel" % (ord(char)-ord('a')+1,char,vals[id_pos])) for i in range(pixel_start,len(names)): if vals[i] != '0': outs.write(' %s:%s' % (names[i],vals[i])) outs.write('\n') print "Read %d lines from %s; wrote %d lines into %s and %d lines into %s" % ( lineno,ifn,trainN,train,testN,test) if __name__ == '__main__': import argparse parser = argparse.ArgumentParser(description='Convert letters.data to VW format') parser.add_argument('input',help='path to letter.data[.gz]') parser.add_argument('names',help='path to letter.names') parser.add_argument('train',help='VW train file location (90%)') parser.add_argument('test',help='VW test file location (10%)') args = parser.parse_args() data2vw(args.input,args.train,args.test,read_letter_names(args.names)) vowpal-wabbit-8.6.1.dfsg1/big_tests/testCode/shuffle.pl000077500000000000000000000006501332666127000230470ustar00rootroot00000000000000#!/usr/bin/env perl ######################################################################## # shuffle input lines # same as random sample (without replacement) of all of them ######################################################################## srand(13); while (<>) { $line[$. 
- 1] = $_; }; $size = $count = $.; for(;$size > 0; $size-- && $count--) { $ind = int rand $count; print splice(@line, $ind, 1); }; vowpal-wabbit-8.6.1.dfsg1/big_tests/testCode/svml2vw.pl000077500000000000000000000002071332666127000230310ustar00rootroot00000000000000#!/usr/bin/env perl # convert data format from SVM-Light to VW while (<>) { s/^\-1/0/; s/^\+1/1/; s/ / | /; print; } vowpal-wabbit-8.6.1.dfsg1/big_tests/testDetails.mk000066400000000000000000000036131332666127000221210ustar00rootroot00000000000000 # MNIST training 1a.inData := $(dataDir)/mnist.dir/train.prep 1a.params := --oaa 10 -d $(1a.inData) -f $(stageDir)/1a.dir/mnist.model -b 24 --adaptive --invariant --holdout_off -l 0.1 --nn 40 --passes 24 -k --compressed --cache_file $(stageDir)/1a.dir/mnist.cache # MNIST prediction 1b.inData := $(dataDir)/mnist.dir/test.prep 1b.params := -t -d $(1b.inData) -i $(stageDir)/1a.dir/mnist.model # test dependencies not working yet 1b.deps := 1a.valid # COVERTYPE 2.inData := $(dataDir)/covtype.dir/prep 2.params := --oaa 7 -d $(2.inData) # URL Reputation 3.inData := $(dataDir)/URLRep.dir/prep 3.params := -d $(3.inData) # Entity Relation training 4a.inData := $(dataDir)/ER.dir/train.prep 4a.params := -b 24 -d $(4a.inData) --search 10 --passes 10 --search_task entity_relation --constraints --search_alpha 1e-8 -f $(stageDir)/4a.dir/er.model --cache_file $(stageDir)/4a.dir/er.cache # Entity Relation prediction 4b.inData := $(dataDir)/ER.dir/test.prep 4b.params := -t -d $(4b.inData) -i $(stageDir)/4a.dir/er.model 4b.deps := 4a.valid # MovieLens training 5a.inData := $(dataDir)/movielens.dir/train.prep 5a.params := --loss_function quantile -l 0.45 -b 24 --passes 100 -k --cache_file $(stageDir)/5a.dir/movielens.cache -d $(5a.inData) --holdout_off --lrq um14 --lrqdropout --adaptive --invariant -f $(stageDir)/5a.dir/movielens.model # MovieLens prediction 5b.inData := $(dataDir)/movielens.dir/test.prep 5b.params := --loss_function quantile -t -i 
$(stageDir)/5a.dir/movielens.model -d $(5b.inData) 5b.deps := 5a.valid # OCR training 6a.inData := $(dataDir)/OCR.dir/train.prep 6a.params := -d $(6a.inData) -f $(stageDir)/6a.dir/OCR.model --cache_file $(stageDir)/6a.dir/OCR.cache -k --oaa 26 --adaptive --invariant --holdout_off --loss_function logistic --passes 14 # OCR prediction 6b.inData := $(dataDir)/OCR.dir/test.prep 6b.params := -i $(stageDir)/6a.dir/OCR.model -d $(6b.inData) --testonly 6b.deps := 6a.valid vowpal-wabbit-8.6.1.dfsg1/big_tests/testGroups.mk000066400000000000000000000002061332666127000220060ustar00rootroot00000000000000# this test group is bogus, for illustration only; adjust if you know what you're doing regression_group: 1a.valid 1b.valid 3.valid ; vowpal-wabbit-8.6.1.dfsg1/c_test/000077500000000000000000000000001332666127000165765ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/c_test/c_test.vcxproj000066400000000000000000000310141332666127000214730ustar00rootroot00000000000000 DebugLeakCheck Win32 DebugLeakCheck x64 Debug Win32 Debug x64 Release Win32 Release x64 {E5865596-E5F0-4CA3-B04A-4E34B798744A} c_test ..\vowpalwabbit\ ..\sdl\SDL-7.0-Recommended.ruleset true Application true MultiByte v140 Application true MultiByte v140 Application true MultiByte v140 Application true MultiByte v140 Application false true MultiByte v140 Application false true MultiByte v140 $(SolutionDir)dll\$(Platform)\$(ConfigurationName);$(LibraryPath) $(ProjectDir)Bin\$(Platform)\$(Configuration)\ $(SolutionDir)dll\$(Platform)\$(ConfigurationName);$(LibraryPath) $(ProjectDir)Bin\$(Platform)\$(Configuration)\ $(ProjectDir)Bin\$(Platform)\$(Configuration)\ $(ProjectDir)Bin\$(Platform)\$(Configuration)\ $(ProjectDir)Bin\$(Platform)\$(Configuration)\ $(ProjectDir)Bin\$(Platform)\$(Configuration)\ Level3 Disabled $(SolutionDir);%(AdditionalIncludeDirectories) true $(SolutionDir)dll\$(PlatformShortName)\$(ConfigurationName)\libvw.lib;%(AdditionalDependencies) xcopy /y/d 
$(SolutionDir)dll\$(PlatformShortName)\$(ConfigurationName)\libvw.* $(TargetDir) Level3 Disabled $(SolutionDir);%(AdditionalIncludeDirectories) true $(SolutionDir)dll\$(PlatformShortName)\$(ConfigurationName)\libvw.lib;%(AdditionalDependencies) xcopy /y/d $(SolutionDir)dll\$(PlatformShortName)\$(ConfigurationName)\libvw.* $(TargetDir) Level3 Disabled $(SolutionDir);%(AdditionalIncludeDirectories) true $(SolutionDir)dll\$(Platform)\$(ConfigurationName)\libvw.lib;%(AdditionalDependencies) xcopy /y/d $(SolutionDir)dll\$(PlatformShortName)\$(ConfigurationName)\libvw.* $(TargetDir) Level3 Disabled $(SolutionDir);%(AdditionalIncludeDirectories) true $(SolutionDir)dll\$(Platform)\$(ConfigurationName)\libvw.lib;%(AdditionalDependencies) xcopy /y/d $(SolutionDir)dll\$(PlatformShortName)\$(ConfigurationName)\libvw.* $(TargetDir) Level3 MaxSpeed true true $(SolutionDir);%(AdditionalIncludeDirectories) true true true $(SolutionDir)dll\$(PlatformShortName)\$(ConfigurationName)\libvw.lib;%(AdditionalDependencies) xcopy /y/d $(SolutionDir)dll\$(PlatformShortName)\$(ConfigurationName)\libvw.* $(TargetDir) Level3 MaxSpeed true true $(SolutionDir);%(AdditionalIncludeDirectories) true true true $(SolutionDir)dll\$(Platform)\$(Configuration)\libvw.lib;%(AdditionalDependencies) xcopy /y/d $(SolutionDir)dll\$(PlatformName)\$(ConfigurationName)\libvw.* $(TargetDir) This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
vowpal-wabbit-8.6.1.dfsg1/c_test/sample.c000066400000000000000000000015471332666127000202320ustar00rootroot00000000000000#include #ifndef OMIT_MS // a gcc sample would omit these two defines #define MS_CONV // use Microsoft libraray convention #define USE_CODECVT // enable wide character converesion enabled API #endif /* To compile with gcc use this command LD_LIBRARY_PATH=../vowpalwabbit/.libs ; gcc sample_gcc.c -I./../vowpalwabbit/ -L./../vowpalwabbit/.libs -lvw -lvw_c_wrapper -lallreduce -o sample_gcc */ typedef short char16_t; #define bool int #define true (1) #define false (0) #include "vwdll.h" int main() { VW_HANDLE vw; VW_EXAMPLE example; float score; printf("this is a native c program calling vw\n"); vw = VW_InitializeA("-q st --noconstant --quiet"); example = VW_ReadExampleA(vw, "1 |s p^the_man w^the w^man |t p^un_homme w^un w^homme"); score = VW_Learn(vw, example); VW_Finish(vw); printf("Score = %f\n", score); return 0; }vowpal-wabbit-8.6.1.dfsg1/cluster/000077500000000000000000000000001332666127000167765ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cluster/Makefile000066400000000000000000000013751332666127000204440ustar00rootroot00000000000000CXX ?= $(shell which g++) ifneq ($(CXX),) #$(warning Using clang: "$(CXX)") ARCH = -D__extern_always_inline=inline else CXX = g++ # $(warning Using g++) ARCH = $(shell test `g++ -v 2>&1 | tail -1 | cut -d ' ' -f 3 | cut -d '.' 
-f 1,2` \< 4.3 && echo -march=nocona || echo -march=native) endif ifeq ($(CXX),) $(error No compiler found) endif FLAGS += -I ../vowpalwabbit STDLIBS = $(BOOST_LIBRARY) $(LIBS) all: spanning_tree %.o: %.cc %.h $(CXX) $(FLAGS) -c $< -o $@ %.o: %.cc $(CXX) $(FLAGS) -c $< -o $@ spanning_tree: spanning_tree_main.o ../vowpalwabbit/spanning_tree.o ../vowpalwabbit/vw_exception.o $(CXX) $(FLAGS) -o $@ $+ $(STDLIBS) install: spanning_tree cp spanning_tree /usr/local/bin clean: rm -f *.o $(BINARIES) *~ $(MANPAGES) vowpal-wabbit-8.6.1.dfsg1/cluster/Makefile.am000066400000000000000000000011001332666127000210220ustar00rootroot00000000000000bin_PROGRAMS = spanning_tree spanning_tree_SOURCES = spanning_tree_main.cc ../vowpalwabbit/spanning_tree.cc ../vowpalwabbit/vw_exception.cc spanning_tree_LDADD = ../vowpalwabbit/liballreduce.la spanning_tree_DEPENDENCIES = ../vowpalwabbit/liballreduce.la ACLOCAL_AMFLAGS = -I acinclude.d AM_CPPFLAGS = -I$(top_srcdir)/vowpalwabbit AM_CXXFLAGS = ${BOOST_CPPFLAGS} ${ZLIB_CPPFLAGS} ${PTHREAD_CFLAGS} AM_LDFLAGS = ${BOOST_LDFLAGS} ${BOOST_PROGRAM_OPTIONS_LIB} ${ZLIB_LDFLAGS} ${PTHREAD_LIBS} if CLANG_LIBCXX AM_CXXFLAGS += -stdlib=libc++ -I ../vowpalwabbit -std=c++0x endif vowpal-wabbit-8.6.1.dfsg1/cluster/README_cluster000066400000000000000000000073571332666127000214330ustar00rootroot00000000000000The implementation of Non-linear (Preconditioned) Conjugate Gradient, LBFGS, online learning, and adaptive online learning works on clusters (both Hadoop and otherwise) now. To build the code, run make. At a high level, the code operates by repeatedly executing something equivalent to the MPI AllReduce function---adding up floats from all nodes then broadcasting them back to each individual node. In order to do this, a spanning tree over the nodes must be created. This is done using the helper daemon 'allreduce_master'. 
*********************************************************************** To run the code on non-Hadoop clusters, the script 'single_machine' has the simplest possible invocation. In general: start the span server on one of the cluster nodes: ./spanning_tree Launch vw on each of the worker nodes: ./vw --span_server --total --node --unique_id -d where: is the host running spanning_tree is the total number of nodes is the node id number is a number shared by all nodes in the process is the input source file for that node *********************************************************************** To run the code on Hadoop clusters: Decide if you are going to control the number of tasks by: (a) using gzip compressed files which cannot be broken up by Hadoop (b) controlling the number of reducers. We'll assume (a) below. Start the span server for the Hadoop cluster: ./spanning_tree Start the map-reduce job using Hadoop streaming: hadoop jar $HADOOP_HOME/hadoop-streaming.jar \ -files vw,runvw.sh \ -Dmapred.job.map.memory.mb=2500 -input -output \ -mapper runvw.sh -reducer NONE where is the directory on HDFS where you want the trained model to be saved. The trained model is saved to the file /model on HDFS and can be retreived by hadoop -get. To modify the arguments to VW, edit the script runvw.sh. Arguments to hadoop can be directly added in the hadoop streaming command. See the 'mapscript.sh' which uses 'runvw.sh' for an advanced example of running VW in a Hadoop enviornmnent. ************************************************************************ The files you need to know about: runvw.sh: This is the mapper code. It takes as arguments: The output directory. The trained model from the first mapper is stored as the file "model" in the output directory. The hostname of the cluster gateway, so that the mappers can connect to the gateway All the other standard VW options are currently hardcoded in the script, feel free to mess around with them. 
######################################################################### spanning_tree.cc: This is the span server code which runs on the gateway. You start it before the call to hadoop. The span server backgrounds itself after starting and listens for incoming connections. It sets up the topology on the mappers and then let them communicate amongst themselves. ######################################################################### allreduce.h: This is the header file for the nodes. ######################################################################### allreduce.cc: This is the code for doing allreduce. It implement the routine described above. all_reduce is implemented as a combination of reduce and broadcast routines. reduce reads data from children, adds it with local data and passes it up to the parent with a call to pass_up. broadcast receives data from parent, and passes it down to children with a call to pass_down. ######################################################################### cg.cc, gd.cc, bfgs.cc: learning algorithms which use all_reduce whenever communication is needed. Uses routines accumulate and accumulate_scalar to reduce vectors and scalars resp. 
vowpal-wabbit-8.6.1.dfsg1/cluster/cluster.vcxproj000077500000000000000000000346101332666127000221030ustar00rootroot00000000000000 DebugLeakCheck Win32 DebugLeakCheck x64 Debug Win32 Debug x64 Release Win32 Release x64 {2720BCD9-6731-4A11-BA24-1F74E35BA97F} Win32Proj cluster spanning_tree ..\vowpalwabbit\ 8.1 ..\sdl\SDL-7.0-Recommended.ruleset true Application true Unicode v140 Application true Unicode v140 Application true Unicode v140 Application true Unicode v140 Application false true Unicode v140 Application false true Unicode v140 true $(Configuration)\$(PlatformShortName)\ $(SolutionDir)\$(PlatformShortName)\$(Configuration)\ true $(Configuration)\$(PlatformShortName)\ $(SolutionDir)\$(PlatformShortName)\$(Configuration)\ true $(SolutionDir)\$(PlatformShortName)\$(Configuration)\ $(SolutionDir)$(PlatformShortName)\$(Configuration)\$(ProjectName)\ true $(SolutionDir)\$(PlatformShortName)\$(Configuration)\ $(SolutionDir)$(PlatformShortName)\$(Configuration)\$(ProjectName)\ false $(Configuration)\$(PlatformShortName)\ $(SolutionDir)\$(PlatformShortName)\$(Configuration)\ false $(SolutionDir)\$(PlatformShortName)\$(Configuration)\ $(SolutionDir)$(PlatformShortName)\$(Configuration)\$(ProjectName)\ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) $(SolutionDir) Console true ws2_32.lib;%(AdditionalDependencies) xcopy /v /i /r /y "$(SolutionDir)\$(PlatformShortName)\$(Configuration)" "$(SolutionDir)$(Configuration)" Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) $(SolutionDir) Console true ws2_32.lib;%(AdditionalDependencies) xcopy /v /i /r /y "$(SolutionDir)\$(PlatformShortName)\$(Configuration)" "$(SolutionDir)$(Configuration)" Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) $(SolutionDir) MultiThreadedDebug Console true ws2_32.lib;%(AdditionalDependencies) Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) $(SolutionDir) MultiThreadedDebug Console true ws2_32.lib;%(AdditionalDependencies) Level3 
MaxSpeed true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) ../vowpalwabbit Console true true true ws2_32.lib;%(AdditionalDependencies) xcopy /v /i /r /y "$(SolutionDir)\$(PlatformShortName)\$(Configuration)" "$(SolutionDir)$(Configuration)" Level3 MaxSpeed true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) MultiThreaded $(SolutionDir) Console true true true ws2_32.lib;%(AdditionalDependencies) This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. vowpal-wabbit-8.6.1.dfsg1/cluster/mapscript-yarn.sh000077500000000000000000000013601332666127000223060ustar00rootroot00000000000000#!/usr/bin/env bash set -e usage="$0 out_dir in_dir" if [ "$2" == "" ] then echo $usage exit fi set -u out_directory=$1 in_directory=$2 hadoop fs -rmr $out_directory > /dev/null 2>&1 || true ./spanning_tree || true hadoop jar /usr/lib/hadoop-mapreduce/hadoop-streaming.jar \ -Dmapred.job.name="vw allreduce $in_directory" \ -Dmapred.map.tasks.speculative.execution=true \ -Dmapred.reduce.tasks=0 \ -Dmapred.child.java.opts="-Xmx100m" \ -Dmapred.task.timeout=600000000 \ -Dmapred.job.map.memory.mb=1000 \ -input $in_directory \ -output $out_directory \ -file ../vowpalwabbit/vw \ -file /usr/lib64/libboost_program_options.so.5 \ -file /lib64/libz.so.1 \ -file runvw-yarn.sh \ -mapper runvw-yarn.sh \ -reducer NONE vowpal-wabbit-8.6.1.dfsg1/cluster/mapscript.sh000077500000000000000000000013761332666127000213460ustar00rootroot00000000000000out_directory=$1 in_directory=$2 nmappers=$3; hadoop fs -rmr $out_directory > /dev/null 2>&1; total=`hadoop fs -ls $in_directory | cut -d " " -f 7 | awk 'BEGIN{sum = 0} {if(NF > 0) sum += $1;} END{print sum;}'` echo $total mapsize=`expr $total / $nmappers` maprem=`expr $total % $nmappers` mapsize=`expr $mapsize + $maprem` mapsize=`expr $mapsize + 100` echo $mapsize ./spanning_tree hadoop jar 
$HADOOP_HOME/hadoop-streaming.jar -Dmapred.job.queue.name=search -Dmapred.min.split.size=$mapsize -Dmapred.map.tasks.speculative.execution=true -Dmapred.reduce.tasks=0 -Dmapred.job.map.memory.mb=3000 -Dmapred.child.java.opts="-Xmx100m" -Dmapred.task.timeout=600000000 -input $in_directory -output $out_directory -file ../vw -file runvw.sh -mapper runvw.sh -reducer NONE vowpal-wabbit-8.6.1.dfsg1/cluster/runvw-yarn.sh000077500000000000000000000047301332666127000214710ustar00rootroot00000000000000#!/usr/bin/env bash set -e dryrun=$1 # MR1 sets $mapred_map_tasks # MR2/YARN sets $mapreduce_job_maps nmappers=$mapreduce_job_maps # MR1 sets $mapreduce_job_submithost # MR2/YARN sets $mapreduce_job_submithostname submit_host=$mapreduce_job_submithostname # MR1 sets $mapred_output_dir # MR2/YARN sets $mapreduce_output_fileoutputformat_outputdir output_dir=$mapreduce_output_fileoutputformat_outputdir set -u # This works on both MR1 and MR2/YARN mapper=`printenv mapred_task_id | cut -d "_" -f 5` mapred_job_id=`echo "$mapred_job_id" | awk -F "_" '{print $NF}'` # debug echo $mapper > /dev/stderr echo $nmappers > /dev/stderr echo $output_dir > /dev/stderr echo $submit_host > /dev/stderr rm -f temp.cache || true echo 'Starting training' > /dev/stderr # SGD step gdcmd="./vw -b 20 --total $nmappers --node $mapper --unique_id $mapred_job_id --passes 2 --save_per_pass --readable_model sgd.rmodel -d /dev/stdin -f sgd.vwmodel --cache_file temp.cache --span_server $submit_host --loss_function=logistic" # BFGS step mapred_job_id=`expr $mapred_job_id \* 2` #create new nonce bfgscmd="./vw --total $nmappers --node $mapper --unique_id $mapred_job_id --cache_file temp.cache --bfgs --mem 5 --passes 2 --save_per_pass --readable_model bfgs.rmodel --span_server $submit_host -f bfgs.vwmodel -i sgd.vwmodel --loss_function=logistic" if [ "$mapper" == '000000' ] then if [ -z ${dryrun:-} ] then echo "SGD ..." > /dev/stderr $gdcmd > >(tee vw.out) 2> >(tee vw.err >&2) echo "BFGS ..." 
> /dev/stderr $bfgscmd > >(tee -a vw.out) 2> >(tee -a vw.err >&2) else echo "Dryrrun" echo $gdcmd set cat > /dev/null fi if [ $? -ne 0 ] then exit 5 fi # store models and output in hdfs hadoop fs -put -f sgd.vwmodel* $output_dir || true hadoop fs -put -f sgd.rmodel* $output_dir || true hadoop fs -put -f bfgs.vwmodel* $output_dir || true hadoop fs -put -f bfgs.rmodel* $output_dir || true hadoop fs -put -f vw.* $output_dir || true else if [ -z ${dryrun:-} ] then echo "SGD ..." $gdcmd echo "BFGS ..." $bfgscmd else echo "Dryrrun" echo $gdcmd echo $bfgscmd cat > /dev/null fi if [ $? -ne 0 ] then exit 6 fi fi vowpal-wabbit-8.6.1.dfsg1/cluster/runvw.sh000077500000000000000000000027211332666127000205200ustar00rootroot00000000000000#!/bin/bash mapper=`printenv mapred_task_id | cut -d "_" -f 5` rm -f temp.cache date +"%F %T Start training mapper=$mapper" > /dev/stderr vwcmd="./vw -b 24 --total $mapred_map_tasks --node $mapper --cache_file temp.cache --span_server $mapreduce_job_submithost --loss_function=logistic" mapred_job_id=`echo $mapred_job_id | tr -d 'job_'` gdcmd="$vwcmd --unique_id $mapred_job_id --passes 1 --adaptive --exact_adaptive_norm -d /dev/stdin -f tempmodel" mapred_job_id=`expr $mapred_job_id \* 2` #create new nonce bfgscmd="$vwcmd --unique_id $mapred_job_id --bfgs --mem 5 --passes 20 -f model -i tempmodel" if [ "$mapper" == '000000' ]; then $gdcmd > mapperout 2>&1 if [ $? -ne 0 ]; then date +"%F %T Failed mapper=$mapper cmd=$gdcmd" > /dev/stderr exit 1 fi $bfgscmd >> mapperout 2>&1 outfile=$mapred_output_dir/model mapperfile=$mapred_output_dir/mapperout found=`hadoop fs -lsr | grep $mapred_output_dir | grep mapperout` if [ "$found" != "" ]; then hadoop fs -rm -r $mapperfile fi found=`hadoop fs -lsr | grep $mapred_output_dir | grep model` if [ "$found" != "" ]; then hadoop fs -rm -r $outfile fi date +"%F %T outfile=$outfile" > /dev/stderr hadoop fs -put model $outfile hadoop fs -put mapperout $mapperfile else $gdcmd if [ $? 
-ne 0 ]; then date +"%F %T Failed mapper=$mapper cmd=$gdcmd" > /dev/stderr exit 1 fi $bfgscmd fi date +"%F %T Done mapper=$mapper" > /dev/stderr vowpal-wabbit-8.6.1.dfsg1/cluster/single_machine000077500000000000000000000011721332666127000216720ustar00rootroot00000000000000#!/bin/sh # A simplest-possible example of parallel code at work on a single machine. ./spanning_tree # This isn't actually useful---to do something useful, you would need to partition the dataset amongst the VW instances head -n 100 ../test/train-sets/0001.dat > head_data tail -n 100 ../test/train-sets/0001.dat > tail_data ../vowpalwabbit/vw --total 2 --node 0 --unique_id 0 -d head_data --cache_file c0 -k --passes 100 --span_server localhost --holdout_off > node_0 2>&1 & ../vowpalwabbit/vw --total 2 --node 1 --unique_id 0 -d tail_data --cache_file c1 -k --passes 100 --span_server localhost --holdout_off killall spanning_tree vowpal-wabbit-8.6.1.dfsg1/cluster/spanning_tree_main.cc000066400000000000000000000024221332666127000231450ustar00rootroot00000000000000/* Copyright (c) 2011 Yahoo! Inc. All rights reserved. The copyrights embodied in the content of this file are licensed under the BSD (revised) open source license This creates a binary tree topology over a set of n nodes that connect. 
*/ #include "spanning_tree.h" #include "vw_exception.h" #ifdef _WIN32 int daemon(int a, int b) { return 0; } int getpid() { return (int) ::GetCurrentProcessId(); } #endif #include #include #include #include #include using namespace std; using namespace VW; int main(int argc, char* argv[]) { if (argc > 2) { cout << "usage: spanning_tree [--nondaemon | pid_file]" << endl; exit(0); } try { if (argc == 2 && strcmp("--nondaemon",argv[1])==0) ; else if (daemon(1,1)) THROWERRNO("daemon: "); SpanningTree spanningTree; if (argc == 2 && strcmp("--nondaemon",argv[1])!=0) { ofstream pid_file; pid_file.open(argv[1]); if (!pid_file.is_open()) { cerr << "error writing pid file" << endl; exit(1); } pid_file << getpid() << endl; pid_file.close(); } spanningTree.Run(); } catch (VW::vw_exception& e) { cerr << "spanning tree (" << e.Filename() << ":" << e.LineNumber() << "): " << e.what() << endl; } } vowpal-wabbit-8.6.1.dfsg1/cluster/spark/000077500000000000000000000000001332666127000201165ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cluster/spark/VwSparkCluster.scala000066400000000000000000000201121332666127000240560ustar00rootroot00000000000000import java.net.InetAddress import org.apache.spark.Logging /** * The goal of this class is to provide as easy method to pipe data through an external command. It is done by combining * a {@link PipedOutputStream} with a {@link PipedInputStream} to create a single pipe to feed data through. This is * done asynchronously so data can be read and written to at the same time. * Created by jmorra on 1/22/15. */ class PipeUtils(bufferSize: Int = 1 << 20) { import java.io._ import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future import scala.language.postfixOps import scala.sys.process._ /** * This implicit class will allow easy access to streaming through external processes. This * should work on a line by line basis just like Spark's pipe command. 
* http://stackoverflow.com/questions/28095469/stream-input-to-external-process-in-scala * @param s: The input stream */ implicit class IteratorStream(s: TraversableOnce[String]) { def pipe(cmd: String): Stream[String] = cmd #< iter2is(s) lines def pipe(cmd: Seq[String]): Stream[String] = cmd #< iter2is(s) lines def run(cmd: String): String = cmd #< iter2is(s) !! private[this] def iter2is[A](it: TraversableOnce[A]): InputStream = { // What is written to the output stream will appear in the input stream. val pos = new PipedOutputStream val pis = new PipedInputStream(pos, bufferSize) val w = new PrintWriter(new BufferedOutputStream(pos, bufferSize), false) // Scala 2.11 (scala 2.10, use 'future'). Executes asynchronously. // Fill the stream, then close. Future { try it.foreach(w.println) finally w.close } // Return possibly before pis is fully written to. pis } } } /** * A framework for running VW in a cluster environment using Apache Spark. This * is meant only as a framework and may require some modification to work under your specific case. * Created by jmorra on 8/19/15. */ case class VwSparkCluster( pipeUtils: PipeUtils = new PipeUtils, ipAddress: String = InetAddress.getLocalHost.getHostAddress, defaultParallelism: Int = 2) extends Logging { import java.io._ import org.apache.commons.io.IOUtils import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext import scala.sys.process._ import pipeUtils._ /** * This will learn a VW model in cluster mode. If you notice that this command never starts and just stalls then the parallelism * is probably too high. Refer to this * for more information. * @param data an RDD of Strings that are in VW input format. * @param vwCmd the VW command to run. Note that this command must NOT contain --cache_file and -f. Those will automatically * be appended if necessary. * @param parallelism the amount of parallelism to use. This is calculated using a formula defined in getParallelism * if it is not supplied. 
It is recommended to only supply this if getParallelism is not working
   *                    in your case.
   * @return a byte array containing the final VW model.
   */
  def train(data: RDD[String], vwCmd: String, parallelism: Option[Int] = None): Array[Byte] = {
    if (numberOfRunningProcesses("spanning_tree") != 1) {
      throw new IllegalStateException("spanning_tree is not running on the driver, cannot proceed. Please start spanning_tree and try again.")
    }

    val sc = data.context
    val conf = sc.getConf

    // By using the job id and the RDD id we should get a globally unique ID.
    // The digits of the application id concatenated with the RDD id can be longer than
    // Long.MaxValue allows, in which case String.toLong throws NumberFormatException.
    // Parse as BigInt and fold into the non-negative Long range instead; ids that
    // already fit are unchanged.
    val jobDigits = conf.get("spark.app.id").replaceAll("[^\\d]", "") + data.id
    val jobId = (BigInt(jobDigits) % BigInt(Long.MaxValue)).toLong
    logInfo(s"VW cluster job ID: $jobId")

    val partitions = parallelism.getOrElse(getParallelism(sc).getOrElse(defaultParallelism))
    logInfo(s"VW cluster parallelism: ${partitions}")

    val repartitionedData = if (data.partitions.size == partitions) data else data.repartition(partitions)

    val vwBaseCmd = s"$vwCmd --total $partitions --span_server $ipAddress --unique_id $jobId"
    logInfo(s"VW cluster baseCmd: $vwBaseCmd")

    // Every partition runs VW; only partition 0 returns model bytes (see runVWOnPartition).
    val vwModels = repartitionedData.mapPartitionsWithIndex{case (partition, x) =>
      Iterator(runVWOnPartition(vwBaseCmd, x, partition))
    }

    vwModels.collect.flatten.flatten
  }

  def numberOfRunningProcesses(process: String): Int = "ps aux".#|(s"grep $process").!!.split("\n").size - 1

  /**
   * Gets the executor storage status excluding the driver node.
   * @param sc the SparkContext
   * @return an Array of Strings that are the names of all the storage statuses.
   */
  def executors(sc: SparkContext): Array[String] = {
    sc.getExecutorStorageStatus.collect{
      case x if x.blockManagerId.executorId != "" => x.blockManagerId.executorId
    }
  }

  /**
   * Gets the parallelism of the cluster. This is very much so a work in progress that seems to work now. This took
   * a lot of experimentation on Spark 1.2.0 to get to work. I make no guarantees that it will work on other Spark versions
   * especially if dynamic
   * allocation is enabled.
I also only tested this with a master of yarn-client and local so I'm not sure how * well it'll behave in other resource management environments (Spark Standalone, Mesos, etc.). * @param sc the SparkContext * @return if the parallelism can be found then the expected amount of parallelism. */ def getParallelism(sc: SparkContext): Option[Int] = { sc.master match { case x if (x.contains("yarn")) => sc.getConf.getOption("spark.executor.cores").map(x => x.toInt * executors(sc).size) case _ => Some(sc.defaultParallelism) } } /** * This will accept a base VW command, and append a cache file if necessary. It will also create a temp file * to store the VW model. It will then run VW on the supplied data. Finally it will return the bytes of the * model ONLY if the partition is 0. * * This function was tricky to write because the end result of each calculation is a file on the local disk. * According to John all the models should be in the same state after learning so we can choose to save * anyone we want, therefore, transferring the contents of each file to the driver would be wasteful. * In order to avoid this unnecessary transfer we're just going to get the first file. Now you might * ask yourself why not just call .first on the RDD. We cannot do that because in that case Spark would * only evaluate the first mapper and we need all of them to be evaluated, hence the need for .collect to * be called. Note that you may have to increase spark.driver.maxResultSize if the size of the VW model * is too large. * @param vwBaseCmd the base VW command without a cache file or an output specified. A cache file will automatically * be used if --passes is specified. * @param data a String a data in VW format to be passed to VW * @param partition the partition number of this chunk of data * @return an Array of the bytes of the VW model ONLY if this is the 0th partition, else None. 
*/
  def runVWOnPartition(vwBaseCmd: String, data: Iterator[String], partition: Int): Option[Array[Byte]] = {
    // A cache file is only added when the command asks for multiple passes.
    val cacheFile = if (vwBaseCmd.contains("--passes ")) {
      val c = File.createTempFile("vw-cache", ".cache")
      c.deleteOnExit()
      Option(c)
    }
    else None

    val vwBaseCmdWithCache = cacheFile.map(x => s"$vwBaseCmd -k --cache_file ${x.getCanonicalPath}").getOrElse(vwBaseCmd)

    val output = File.createTempFile("vw-model", ".model")
    output.deleteOnExit()

    try {
      val vwCmd = s"$vwBaseCmdWithCache --node $partition -f ${output.getCanonicalPath}"

      // pipe returns a Stream over the process' standard output. Drain it completely so
      // that we only continue once VW has terminated (and so that a non-zero exit code
      // surfaces as an exception) before the model file is read.
      data.pipe(vwCmd).foreach(_ => ())

      if (partition == 0) {
        val inputStream = new BufferedInputStream(new FileInputStream(output))
        try Option(IOUtils.toByteArray(inputStream))
        finally inputStream.close()
      }
      else None
    }
    finally {
      // Remove the temporary files even when VW or the model read fails.
      cacheFile.foreach(_.delete)
      output.delete()
    }
  }
}
vowpal-wabbit-8.6.1.dfsg1/configure.ac000066400000000000000000000042551332666127000176110ustar00rootroot00000000000000AC_INIT([vowpal_wabbit], [8.6.1], [jl@hunch.net], [vowpal_wabbit], [https://github.com/JohnLangford/vowpal_wabbit]) AC_CONFIG_HEADERS(vowpalwabbit/config.h) AM_INIT_AUTOMAKE([subdir-objects foreign]) AC_CONFIG_MACRO_DIR([acinclude.d]) # if CXXFLAGS is not given by user, don't use default of "-O2 -g" : ${CXXFLAGS=""} AC_PROG_CXX AC_PROG_CXXCPP AC_LANG_CPLUSPLUS AC_CANONICAL_HOST AC_PROG_INSTALL AM_PROG_LIBTOOL AX_CXX_COMPILE_STDCXX_11 AC_HEADER_STDC AX_BOOST_BASE([1.0], [], [ AC_MSG_ERROR([Could not find a valid version of boost.]) ]) AX_BOOST_PROGRAM_OPTIONS() AX_CHECK_ZLIB() ZLIB_CPPFLAGS="-I${ZLIB_HOME}/include" ZLIB_LDFLAGS="-L${ZLIB_HOME}/lib -lz" AC_SUBST(ZLIB_CPPFLAGS) AC_SUBST(ZLIB_LDFLAGS) PTHREAD_LIBS=-lpthread AX_PTHREAD([], [ AC_MSG_ERROR([Could not find posix thread library.]) ]) nitpick=false AC_ARG_ENABLE([nitpicking], AC_HELP_STRING([--enable-nitpicking],[make compiler warn about possible problems]), [ test "$enableval" = "no" || nitpick=true ] ) AM_CONDITIONAL(NITPICK, test x$nitpick = xtrue) parallelize=false 
AC_ARG_ENABLE([parallelization],AC_HELP_STRING([--enable-parallelization],[enable openmp]),[ test "$enableval" = "no" || parallelize=true ]) AM_CONDITIONAL(PARALLELIZE, test x$parallelize = xtrue) profile=false AC_ARG_ENABLE([profiling], AC_HELP_STRING([--enable-profiling], [add -pg to C++ compiler flags]), [ test "$enableval" = "no" || profile=true ]) AM_CONDITIONAL(PROFILE, test x$profile = xtrue) vwbug=false AC_ARG_ENABLE([debug], AC_HELP_STRING([--enable-debug], [Enable debugging, disable optimization in the compiler]), [ test "$enableval" = "no" || vwbug=true ]) AM_CONDITIONAL(VWBUG, test x$vwbug = xtrue) clang_libcxx=false AC_ARG_ENABLE([libc++], AC_HELP_STRING([--enable-libc++],[use clang's libc++ vs. g++ libstdc++ headers/libraries]), [ test "$enableval" = "no" || clang_libcxx=true]) AM_CONDITIONAL([CLANG_LIBCXX], test x$clang_libcxx = xtrue) case $host in *-*-freebsd*) freebsd=true ;; *) freebsd=false ;; esac AM_CONDITIONAL(FREEBSD, test x$freebsd = xtrue) AC_CONFIG_FILES([ Makefile vowpalwabbit/Makefile cluster/Makefile library/Makefile libvw.pc libvw_c_wrapper.pc ]) AC_OUTPUT vowpal-wabbit-8.6.1.dfsg1/cs/000077500000000000000000000000001332666127000157225ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/azure/000077500000000000000000000000001332666127000170505ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/azure/ApplicationInsights.config000066400000000000000000000002101332666127000242040ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/azure/Join/000077500000000000000000000000001332666127000177475ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/azure/Join/QueueDictionary.cs000066400000000000000000000024671332666127000234210ustar00rootroot00000000000000using System; using System.Collections.Generic; namespace VowpalWabbit.Azure.Join { public class QueueDictionary { public class Item { public TKey Key { get; set; } public TValue Value { get; set; } } private readonly LinkedList queue = new LinkedList(); private readonly 
Dictionary> index = new Dictionary>(); public IEnumerable DequeueIf(Predicate pred) { while (queue.First != null && pred(queue.First.Value.Value)) { var item = queue.First.Value; queue.RemoveFirst(); index.Remove(item.Key); yield return item; } } public TValue Remove(TKey key) { LinkedListNode node; if (!index.TryGetValue(key, out node)) return default(TValue); var value = node.Value.Value; queue.Remove(node); index.Remove(key); return value; } public void Enqueue(TKey key, TValue value) { var node = queue.AddLast(new Item { Key = key, Value = value }); index.Add(key, node); } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure/Properties/000077500000000000000000000000001332666127000212045ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/azure/Properties/AssemblyInfo.cs000066400000000000000000000022131332666127000241240ustar00rootroot00000000000000//------------------------------------------------------------------------------ // // This code was generated by a tool. // Runtime Version:4.0.30319.42000 // // Changes to this file may cause incorrect behavior and will be lost if // the code is regenerated. // //------------------------------------------------------------------------------ [assembly: System.Reflection.AssemblyTitle("Vowpal Wabbit")] [assembly: System.Reflection.AssemblyDescription("Vowpal Wabbit")] [assembly: System.Reflection.AssemblyCompany("Microsoft Corp")] [assembly: System.Reflection.AssemblyProduct("Vowpal Wabbit")] [assembly: System.Reflection.AssemblyCopyright("Copyright (C) Microsoft Corp 2012-2016, Yahoo! Inc. 2007-2012, and many individua" + "l contributors. 
All rights reserved")] [assembly: System.Runtime.InteropServices.ComVisible(false)] [assembly: System.CLSCompliant(false)] [assembly: System.Runtime.InteropServices.Guid("6a577997-af00-4ca0-8453-fdc8bbdf2a57")] [assembly: System.Reflection.AssemblyVersion("8.3.0.9")] [assembly: System.Reflection.AssemblyFileVersion("8.3.0.9")] vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/000077500000000000000000000000001332666127000204545ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Checkpoint/000077500000000000000000000000001332666127000225435ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Checkpoint/CountingCheckpointPolicy.cs000066400000000000000000000037241332666127000300560ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- namespace VW.Azure.Trainer.Checkpoint { /// /// Implements an example count based checkpoint policy. /// public class CountingCheckpointPolicy : ICheckpointPolicy { private readonly int exampleSyncCount; private int exampleCount; /// /// Initializes a new instance. /// public CountingCheckpointPolicy(int exampleSyncCount) { this.exampleSyncCount = exampleSyncCount; } /// /// Return true if the trainer should checkpoint the model, false otherwise. /// /// Number of examples since last checkpoint. public bool ShouldCheckpointAfterExample(int examples) { this.exampleCount += examples; if (this.exampleCount >= this.exampleSyncCount) { this.exampleCount %= this.exampleSyncCount; return true; } return false; } /// /// Reset checkpoint policy state. 
/// public void Reset() { this.exampleCount = 0; } /// /// Serialize to string for logging. /// public override string ToString() { return $"CountingCheckpointPolicy: {this.exampleSyncCount}"; } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Checkpoint/ICheckpointPolicy.cs000066400000000000000000000020421332666127000264500ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- namespace VW.Azure.Trainer.Checkpoint { /// /// Interface for model checkpoint policies. /// public interface ICheckpointPolicy { /// /// Return true if the trainer should checkpoint the model, false otherwise. /// /// Number of examples since last checkpoint. bool ShouldCheckpointAfterExample(int examples); /// /// Reset checkpoint policy state. /// void Reset(); } } vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Checkpoint/TimespanCheckpointPolicy.cs000066400000000000000000000041751332666127000300510ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Diagnostics; namespace VW.Azure.Trainer.Checkpoint { /// /// Implements a wallclock time based checkpoint policy. 
/// public class IntervalCheckpointPolicy : ICheckpointPolicy { private Stopwatch stopwatch; private TimeSpan checkpointInterval; /// /// Initializes a new instance. /// public IntervalCheckpointPolicy(TimeSpan checkpointInterval) { this.stopwatch = Stopwatch.StartNew(); this.checkpointInterval = checkpointInterval; } /// /// Return true if the trainer should checkpoint the model, false otherwise. /// /// Number of examples since last checkpoint. public bool ShouldCheckpointAfterExample(int examples) { // call checkpoint every 5 minutes, so that worker can resume processing from 5 minutes back if it restarts. if (this.stopwatch.Elapsed > checkpointInterval) { this.stopwatch.Restart(); return true; } return false; } /// /// Reset checkpoint policy state. /// public void Reset() { this.stopwatch.Restart(); } /// /// Serialize to string for logging. /// public override string ToString() { return $"IntervalCheckpointPolicy: {this.checkpointInterval}"; } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Data/000077500000000000000000000000001332666127000213255ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Data/CheckpointData.cs000066400000000000000000000017461332666127000245450ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using System; namespace VW.Azure.Trainer.Data { internal sealed class CheckpointData { internal byte[] Model { get; set; } internal byte[] EvalModel { get; set; } internal int TrackbackCount { get; set; } internal string TrackbackList { get; set; } internal bool UpdateClientModel { get; set; } internal string State { get; set; } internal string Timestamp { get; set; } internal DateTime StartDateTime { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Data/CheckpointEvaluateTriggerEvent.cs000066400000000000000000000013261332666127000277620ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- namespace VW.Azure.Trainer.Data { internal sealed class CheckpointEvaluateTriggerEvent { public override string ToString() { return "Checkpoint evaluate request"; } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Data/CheckpointTriggerEvent.cs000066400000000000000000000013621332666127000262730ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- namespace VW.Azure.Trainer.Data { internal sealed class CheckpointTriggerEvent { public override string ToString() { return "Checkpoint request"; } public bool UpdateClientModel { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Data/PipelineData.cs000066400000000000000000000024731332666127000242210ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using VW; namespace VW.Azure.Trainer.Data { internal sealed class PipelineData { internal string JSON { get; set; } internal string Offset { get; set; } internal string PartitionKey { get; set; } internal string PartitionId { get; set; } internal string EventId { get; set; } internal DateTime Timestamp { get; set; } internal int[] Actions { get; set; } internal float[] Probabilities { get; set; } internal float ProbabilityOfDrop { get; set; } /// /// 1-based action mapping to tag. /// internal Dictionary ActionsTags { get; set; } = new Dictionary(); public VowpalWabbitExampleCollection Example { get; set; } } }vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Data/TrainerResult.cs000066400000000000000000000055561332666127000244720ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. 
Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Linq; using VW; using VW.Labels; namespace VW.Azure.Trainer.Data { internal sealed class TrainerResult { internal TrainerResult(ActionScore[] progressivePrediction, int[] observedActions, float[] observedProbabilities) { if (progressivePrediction == null) throw new ArgumentNullException(nameof(progressivePrediction)); if (observedActions == null) throw new ArgumentNullException(nameof(observedActions)); if (observedProbabilities == null) throw new ArgumentNullException(nameof(observedProbabilities)); if (observedActions.Length != observedProbabilities.Length) throw new ArgumentException($"Actions (length: {observedActions.Length}) and probabilities (length: {observedProbabilities.Length}) must be of equal length"); this.ProgressiveRanking = progressivePrediction.Select(a => (int)a.Action).ToArray(); var probabilitiesOrderedByRanking = progressivePrediction.Select(a => a.Score).ToArray(); this.ProgressiveProbabilities = new float[probabilitiesOrderedByRanking.Length]; for (int i = 0; i < probabilitiesOrderedByRanking.Length; i++) this.ProgressiveProbabilities[ProgressiveRanking[i]] = probabilitiesOrderedByRanking[i]; // Ranking is 0-based this.ObservedRanking = observedActions; this.ObservedProbabilities = new float[observedProbabilities.Length]; for (int i = 0; i < observedActions.Length; i++) this.ObservedProbabilities[observedActions[i] - 1] = observedProbabilities[i]; } internal ContextualBanditLabel Label { get; set; } internal TimeSpan Latency { get; set; } internal string PartitionKey { get; set; } internal string PartitionId { get; set; } internal int[] ProgressiveRanking { get; private set; } internal float[] ProgressiveProbabilities { get; private set; } internal int[] ObservedRanking { get; private set; } 
internal float[] ObservedProbabilities { get; private set; } internal float ProbabilityOfDrop { get; set; } internal Dictionary ActionsTags { get; set; } internal string EventId { get; set; } internal DateTime Timestamp { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/LearnEventProcessor.cs000066400000000000000000000051151332666127000247500ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using Microsoft.ApplicationInsights; using Microsoft.ApplicationInsights.DataContracts; using Microsoft.ServiceBus.Messaging; using System.Collections.Generic; using System.Threading.Tasks; namespace VW.Azure.Trainer { internal sealed class LearnEventProcessor : IEventProcessor { private readonly TrainEventProcessorFactory parent; private readonly TelemetryClient telemetry; private readonly PerformanceCounters perfCounters; internal LearnEventProcessor(TrainEventProcessorFactory parent, PerformanceCounters perfCounters) { this.telemetry = new TelemetryClient(); this.telemetry.Context.Component.Version = "TrainEventProcessor v" + GetType().Assembly.GetName().Version; this.parent = parent; this.perfCounters = perfCounters; } public Task OpenAsync(PartitionContext context) { this.telemetry.TrackTrace( $"OpenPartition Id {context.Lease.PartitionId}", SeverityLevel.Information, new Dictionary { { "PartitionId", context.Lease.PartitionId }, { "Offset", context.Lease.Offset } }); this.perfCounters.EventHub_Processors.Increment(); return Task.FromResult(true); } public async Task ProcessEventsAsync(PartitionContext context, IEnumerable messages) { await 
this.parent.Stage0_Split(context, messages); } public Task CloseAsync(PartitionContext context, CloseReason reason) { this.telemetry.TrackTrace( $"ClosePartition {context.Lease.PartitionId}: {reason}", SeverityLevel.Information, new Dictionary { { "PartitionId", context.Lease.PartitionId }, { "Reason", reason.ToString() } }); this.perfCounters.EventHub_Processors.Decrement(); return Task.FromResult(true); } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/LearnEventProcessorFactory.cs000066400000000000000000000345261332666127000263100ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using Microsoft.ApplicationInsights; using Microsoft.ApplicationInsights.DataContracts; using Microsoft.ServiceBus.Messaging; using System; using System.Linq; using System.Collections.Generic; using System.Threading; using System.Threading.Tasks; using System.Threading.Tasks.Dataflow; using VW; using Newtonsoft.Json; using System.IO; using System.Diagnostics; using System.Reactive.Linq; using System.Reactive.Subjects; using VW.Labels; using System.Text; using VW.Azure.Trainer; using VW.Azure.Trainer.Operations; using VW.Serializer; using VW.Azure.Trainer.Data; namespace VW.Azure { internal sealed class TrainEventProcessorFactory : IEventProcessorFactory, IDisposable { private readonly TelemetryClient telemetry; private readonly PerformanceCounters performanceCounters; private readonly Learner trainer; private EvalOperation evalOperation; private LatencyOperation latencyOperation; private TransformManyBlock deserializeBlock; private TransformManyBlock learnBlock; private ActionBlock 
checkpointBlock;

        private IDisposable checkpointTrigger;

        /// <summary>
        /// Builds the deserialize / learn / checkpoint dataflow blocks and links them into a pipeline.
        /// </summary>
        /// <param name="settings">Trainer settings; handed to the evaluation operation.</param>
        /// <param name="trainer">Learner whose Checkpoint method backs the checkpoint block.</param>
        /// <param name="performanceCounters">Performance counters shared with the evaluation operation.</param>
        /// <exception cref="ArgumentNullException">Thrown if any argument is null.</exception>
        internal TrainEventProcessorFactory(OnlineTrainerSettingsInternal settings, Learner trainer, PerformanceCounters performanceCounters)
        {
            if (settings == null)
                throw new ArgumentNullException(nameof(settings));

            if (trainer == null)
                throw new ArgumentNullException(nameof(trainer));

            if (performanceCounters == null)
                throw new ArgumentNullException(nameof(performanceCounters));

            this.trainer = trainer;
            this.performanceCounters = performanceCounters;

            this.telemetry = new TelemetryClient();
            this.telemetry.Context.Component.Version = GetType().Assembly.GetName().Version.ToString();

            this.evalOperation = new EvalOperation(settings, performanceCounters);
            this.latencyOperation = new LatencyOperation();

            this.deserializeBlock = new TransformManyBlock(
                (Func>)this.Stage1_Deserialize,
                new ExecutionDataflowBlockOptions
                {
                    MaxDegreeOfParallelism = 4, // Math.Max(2, Environment.ProcessorCount - 1),
                    BoundedCapacity = 1024
                });
            this.deserializeBlock.Completion.Trace(this.telemetry, "Stage 1 - Deserialization");

            this.learnBlock = new TransformManyBlock(
                (Func>)this.Stage2_ProcessEvent,
                new ExecutionDataflowBlockOptions
                {
                    MaxDegreeOfParallelism = 1,
                    BoundedCapacity = 1024
                });
            this.learnBlock.Completion.Trace(this.telemetry, "Stage 2 - Learning");

            // trigger checkpoint checking every second
            this.checkpointTrigger = Observable.Interval(TimeSpan.FromSeconds(1))
                .Select(_ => new CheckpointEvaluateTriggerEvent())
                .Subscribe(this.learnBlock.AsObserver());

            this.checkpointBlock = new ActionBlock(
                this.trainer.Checkpoint,
                new ExecutionDataflowBlockOptions
                {
                    MaxDegreeOfParallelism = 1,
                    BoundedCapacity = 4
                });
            // Trace completion of the checkpoint block itself; the learn block is already traced as stage 2.
            this.checkpointBlock.Completion.Trace(this.telemetry, "Stage 3 - CheckPointing");

            // setup pipeline
            this.deserializeBlock.LinkTo(
                this.learnBlock,
                new DataflowLinkOptions { PropagateCompletion = true });

            this.learnBlock.LinkTo(
                this.evalOperation.TargetBlock,
                new DataflowLinkOptions { PropagateCompletion = true },
                obj => obj is TrainerResult);
this.learnBlock.LinkTo( this.checkpointBlock, new DataflowLinkOptions { PropagateCompletion = true }, obj => obj is CheckpointData); // consume all unmatched this.learnBlock.LinkTo(DataflowBlock.NullTarget()); } internal void UpdatePerformanceCounters() { this.performanceCounters.Stage1_JSON_Queue.RawValue = this.deserializeBlock.InputCount; this.performanceCounters.Stage2_Learn_Queue.RawValue = this.learnBlock.InputCount; this.performanceCounters.Stage3_Checkpoint_Queue.RawValue = this.checkpointBlock.InputCount; } internal async Task Stage0_Split(PartitionContext context, IEnumerable messages) { foreach (EventData eventData in messages) { try { using (var eventStream = eventData.GetBodyStream()) { using (var sr = new StreamReader(eventStream, Encoding.UTF8)) { string line; while ((line = await sr.ReadLineAsync()) != null) { var data = new PipelineData { JSON = line, PartitionId = context.Lease.PartitionId, PartitionKey = eventData.PartitionKey, Offset = eventData.Offset }; // TODO: ArrayBuffer to avoid string allocation... 
// also just send char ref + offset + length
                                if (!await this.deserializeBlock.SendAsync(data))
                                    this.telemetry.TrackTrace("Failed to enqueue data");
                            }

                            this.performanceCounters.Stage0_IncomingBytesPerSec.IncrementBy(eventStream.Position);

                            this.performanceCounters.Stage0_Batches_Size.IncrementBy(eventStream.Position);
                            this.performanceCounters.Stage0_Batches_SizeBase.Increment();
                        }
                    }

                    this.performanceCounters.Stage0_BatchesPerSec.Increment();
                    this.performanceCounters.Stage0_Batches_Total.Increment();
                }
                catch (Exception ex)
                {
                    this.telemetry.TrackException(ex);
                }
            }
        }

        /// <summary>
        /// If <paramref name="property"/> matches <paramref name="expectedProperty"/> (ordinal, case-insensitive),
        /// advances the reader to the property value, checks its token type and invokes <paramref name="success"/>.
        /// </summary>
        /// <returns>true if the property matched and was consumed, false if the names differ.</returns>
        /// <exception cref="VowpalWabbitJsonException">
        /// The reader could not advance, or the value is neither of the expected token type nor null.
        /// </exception>
        private static bool TryExtractProperty(VowpalWabbitJsonParseState state, string property, string expectedProperty, JsonToken expectedToken, Action success)
        {
            if (!property.Equals(expectedProperty, StringComparison.OrdinalIgnoreCase))
                return false;

            // Both conditions are failures on their own: running out of input, or finding a value of
            // the wrong type. Null is let through because callbacks handle a null value themselves
            // (e.g. "reader.Value ?? 0f").
            if (!state.Reader.Read() ||
                (state.Reader.TokenType != expectedToken && state.Reader.TokenType != JsonToken.Null))
                throw new VowpalWabbitJsonException(state.Reader, $"Property '{expectedProperty}' must be of type '{expectedToken}'");

            success(state.Reader);

            return true;
        }

        private static bool TryExtractArrayProperty(VowpalWabbitJsonParseState state, string property, string expectedProperty, Action success)
        {
            return TryExtractProperty(
                state,
                property,
                expectedProperty,
                JsonToken.StartArray,
                reader =>
                {
                    success(JsonSerializer.CreateDefault().Deserialize(reader));

                    if (state.Reader.TokenType != JsonToken.EndArray && !reader.Read())
                        throw new VowpalWabbitJsonException(state.Reader, $"Property {expectedProperty} must end with 'EndArray'");
                });
        }

        private IEnumerable Stage1_Deserialize(PipelineData data)
        {
            try
            {
                using (var jsonReader = new JsonTextReader(new StringReader(data.JSON)))
                {
                    //jsonReader.FloatParser = Util.ReadDoubleString;
                    // jsonReader.ArrayPool = pool;

                    VowpalWabbitJsonSerializer vwJsonSerializer = null;

                    try
                    {
                        vwJsonSerializer = new VowpalWabbitJsonSerializer(this.trainer.VowpalWabbit, this.trainer.ReferenceResolver);

                        vwJsonSerializer.RegisterExtension((state, property) =>
                        {
                            if (TryExtractProperty(state, 
property, "_eventid", JsonToken.String, reader => data.EventId = (string)reader.Value)) return true; else if (TryExtractProperty(state, property, "_timestamp", JsonToken.Date, reader => data.Timestamp = (DateTime)reader.Value)) return true; else if (TryExtractProperty(state, property, "_ProbabilityOfDrop", JsonToken.Float, reader => data.ProbabilityOfDrop = (float)(reader.Value ?? 0f))) return true; else if (TryExtractArrayProperty(state, property, "_p", arr => data.Probabilities = arr)) return true; else if (TryExtractArrayProperty(state, property, "_a", arr => data.Actions = arr)) return true; else if (TryExtractProperty(state, property, "_tag", JsonToken.String, reader => data.ActionsTags.Add(state.MultiIndex + 1, (string)reader.Value))) return true; return false; }); data.Example = vwJsonSerializer.ParseAndCreate(jsonReader); if (data.Probabilities == null) throw new ArgumentNullException("Missing probabilities (_p)"); if (data.Actions == null) throw new ArgumentNullException("Missing actions (_a)"); if (data.Example == null) { // unable to create example due to missing data // will be trigger later vwJsonSerializer.UserContext = data.Example; // make sure the serialize is not deallocated vwJsonSerializer = null; } } finally { if (vwJsonSerializer != null) vwJsonSerializer.Dispose(); } performanceCounters.Stage1_JSON_DeserializePerSec.Increment(); // delayed if (data.Example == null) { this.performanceCounters.Feature_Requests_Pending.Increment(); yield break; } } } catch (Exception ex) { this.telemetry.TrackException(ex, new Dictionary { { "JSON", data.JSON } }); this.performanceCounters.Stage2_Faulty_Examples_Total.Increment(); this.performanceCounters.Stage2_Faulty_ExamplesPerSec.Increment(); yield break; } yield return data; } private IEnumerable Stage2_ProcessEvent(object evt) { // single threaded loop var eventHubExample = evt as PipelineData; if (eventHubExample != null) { var result = this.trainer.Learn(eventHubExample); // report latency 
this.latencyOperation.Process(result); yield return result; if (this.trainer.ShouldCheckpoint(1)) yield return this.trainer.CreateCheckpointData(updateClientModel: true); } else if (evt is CheckpointTriggerEvent) yield return this.trainer.CreateCheckpointData(updateClientModel: ((CheckpointTriggerEvent)evt).UpdateClientModel); else if (evt is CheckpointEvaluateTriggerEvent) { if (this.trainer.ShouldCheckpoint(0)) yield return this.trainer.CreateCheckpointData(updateClientModel: true); } else this.telemetry.TrackTrace($"Unsupported stage 2 event '{evt}'", SeverityLevel.Warning); } public IEventProcessor CreateEventProcessor(PartitionContext context) { return new LearnEventProcessor(this, this.performanceCounters); } public ITargetBlock LearnBlock { get { return this.learnBlock; } } /// /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources. /// public void Dispose() { if (this.checkpointTrigger != null) { this.checkpointTrigger.Dispose(); this.checkpointTrigger = null; } if (this.learnBlock != null) { // complete beginning of the pipeline this.deserializeBlock.Complete(); // wait at the end of the pipeline this.checkpointBlock.Completion.Wait(TimeSpan.FromMinutes(1)); } if (this.evalOperation != null) { this.evalOperation.Dispose(); this.evalOperation = null; } if (this.latencyOperation != null) { this.latencyOperation.Dispose(); this.latencyOperation = null; } } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/LearnEventProcessorHost.cs000066400000000000000000000300351332666127000256050ustar00rootroot00000000000000using Microsoft.ApplicationInsights; using Microsoft.ApplicationInsights.DataContracts; using Microsoft.ServiceBus; using Microsoft.ServiceBus.Messaging; using System; using System.Collections.Generic; using System.Reactive.Linq; using System.Threading; using System.Threading.Tasks; using System.Threading.Tasks.Dataflow; using VW.Azure.Trainer.Data; using VW.Azure.Trainer.Operations; using VW.Serializer; 
namespace VW.Azure.Trainer
{
    /// <summary>
    /// Azure online trainer.
    /// </summary>
    public sealed class LearnEventProcessorHost : IDisposable
    {
        private readonly TelemetryClient telemetry;

        private readonly object managementLock = new object();

        private TrainEventProcessorFactory trainProcessorFactory;

        private EventProcessorHost eventProcessorHost;

        private Learner trainer;

        private PerformanceCounters perfCounters;

        private SafeTimer perfUpdater;

        private DateTime? eventHubStartDateTimeUtc;

        /// <summary>
        /// Initializes a new instance.
        /// </summary>
        public LearnEventProcessorHost()
        {
            // by default read from the beginning of Event Hubs event stream.
            this.eventHubStartDateTimeUtc = null;

            this.telemetry = new TelemetryClient();
        }

        /// <summary>
        /// Performance counters populated by online trainer.
        /// </summary>
        public PerformanceCounters PerformanceCounters
        {
            get { return this.perfCounters; }
        }

        /// <summary>
        /// Timestamp when the trainer was last started.
        /// </summary>
        public DateTime LastStartDateTimeUtc { get; private set; }

        internal object InitialOffsetProvider(string partition)
        {
            string checkpointedOffset;
            bool hasCheckpoint = this.trainer.State.Partitions.TryGetValue(partition, out checkpointedOffset);

            if (!hasCheckpoint)
            {
                // either DateTime.UtcNow on reset or null if start the first time
                return this.eventHubStartDateTimeUtc;
            }

            return checkpointedOffset;
        }

        /// <summary>
        /// Starts the trainer with given parameters.
        /// </summary>
        public async Task StartAsync(OnlineTrainerSettingsInternal settings)
        {
            await this.SafeExecute(() => this.StartInternalAsync(settings));
        }

        /// <summary>
        /// Stops the trainer.
        /// </summary>
        public async Task StopAsync()
        {
            await this.SafeExecute(() => this.StopInternalAsync());
        }

        /// <summary>
        /// Restarts the trainer.
        /// </summary>
        public async Task Restart(OnlineTrainerSettingsInternal settings)
        {
            await this.SafeExecute(() => this.RestartInternalAsync(settings));
        }

        /// <summary>
        /// Resets the trainers.
        /// </summary>
        public async Task ResetModelAsync(OnlineTrainerState state = null, byte[] model = null)
        {
            await this.SafeExecute(() => this.ResetInternalAsync(state, model));
        }

        ///
        /// Forces model checkpointing.
///
        public async Task CheckpointAsync()
        {
            await this.SafeExecute(async () => await this.trainProcessorFactory.LearnBlock.SendAsync(new CheckpointTriggerEvent()));
        }

        /// <summary>
        /// Runs a management action under the management lock, waiting up to three minutes for it,
        /// and reports any failure to telemetry before re-throwing it.
        /// </summary>
        /// <param name="action">Factory for the asynchronous management operation to run.</param>
        /// <returns>An already completed task; the action is waited on synchronously.</returns>
        private Task SafeExecute(Func<Task> action)
        {
            try
            {
                // need to do a lock as child tasks are interleaving
                lock (this.managementLock)
                {
                    action().Wait(TimeSpan.FromMinutes(3));
                }
            }
            catch (AggregateException ex)
            {
                foreach (var innerEx in ex.Flatten().InnerExceptions)
                    this.telemetry.TrackException(innerEx);

                // plain re-throw keeps the original stack trace ("throw ex" would reset it)
                throw;
            }
            catch (Exception ex)
            {
                this.telemetry.TrackException(ex);

                // plain re-throw keeps the original stack trace ("throw ex" would reset it)
                throw;
            }

            return Task.FromResult(true);
        }

        /// <summary>
        /// Stops the trainer and starts it again from a fresh state, optionally seeded with the
        /// supplied state and/or model. Does nothing if the trainer was never started.
        /// </summary>
        /// <param name="state">Optional trainer state to start from.</param>
        /// <param name="model">Optional serialized model to start from.</param>
        private async Task ResetInternalAsync(OnlineTrainerState state = null, byte[] model = null)
        {
            if (this.trainer == null)
            {
                this.telemetry.TrackTrace("Online Trainer resetting skipped as trainer hasn't started yet.", SeverityLevel.Information);
                return;
            }

            var msg = "Online Trainer resetting";
            bool updateClientModel = false;
            if (state != null)
            {
                msg += "; state supplied";
                updateClientModel = true;
            }

            if (model != null)
            {
                msg += $"; model of size {model.Length} supplied.";
                updateClientModel = true;
            }

            this.telemetry.TrackTrace(msg, SeverityLevel.Information);

            // capture the settings before StopInternalAsync clears this.trainer
            var settings = this.trainer.Settings;

            await this.StopInternalAsync();

            settings.ForceFreshStart = true;
            settings.CheckpointPolicy.Reset();

            await this.StartInternalAsync(settings, state, model);

            // make sure we store this fresh model, in case we die we don't lose the reset
            await this.trainProcessorFactory.LearnBlock.SendAsync(new CheckpointTriggerEvent { UpdateClientModel = updateClientModel });

            if (!updateClientModel)
            {
                // delete the currently deployed model, so the clients don't use the old one
                var latestModel = await this.trainer.GetLatestModelBlob();
                this.telemetry.TrackTrace($"Resetting client visible model: {latestModel.Uri}", SeverityLevel.Information);
                await latestModel.UploadFromByteArrayAsync(new byte[0], 0, 0);
            }
        }

        /// <summary>
        /// Stops the trainer and starts it again with the given settings.
        /// </summary>
        private async Task RestartInternalAsync(OnlineTrainerSettingsInternal settings)
        {
            this.telemetry.TrackTrace("Online Trainer restarting", SeverityLevel.Information);

            await this.StopInternalAsync();

            await this.StartInternalAsync(settings);
        }

        /// <summary>
        /// Creates the learner, the processing pipeline factory and the event processor host,
        /// registers the factory with the host and starts the performance counter timer.
        /// </summary>
        /// <param name="settings">Trainer settings.</param>
        /// <param name="state">Optional state passed to a fresh start.</param>
        /// <param name="model">Optional model; when supplied a fresh start is performed.</param>
        private async Task StartInternalAsync(OnlineTrainerSettingsInternal settings, OnlineTrainerState state = null, byte[] model = null)
        {
            this.LastStartDateTimeUtc = DateTime.UtcNow;
            this.perfCounters = new PerformanceCounters(settings.Metadata.ApplicationID);

            // setup trainer
            this.trainer = new Learner(settings, this.DelayedExampleCallback, this.perfCounters);

            if (settings.ForceFreshStart || model != null)
                this.trainer.FreshStart(state, model);
            else
                await this.trainer.FindAndResumeFromState();

            // setup factory
            this.trainProcessorFactory = new TrainEventProcessorFactory(settings, this.trainer, this.perfCounters);

            // setup host
            var serviceBusConnectionStringBuilder = new ServiceBusConnectionStringBuilder(settings.JoinedEventHubConnectionString);
            var joinedEventhubName = serviceBusConnectionStringBuilder.EntityPath;
            serviceBusConnectionStringBuilder.EntityPath = string.Empty;

            this.eventProcessorHost = new EventProcessorHost(
                settings.Metadata.ApplicationID,
                joinedEventhubName,
                settings.JoinedEventHubConsumerGroup,
                serviceBusConnectionStringBuilder.ToString(),
                settings.StorageConnectionString);

            // used by this.InitialOffsetProvider if no checkpointed state is found
            this.eventHubStartDateTimeUtc = settings.EventHubStartDateTimeUtc;

            await this.eventProcessorHost.RegisterEventProcessorFactoryAsync(
                this.trainProcessorFactory,
                new EventProcessorOptions { InitialOffsetProvider = this.InitialOffsetProvider });

            // don't perform too often
            this.perfUpdater = new SafeTimer(
                TimeSpan.FromMilliseconds(500),
                this.UpdatePerformanceCounters);

            var vwArgs = this.trainer.VowpalWabbit.Arguments;

            this.telemetry.TrackTrace(
                "OnlineTrainer started",
                SeverityLevel.Information,
                new Dictionary<string, string>
                {
                    { "CheckpointPolicy", settings.CheckpointPolicy.ToString() },
                    { "VowpalWabbit", settings.Metadata.TrainArguments },
                    { "ExampleTracing", settings.EnableExampleTracing.ToString() },
                    { "LearningRate", vwArgs.LearningRate.ToString() },
                    { "PowerT", vwArgs.PowerT.ToString() }
                });
        }

        /// <summary>
        /// Timer callback refreshing the trainer and pipeline performance counters.
        /// Failures are reported to telemetry and otherwise ignored.
        /// </summary>
        private void UpdatePerformanceCounters()
        {
            lock (this.managementLock)
            {
                // make sure this is thread safe w.r.t reset/start/stop/...
                try
                {
                    if (this.trainer != null && this.trainProcessorFactory != null)
                    {
                        this.trainer.UpdatePerformanceCounters();
                        this.trainProcessorFactory.UpdatePerformanceCounters();
                    }
                }
                catch (Exception ex)
                {
                    this.telemetry.TrackException(ex);
                }
            }
        }

        /// <summary>
        /// Tears down timer, event processor host, pipeline factory, learner and performance
        /// counters (in that order) and clears the corresponding fields.
        /// </summary>
        private async Task StopInternalAsync()
        {
            this.telemetry.TrackTrace("OnlineTrainer stopping", SeverityLevel.Verbose);

            if (this.perfUpdater != null)
            {
                this.perfUpdater.Stop(TimeSpan.FromMinutes(1));
                this.perfUpdater = null;
            }

            if (this.eventProcessorHost != null)
            {
                try
                {
                    await this.eventProcessorHost.UnregisterEventProcessorAsync();
                }
                catch (Exception ex)
                {
                    // best effort: keep tearing down the remaining components
                    this.telemetry.TrackException(ex);
                }

                this.eventProcessorHost = null;
            }

            if (this.trainProcessorFactory != null)
            {
                // flushes the pipeline
                this.trainProcessorFactory.Dispose();
                this.trainProcessorFactory = null;
            }

            if (this.trainer != null)
            {
                this.trainer.Dispose();
                this.trainer = null;
            }

            if (this.perfCounters != null)
            {
                this.perfCounters.Dispose();
                this.perfCounters = null;
            }

            this.telemetry.TrackTrace("OnlineTrainer stopped", SeverityLevel.Verbose);
        }

        /// <summary>
        /// Callback handed to the learner for examples whose serializer completes later:
        /// creates the examples and posts them to the learn block without blocking.
        /// The serializer is always disposed.
        /// </summary>
        private void DelayedExampleCallback(VowpalWabbitJsonSerializer serializer)
        {
            try
            {
                this.perfCounters.Feature_Requests_Pending.IncrementBy(-1);

                var data = (PipelineData)serializer.UserContext;
                data.Example = serializer.CreateExamples();

                // fire and forget
                // must not block to avoid dead lock
                this.trainProcessorFactory.LearnBlock
                    .SendAsync(data)
                    .ContinueWith(async ret =>
                    {
                        if (!await ret)
                        {
                            this.telemetry.TrackTrace("Unable to enqueue delayed examples", SeverityLevel.Error);

                            // since we couldn't enqueue, need to dispose here
                            data.Example.Dispose();
                        }
                    });
            }
            catch (Exception e)
            {
                this.telemetry.TrackException(e);
            }
            finally
            {
                serializer.Dispose();
            }
        }

        ///
        /// Dispose the trainer.
///
        public void Dispose()
        {
            try
            {
                this.StopAsync().Wait(TimeSpan.FromMinutes(1));
            }
            catch (Exception ex)
            {
                // disposal is best effort; report and continue
                this.telemetry.TrackException(ex);
            }
        }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Learner.cs000066400000000000000000000074121332666127000223770ustar00rootroot00000000000000using Microsoft.ApplicationInsights;
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Blob;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Reactive.Linq;
using System.Threading.Tasks;
using VW;
using VW.Serializer;

namespace VW.Azure.Trainer
{
    /// <summary>
    /// Holds the Vowpal Wabbit instance, the trainer state and the storage client used by the
    /// online trainer. Further members live in the other partial class files.
    /// </summary>
    internal partial class Learner : IDisposable
    {
        internal const string StateBlobName = "state.json";

        private readonly TelemetryClient telemetry;

        private CloudBlobClient blobClient;

        private VW.VowpalWabbit vw;

        private VowpalWabbitJsonReferenceResolver referenceResolver;

        // event ids learned since the last checkpoint
        private List<string> trackbackList;

        private OnlineTrainerSettingsInternal settings;

        private OnlineTrainerState state;

        private readonly Action<VowpalWabbitJsonSerializer> delayedExampleCallback;

        private DateTime startDateTime;

        private readonly PerformanceCounters perfCounters;

        //private VowpalWabbitThreadedLearning vwAllReduce;

        /// <summary>
        /// Initializes a new instance and creates the blob client from
        /// <see cref="OnlineTrainerSettingsInternal.StorageConnectionString"/>.
        /// </summary>
        /// <param name="settings">Trainer settings.</param>
        /// <param name="delayedExampleCallback">Callback passed to the JSON reference resolver.</param>
        /// <param name="perfCounters">Performance counters updated by the learner.</param>
        internal Learner(OnlineTrainerSettingsInternal settings, Action<VowpalWabbitJsonSerializer> delayedExampleCallback, PerformanceCounters perfCounters)
        {
            this.telemetry = new TelemetryClient();

            this.settings = settings;
            this.delayedExampleCallback = delayedExampleCallback;
            this.perfCounters = perfCounters;

            this.trackbackList = new List<string>();
            this.blobClient = CloudStorageAccount.Parse(settings.StorageConnectionString).CreateCloudBlobClient();
        }

        /// <summary>
        /// Copies the reference resolver statistics into the performance counters.
        /// </summary>
        internal void UpdatePerformanceCounters()
        {
            if (this.referenceResolver != null)
            {
                // don't do this too often as it grabs a lock
                var stats = this.referenceResolver.Statistics;
                this.perfCounters.Features_Cached.RawValue = stats.ItemCount;
                this.perfCounters.Feature_Requests_Pending.RawValue = stats.NumberOfOpenRequests;
            }
        }

        internal OnlineTrainerState State
        {
            get { return this.state; }
        }

        internal OnlineTrainerSettingsInternal Settings
        {
            get { return this.settings; }
        }

        /// <summary>
        /// create light-weight VW instance for example deserialization
        /// need thread-safe example pool wrapper as the examples are allocated and disposed on different threads
        /// </summary>
        internal VW.VowpalWabbit VowpalWabbit
        {
            get { return this.vw; }
        }

        internal VowpalWabbitJsonReferenceResolver ReferenceResolver
        {
            get { return this.referenceResolver; }
        }

        /// <summary>
        /// Gets a reference to the blob holding the latest client model, creating the model
        /// container if it does not exist yet.
        /// </summary>
        internal async Task<CloudBlockBlob> GetLatestModelBlob()
        {
            var latestModelContainerName = OnlineTrainerSettings.ModelContainerName;
            var mwtModelContainer = this.blobClient.GetContainerReference(latestModelContainerName);
            await mwtModelContainer.CreateIfNotExistsAsync();

            return mwtModelContainer.GetBlockBlobReference(OnlineTrainerSettings.LatestModelBlobName);
        }

        /// <summary>
        /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources.
        /// </summary>
        public void Dispose()
        {
            //if (this.vwAllReduce != null)
            //{
            //    this.vwAllReduce.Dispose();
            //    this.vwAllReduce = null;
            //}

            if (this.vw != null)
            {
                this.vw.Dispose();
                this.vw = null;
            }

            if (this.referenceResolver != null)
            {
                this.referenceResolver.Dispose();
                this.referenceResolver = null;
            }
        }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/OnlineTrainerSettings.cs000066400000000000000000000032321332666127000252750ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------

namespace VW.Azure.Trainer
{
    /// <summary>
    /// Settings for the online trainer.
    /// </summary>
    public class OnlineTrainerSettings
    {
        ///
        /// Azure storage container name containing the latest model.
///
        public const string ModelContainerName = "mwt-models";

        /// <summary>
        /// Azure storage blob name of the latest model.
        /// </summary>
        public const string LatestModelBlobName = "current";

        /// <summary>
        /// Azure storage container name containing the latest settings.
        /// </summary>
        public const string SettingsContainerName = "mwt-settings";

        /// <summary>
        /// Azure storage blob name of the latest settings.
        /// </summary>
        public const string LatestClientSettingsBlobName = "client";

        /// <summary>
        /// Application ID used by performance counter instance name.
        /// </summary>
        public string ApplicationID { get; set; }

        /// <summary>
        /// Training arguments to be used in training service.
        /// </summary>
        public string TrainArguments { get; set; }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/OnlineTrainerSettingsInternal.cs000066400000000000000000000052721332666127000270000ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------

using Microsoft.ServiceBus.Messaging;
using System;
using VW.Azure.Trainer.Checkpoint;

namespace VW.Azure.Trainer
{
    /// <summary>
    /// The internal trainer settings.
    /// </summary>
    public class OnlineTrainerSettingsInternal
    {
        /// <summary>
        /// The Azure storage container name used for model and state history.
        /// </summary>
        public string StorageContainerName { get; private set; } = "onlinetrainer";

        /// <summary>
        /// External supplied meta data.
        /// </summary>
        public OnlineTrainerSettings Metadata { get; set; }

        /// <summary>
        /// The initial model the training run started from.
        /// </summary>
        public string InitialVowpalWabbitModel { get; set; }

        /// <summary>
        /// Azure storage connection string used for checkpointing.
        /// </summary>
        public string StorageConnectionString { get; set; }

        ///
        /// Input data Azure EventHub connection string.
///
        public string JoinedEventHubConnectionString { get; set; }

        /// <summary>
        /// Consumer group used for joined events.
        /// </summary>
        public string JoinedEventHubConsumerGroup { get; set; } = EventHubConsumerGroup.DefaultGroupName;

        /// <summary>
        /// Evaluation output Azure Eventhub connection string.
        /// </summary>
        public string EvalEventHubConnectionString { get; set; }

        /// <summary>
        /// Checkpoint policy.
        /// </summary>
        public ICheckpointPolicy CheckpointPolicy { get; set; }

        /// <summary>
        /// True if examples should be traced.
        /// </summary>
        public bool EnableExampleTracing { get; set; }

        /// <summary>
        /// Null will let the trainer read events earliest available timestamps in event hub input;
        /// Any other valid DateTime will let the trainer read events from that point in time.
        /// </summary>
        public DateTime? EventHubStartDateTimeUtc { get; set; }

        /// <summary>
        /// True if a fresh start was forced.
        /// </summary>
        internal bool ForceFreshStart { get; set; }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/OnlineTrainerState.cs000066400000000000000000000042231332666127000245560ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------

using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Linq;

namespace VW.Azure.Trainer
{
    /// <summary>
    /// Online trainer state structure used to serialize state to state.json.
    /// </summary>
    public class OnlineTrainerState
    {
        /// <summary>
        /// Initializes a new instance with empty partition maps.
        /// </summary>
        public OnlineTrainerState()
        {
            // keyed by partition id; values are the offsets recorded by the learner
            this.Partitions = new Dictionary<string, string>();
            this.PartitionsDateTime = new Dictionary<string, DateTime>();
        }

        /// <summary>
        /// Current EventHub state.
        /// </summary>
        public Dictionary<string, string> Partitions { get; private set; }

        ///
        /// Current EventHub state using date time.
///
        public Dictionary<string, DateTime> PartitionsDateTime { get; private set; }

        /// <summary>
        /// Union of <see cref="Partitions"/> and <see cref="PartitionsDateTime"/>; entries sharing a
        /// partition key are joined with ';'. Date/time values are formatted with the "u" format.
        /// </summary>
        [JsonIgnore]
        public Dictionary<string, string> PartitionsDetailed
        {
            get
            {
                // PartitionsDetailed
                return this.Partitions
                    .Union(this.PartitionsDateTime.Select(kv => new KeyValuePair<string, string>(kv.Key, kv.Value.ToString("u"))))
                    .GroupBy(kv => kv.Key)
                    .ToDictionary(kv => kv.Key, group => string.Join(";", group.Select(kv => kv.Value)));
            }
        }

        /// <summary>
        /// The models name (timestamp + name).
        /// </summary>
        public string ModelName { get; set; }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Operations/000077500000000000000000000000001332666127000225775ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Operations/CheckPointOperation.cs000066400000000000000000000117351332666127000270450ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------

using Microsoft.ApplicationInsights.DataContracts;
using Microsoft.WindowsAzure.Storage.Blob;
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Threading.Tasks;
using VW.Azure.Trainer.Data;

namespace VW.Azure.Trainer
{
    internal partial class Learner
    {
        /// <summary>
        /// Asks the checkpoint policy whether a checkpoint is due after <paramref name="numExamples"/>
        /// more examples. Always false while no event has been recorded since the last checkpoint.
        /// </summary>
        internal bool ShouldCheckpoint(int numExamples)
        {
            // don't checkpoint if we didn't see any valid events.
            return this.trackbackList.Count > 0 &&
                this.settings.CheckpointPolicy.ShouldCheckpointAfterExample(numExamples);
        }

        /// <summary>
        /// Snapshots the current model, state and trackback list into a <see cref="CheckpointData"/>
        /// and clears the trackback list.
        /// </summary>
        /// <param name="updateClientModel">Copied to the checkpoint; controls whether the client visible model is updated when it is stored.</param>
        /// <returns>The data to be persisted by <see cref="Checkpoint"/>.</returns>
        internal CheckpointData CreateCheckpointData(bool updateClientModel)
        {
            // TODO: checkpoint resolver state.
            var data = new CheckpointData
            {
                Timestamp = DateTime.UtcNow.ToString("yyyyMMdd/HHmmss", CultureInfo.InvariantCulture),
                UpdateClientModel = updateClientModel,
                StartDateTime = this.startDateTime
            };

            var modelId = Guid.NewGuid().ToString();

            // store the model name
            this.state.ModelName = $"{data.Timestamp}/model";

            data.State = JsonConvert.SerializeObject(this.State);

            data.TrackbackCount = this.trackbackList.Count;
            data.TrackbackList = $"modelid: {modelId}\n" + string.Join("\n", this.trackbackList);
            this.trackbackList.Clear();

            using (var memStream = new MemoryStream())
            {
                this.vw.ID = modelId;
                this.vw.SaveModel(memStream);

                data.Model = memStream.ToArray();

                return data;
            }
        }

        /// <summary>
        /// Uploads trackback, state and model of a checkpoint to blob storage, refreshes the
        /// fast recovery state blob and, if requested, the client visible model.
        /// Exceptions are reported to telemetry and not propagated.
        /// </summary>
        /// <param name="obj">Expected to be a <see cref="CheckpointData"/>; anything else is traced and ignored.</param>
        internal async Task Checkpoint(object obj)
        {
            try
            {
                var data = obj as CheckpointData;
                if (data == null)
                {
                    // data is null on this path, so report what was actually received
                    this.telemetry.TrackTrace($"Received invalid data: {obj}");
                    return;
                }

                var modelName = data.Timestamp + "/model";

                this.telemetry.TrackTrace(
                    "CheckPoint " + modelName,
                    SeverityLevel.Information);

                var container = this.blobClient.GetContainerReference(this.settings.StorageContainerName);
                await container.CreateIfNotExistsAsync();

                // save model to storage account

                // save trackback file to storage account
                var trackbackName = string.Format(CultureInfo.InvariantCulture, "{0}/model.trackback", data.Timestamp);
                var trackbackBlob = container.GetBlockBlobReference(trackbackName);

                // keep a history of state files
                var stateName = string.Format(CultureInfo.InvariantCulture, "{0}/{1}", data.Timestamp, Learner.StateBlobName);
                var stateBlob = container.GetBlockBlobReference(stateName);

                await Task.WhenAll(
                    trackbackBlob.UploadTextAsync(data.TrackbackList),
                    stateBlob.UploadTextAsync(data.State));

                var modelBlob = await ExportModel(container, data.Model, modelName, data.TrackbackCount);

                // update the fast recovery state file
                var latestState = container.GetBlockBlobReference(Learner.StateBlobName);
                await latestState.StartCopyAsync(stateBlob);

                if (data.UpdateClientModel)
                {
                    // update latest model
                    var latestModel = await this.GetLatestModelBlob();
                    await latestModel.StartCopyAsync(modelBlob);
                }
            }
            catch (Exception ex)
            {
                this.telemetry.TrackException(ex);
            }
        }

        /// <summary>
        /// Uploads the serialized model to <paramref name="modelName"/> in the given container
        /// and traces size, URI and number of examples.
        /// </summary>
        /// <returns>The blob the model was written to.</returns>
        private async Task<CloudBlockBlob> ExportModel(CloudBlobContainer container, byte[] model, string modelName, int numExamples)
        {
            var modelBlob = container.GetBlockBlobReference(modelName);
            await modelBlob.UploadFromByteArrayAsync(model, 0, model.Length);

            this.telemetry.TrackTrace(
                $"Model Save {modelBlob.Uri}",
                SeverityLevel.Information,
                new Dictionary<string, string>
                {
                    { "Size", model.Length.ToString() },
                    { "Uri", modelBlob.Uri.ToString() },
                    { "Examples added", numExamples.ToString() }
                });

            return modelBlob;
        }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Operations/EvalOperation.cs000066400000000000000000000223131332666127000256770ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------

using Microsoft.ApplicationInsights;
using Microsoft.ServiceBus.Messaging;
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Reactive.Linq;
using System.Text;
using System.Threading.Tasks.Dataflow;
using VW.Azure.Trainer.Data;

namespace VW.Azure.Trainer.Operations
{
    /// <summary>
    /// A single evaluation record together with its serialized form and target partition key.
    /// </summary>
    internal sealed class EvalData
    {
        internal EvalEventData Data { get; set; }

        internal string JSON { get; set; }

        internal string PartitionKey { get; set; }
    }

    /// <summary>
    /// Evaluation Event Data
    /// </summary>
    public sealed class EvalEventData
    {
        /// <summary>
        /// Policy name
        /// </summary>
        [JsonProperty("name")]
        public string Name { get; set; }

        /// <summary>
        /// Weighted cost
        /// </summary>
        [JsonProperty("weightedcost")]
        public float WeightedCost { get; set; }

        /// <summary>
        /// Importance weight
        /// </summary>
        [JsonProperty("importanceweight")]
        public float ImportanceWeight { get; set; }

        /// <summary>
        /// Timestamp
        /// </summary>
        [JsonProperty("timestamp")]
        public DateTime Timestamp { get; set; }

        /// <summary>
        /// Event Id
        /// </summary>
        [JsonProperty("eventid")]
        public string EventId { get; set; }
    }

    /// <summary>
    /// Turns trainer results into per-policy evaluation records and uploads them in batches
    /// to the evaluation Event Hub.
    /// </summary>
    internal sealed class EvalOperation : IDisposable
    {
        private readonly EventHubClient evalEventHubClient;

        private TransformManyBlock<object, EvalData> evalBlock;
        private IDisposable evalBlockDisposable;
        private TelemetryClient telemetry;
        private PerformanceCounters performanceCounters;

        /// <summary>
        /// Creates the Event Hub client, the evaluation block and the batching subscription.
        /// </summary>
        internal EvalOperation(OnlineTrainerSettingsInternal settings, PerformanceCounters performanceCounters)
        {
            this.performanceCounters = performanceCounters;

            this.telemetry = new TelemetryClient();

            // evaluation pipeline
            this.evalEventHubClient = EventHubClient.CreateFromConnectionString(settings.EvalEventHubConnectionString);

            this.evalBlock = new TransformManyBlock<object, EvalData>(
                (Func<object, IEnumerable<EvalData>>)this.OfflineEvaluate,
                new ExecutionDataflowBlockOptions
                {
                    MaxDegreeOfParallelism = 4,
                    BoundedCapacity = 1024
                });

            this.evalBlock.Completion.Trace(this.telemetry, "Stage 4 - Evaluation pipeline");

            // batch output together to match EventHub throughput by maintaining maximum latency of 1 seconds
            this.evalBlockDisposable = this.evalBlock.AsObservable()
                .GroupBy(k => k.PartitionKey)
                .Select(g =>
                    g.Window(TimeSpan.FromSeconds(1))
                     .Select(w => w.Buffer(245 * 1024, e => Encoding.UTF8.GetByteCount(e.JSON)))
                     .SelectMany(w => w)
                     .Subscribe(this.UploadEvaluation))
                .Publish()
                .Connect();
        }

        internal ITargetBlock<object> TargetBlock
        {
            get { return this.evalBlock; }
        }

        /// <summary>
        /// Dataflow entry point: converts one trainer result into serialized evaluation records.
        /// Returns an empty list for invalid input or when evaluation throws.
        /// </summary>
        private List<EvalData> OfflineEvaluate(object trainerResultObj)
        {
            try
            {
                var trainerResult = trainerResultObj as TrainerResult;
                if (trainerResult == null)
                {
                    this.telemetry.TrackTrace($"Received invalid data: trainerResult is null");
                    return new List<EvalData>();
                }

                return this.OfflineEvaluateInternal(trainerResult)
                    // insert event id & timestamp to enable data correlation
                    .Select(e =>
                    {
                        e.EventId = trainerResult.EventId;
                        e.Timestamp = trainerResult.Timestamp;

                        var ed = new EvalData
                        {
                            PartitionKey = trainerResult.PartitionKey,
                            Data = e,
                            JSON = JsonConvert.SerializeObject(e)
                        };
                        return ed;
                    })
                    .ToList();
            }
            catch (Exception e)
            {
                this.telemetry.TrackException(e);
                return new List<EvalData>();
            }
        }

        /// <summary>
        /// Yields one evaluation record for the latest policy, the deployed policy, the default
        /// policy and one constant policy per ranked action.
        /// </summary>
        private IEnumerable<EvalEventData> OfflineEvaluateInternal(TrainerResult trainerResult)
        {
            this.performanceCounters.Stage4_Evaluation_PerSec.Increment();
            this.performanceCounters.Stage4_Evaluation_Total.Increment();

            if (trainerResult == null)
            {
                yield break;
            }

            if (trainerResult.Label == null)
            {
                this.telemetry.TrackTrace($"Received invalid data: trainerResult.Label is null");
                yield break;
            }

            if (trainerResult.ProgressiveProbabilities == null)
            {
                this.telemetry.TrackTrace($"Received invalid data: trainerResult.Probabilities is null");
                yield break;
            }

            // VW action is 0-based, label Action is 1 based
            var pi_a_x = trainerResult.ProgressiveProbabilities[trainerResult.Label.Action - 1];
            var p_a_x = trainerResult.Label.Probability * (1 - trainerResult.ProbabilityOfDrop);

            // the latest one we're currently training
            yield return new EvalEventData
            {
                Name = "Latest Policy",
                // calculate expectation under current randomized policy (using current exploration strategy)
                WeightedCost = (trainerResult.Label.Cost * pi_a_x) / p_a_x,
                ImportanceWeight = pi_a_x / p_a_x
            };

            // the one currently running
            yield return new EvalEventData
            {
                Name = "Deployed Policy",
                WeightedCost = trainerResult.Label.Cost,
                ImportanceWeight = 1 // for deployed policy just use the observed cost
            };

            // Default = choosing the action that's supplied by caller
            yield return new EvalEventData
            {
                Name = "Default Policy",
                WeightedCost = VowpalWabbitContextualBanditUtil.GetUnbiasedCost(trainerResult.Label.Action, (uint)1, trainerResult.Label.Cost, trainerResult.Label.Probability),
                ImportanceWeight = trainerResult.Label.Action == 1 ? 1 / (trainerResult.ObservedProbabilities[0] * (1 - trainerResult.ProbabilityOfDrop)) : 0
            };

            // per action tag policies
            for (int action = 1; action <= trainerResult.ProgressiveRanking.Length; action++)
            {
                string tag;
                if (!trainerResult.ActionsTags.TryGetValue(action, out tag))
                    tag = action.ToString(CultureInfo.InvariantCulture);

                var name = $"Constant Policy {tag}";
                yield return new EvalEventData
                {
                    Name = name,
                    WeightedCost = VowpalWabbitContextualBanditUtil.GetUnbiasedCost(trainerResult.Label.Action, (uint)action, trainerResult.Label.Cost, trainerResult.Label.Probability),
                    ImportanceWeight = trainerResult.Label.Action == action ? 1 / (trainerResult.ObservedProbabilities[action - 1] * (1 - trainerResult.ProbabilityOfDrop)) : 0
                };
            }
        }

        /// <summary>
        /// Sends one batch of evaluation records as a single multi-line JSON event.
        /// Exceptions are reported to telemetry and not propagated.
        /// </summary>
        private void UploadEvaluation(IList<EvalData> batch)
        {
            try
            {
                // NOTE(review): Stage4_Evaluation_Total is also incremented once per result in
                // OfflineEvaluateInternal - confirm counting batches here as well is intended.
                this.performanceCounters.Stage4_Evaluation_Total.Increment();
                this.performanceCounters.Stage4_Evaluation_BatchesPerSec.Increment();

                // construct multi-line JSON
                // TODO: check on how we batch in client library, we should use the EventId
                var eventData = new EventData(Encoding.UTF8.GetBytes(string.Join("\n", batch.Select(b => b.JSON))))
                {
                    PartitionKey = batch.First().PartitionKey
                };

                this.evalEventHubClient.Send(eventData);
            }
            catch (Exception e)
            {
                this.telemetry.TrackException(e);
            }
        }

        /// <summary>
        /// Completes the evaluation block (waiting up to one minute), disposes the batching
        /// subscription and closes the Event Hub client.
        /// </summary>
        public void Dispose()
        {
            if (this.evalBlock != null)
            {
                this.evalBlock.Complete();
                this.evalBlock.Completion.Wait(TimeSpan.FromMinutes(1));
                this.evalBlock = null;
            }

            if (this.evalBlockDisposable != null)
            {
                this.evalBlockDisposable.Dispose();
                this.evalBlockDisposable = null;

                // close the client once, together with the subscription that was using it
                try
                {
                    this.evalEventHubClient.Close();
                }
                catch (Exception e)
                {
                    this.telemetry.TrackException(e);
                }
            }
        }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Operations/InitializeOperation.cs000066400000000000000000000216651332666127000271220ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------

using Microsoft.ApplicationInsights.DataContracts;
using Microsoft.WindowsAzure.Storage.Blob;
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Runtime.Caching;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using VW;
using VW.Serializer;

namespace VW.Azure.Trainer
{
    internal partial class Learner
    {
        /// <summary>
        /// Tries to resume from the latest state blob; if that fails, walks the dated
        /// checkpoint directories from newest to oldest and resumes from the first state that
        /// loads. Falls back to a fresh start when nothing can be resumed.
        /// </summary>
        public async Task FindAndResumeFromState()
        {
            var container = this.blobClient.GetContainerReference(this.settings.StorageContainerName);
            await container.CreateIfNotExistsAsync();

            var stateBlob = container.GetBlockBlobReference(Learner.StateBlobName);
            if (await stateBlob.ExistsAsync() &&
                await this.TryResumeFromState(stateBlob))
                return;

            // find days
            // directories whose prefix is not a valid date are skipped instead of aborting the search
            var dayDirectories = from c in container.ListBlobs(useFlatBlobListing: false)
                                 let dir = c as CloudBlobDirectory
                                 where dir != null && dir.Prefix.Length >= 8
                                 let date = ParseExactOrNull(dir.Prefix.Substring(0, 8), "yyyyMMdd")
                                 where date != null
                                 orderby date descending
                                 select dir;

            // the blob name is matched literally ('.' must not act as a wildcard)
            var statePattern = @"^\d{8}/(\d{6})/" + Regex.Escape(Learner.StateBlobName) + "$";

            foreach (var day in dayDirectories)
            {
                // find state files
                var states = from f in container.ListBlobs(day.Prefix, useFlatBlobListing: true)
                             let file = f as CloudBlockBlob
                             where file != null
                             let match = Regex.Match(file.Name, statePattern)
                             where match.Success
                             let time = ParseExactOrNull(match.Groups[1].Value, "HHmmss")
                             where time != null
                             orderby time descending
                             select file;

                foreach (var file in states)
                {
                    if (await TryResumeFromState(file))
                        return;
                }
            }

            // unable to find a model, fallback to fresh start
            this.FreshStart();
        }

        /// <summary>
        /// Parses <paramref name="text"/> with the exact invariant-culture format, returning null
        /// instead of throwing when it does not match.
        /// </summary>
        private static DateTime? ParseExactOrNull(string text, string format)
        {
            DateTime parsed;
            return DateTime.TryParseExact(text, format, CultureInfo.InvariantCulture, DateTimeStyles.None, out parsed)
                ? parsed
                : (DateTime?)null;
        }

        /// <summary>
        /// Starts with the supplied (or a new, empty) state and initializes Vowpal Wabbit from
        /// the train arguments, optionally loading the supplied model.
        /// </summary>
        /// <param name="state">State to start from; a new one is created when null.</param>
        /// <param name="model">Optional serialized model to load.</param>
        internal void FreshStart(OnlineTrainerState state = null, byte[] model = null)
        {
            if (state == null)
                state = new OnlineTrainerState();

            this.telemetry.TrackTrace("Fresh Start", SeverityLevel.Information);

            // start from scratch
            this.state = state;

            // save extra state so learning can be resumed later with new data
            var settings = new VowpalWabbitSettings("--save_resume --preserve_performance_counters " + this.settings.Metadata.TrainArguments);

            if (model != null)
                settings.ModelStream = new MemoryStream(model);

            this.InitializeVowpalWabbit(settings);
        }

        /// <summary>
        /// Loads the model named in the current state from blob storage.
        /// </summary>
        /// <returns>
        /// False if no model name is set or the container/blob is missing; true otherwise,
        /// including the case where the stored model was discarded due to disagreeing arguments.
        /// </returns>
        private async Task<bool> TryLoadModel()
        {
            // find the model blob
            if (string.IsNullOrEmpty(this.state.ModelName))
            {
                this.telemetry.TrackTrace("Model not specified");
                return false;
            }

            var container = this.blobClient.GetContainerReference(this.settings.StorageContainerName);
            if (!await container.ExistsAsync())
            {
                this.telemetry.TrackTrace($"Storage container missing '{this.settings.StorageContainerName}'");
                return false;
            }

            var modelBlob = container.GetBlockBlobReference(this.state.ModelName);
            if (!await modelBlob.ExistsAsync())
            {
                this.telemetry.TrackTrace($"Model blob '{this.state.ModelName}' is missing");
                return false;
            }

            // load the model
            var args = "--save_resume --preserve_performance_counters " + this.settings.Metadata.TrainArguments;
            try
            {
                using (var modelStream = await modelBlob.OpenReadAsync())
                {
                    // it's up to the external system to make sure the train arguments are compatible with the stored model
                    // if the arguments are changed substantially, one needs to invoke Reset which forces a refresh
                    this.InitializeVowpalWabbit(new VowpalWabbitSettings(args) { ModelStream = modelStream });
                    this.telemetry.TrackTrace($"Model loaded {this.state.ModelName}", SeverityLevel.Verbose);
                }
            }
            catch (VowpalWabbitArgumentDisagreementException ex)
            {
                // found conflicting arguments. Start fresh model
                this.InitializeVowpalWabbit(new VowpalWabbitSettings(args));

                this.telemetry.TrackTrace($"Arguments found in model {this.state.ModelName} disagree with newly supplied arguments: {args}. Discarding model and starting fresh: {ex.Message}", SeverityLevel.Verbose);
            }

            // store the initial model
            this.settings.InitialVowpalWabbitModel = this.state.ModelName;

            return true;
        }

        /// <summary>
        /// Creates the Vowpal Wabbit instance and the JSON reference resolver.
        /// </summary>
        /// <param name="vwSettings">Settings to create the instance from; pooling and tracing options are set here.</param>
        /// <exception cref="ArgumentException">The resulting command line uses neither cb_explore nor cb_explore_adf.</exception>
        private void InitializeVowpalWabbit(VowpalWabbitSettings vwSettings)
        {
            if (this.settings.EnableExampleTracing)
            {
                vwSettings.EnableStringExampleGeneration = true;
                vwSettings.EnableStringFloatCompact = true;
            }

            vwSettings.EnableThreadSafeExamplePooling = true;
            vwSettings.MaxExamples = 64 * 1024;

            try
            {
                this.startDateTime = DateTime.UtcNow;
                this.vw = new VW.VowpalWabbit(vwSettings);

                var cmdLine = vw.Arguments.CommandLine;

                if (!(cmdLine.Contains("--cb_explore") || cmdLine.Contains("--cb_explore_adf")))
                    throw new ArgumentException("Only cb_explore and cb_explore_adf are supported");
            }
            catch (Exception ex)
            {
                this.telemetry.TrackException(ex, new Dictionary<string, string>
                {
                    { "help", "Invalid model. For help go to https://github.com/JohnLangford/vowpal_wabbit/wiki/Azure-Trainer" }
                });

                // plain re-throw keeps the original stack trace ("throw ex" would reset it)
                throw;
            }

            this.referenceResolver = new VowpalWabbitJsonReferenceResolver(
                this.delayedExampleCallback,
                cacheRequestItemPolicyFactory:
                    key => new CacheItemPolicy()
                    {
                        SlidingExpiration = TimeSpan.FromHours(1),
                        RemovedCallback = this.CacheEntryRemovedCallback
                    });

            //this.vwAllReduce = new VowpalWabbitThreadedLearning(vwSettings.ShallowCopy(
            //    maxExampleQueueLengthPerInstance: 4*1024,
            //    parallelOptions: new ParallelOptions
            //    {
            //        MaxDegreeOfParallelism = 2,
            //    },
            //    exampleDistribution: VowpalWabbitExampleDistribution.RoundRobin,
            //    exampleCountPerRun: 128 * 1024));
        }

        /// <summary>
        /// Disposes serializers whose cache entry was evicted or expired and updates the
        /// pending/discarded request counters. Other removal reasons are ignored.
        /// </summary>
        private void CacheEntryRemovedCallback(CacheEntryRemovedArguments arguments)
        {
            switch (arguments.RemovedReason)
            {
                case CacheEntryRemovedReason.Evicted:
                case CacheEntryRemovedReason.Expired:
                    // free memory
                    var serializer = (VowpalWabbitJsonSerializer)arguments.CacheItem.Value;
                    serializer.Dispose();

                    this.perfCounters.Feature_Requests_Pending.IncrementBy(-1);
                    this.perfCounters.Feature_Requests_Discarded.Increment();
                    break;
            }
        }

        /// <summary>
        /// Deserializes the trainer state from the given blob and loads the model it refers to.
        /// </summary>
        /// <returns>True if the state was read and the model loaded; false otherwise.</returns>
        private async Task<bool> TryResumeFromState(CloudBlockBlob stateBlob)
        {
            using (var stream = await stateBlob.OpenReadAsync())
            using (var reader = new JsonTextReader(new StreamReader(stream)))
            {
                var jsonSerializer = JsonSerializer.CreateDefault();
                this.state = jsonSerializer.Deserialize<OnlineTrainerState>(reader);
            }

            if (this.state == null)
            {
                // empty or "null" state blob: nothing to resume from
                this.telemetry.TrackTrace($"State blob '{stateBlob.Uri}' is empty", SeverityLevel.Warning);
                return false;
            }

            this.telemetry.TrackTrace(
                $"Resume from '{stateBlob.Uri}'",
                SeverityLevel.Verbose,
                this.state.PartitionsDetailed);

            return await this.TryLoadModel();
        }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Operations/LatencyOperation.cs000066400000000000000000000017641332666127000264160ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------

using System;
using System.Threading.Tasks;
using VW.Azure.Trainer.Data;

namespace VW.Azure.Trainer
{
    /// <summary>
    /// Reports the end-to-end latency of trainer results as a telemetry metric,
    /// constructed with a one second interval for the throttled base operation.
    /// </summary>
    internal sealed class LatencyOperation : ThrottledOperation<TrainerResult>
    {
        public LatencyOperation() : base(TimeSpan.FromSeconds(1))
        { }

        protected override Task ProcessInternal(TrainerResult value)
        {
            this.telemetry.TrackMetric("End-to-End Latency " + value.PartitionId, value.Latency.TotalSeconds);

            return Task.FromResult(true);
        }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Operations/LearnOperation.cs000066400000000000000000000116501332666127000260530ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------

using Microsoft.ApplicationInsights.DataContracts;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Reactive.Linq;
using VW.Azure.Trainer.Data;
using VW.Labels;

namespace VW.Azure.Trainer
{
    internal partial class Learner
    {
        /// <summary>
        /// Learns from a single example and returns the progressive prediction together
        /// with the metadata of the example.
        /// </summary>
        /// <param name="example">The example to learn from. Its <c>Example</c> member is
        /// disposed before this method returns, whether learning succeeded or not.</param>
        /// <returns>The trainer result, or <c>null</c> if learning failed. Failures are
        /// reported to telemetry and counted as faulty examples; they are not rethrown.</returns>
        public TrainerResult Learn(PipelineData example)
        {
            try
            {
                if (this.settings.EnableExampleTracing)
                    this.telemetry.TrackTrace(
                        "Example",
                        SeverityLevel.Verbose,
                        new Dictionary
                        {
                            { "ID", example.EventId },
                            { "VW", example.Example.VowpalWabbitString },
                            { "JSON", example.JSON }
                        });

                var label = example.Example.Labels
                    .OfType()
                    .FirstOrDefault(l => l.Probability != 0f || l.Cost != 0);

                // A missing label is only reported; the example is still passed to Learn below.
                if (label == null)
                    this.telemetry.TrackTrace($"Unable to find valid label for event '{example.EventId}'", SeverityLevel.Warning);

                // predict first then learn to avoid information leak
                var progressivePrediction = example.Example.Learn(VowpalWabbitPredictionType.ActionProbabilities, this.vw);

                //if (this.vwAllReduce != null)
                //{
                //    this.vwAllReduce.Post(vw =>
                //    {
                //        var actions = example.Example.Learn(VowpalWabbitPredictionType.Multilabel, vw);

                //        PerformanceCounters.Instance.ExamplesLearnedTotal.Increment();
                //        PerformanceCounters.Instance.ExamplesLearnedSec.Increment();
                //        PerformanceCounters.Instance.FeaturesLearnedSec.IncrementBy((long)example.Example.NumberOfFeatures);

                //        example.Example.Dispose();
                //    });
                //}

                // record event id for reproducibility
                this.trackbackList.Add(example.EventId);

                this.perfCounters.Stage2_Learn_Total.Increment();
                this.perfCounters.Stage2_Learn_ExamplesPerSec.Increment();
                this.perfCounters.Stage2_Learn_FeaturesPerSec.IncrementBy((long)example.Example.NumberOfFeatures);

                // measure latency
                // Same value as the former local constant (10000), taken from the framework.
                // NOTE(review): dividing by ticks-per-millisecond rather than ticks-per-second
                // presumably scales the counter to read in milliseconds - confirm.
                var latency = DateTime.UtcNow - example.Timestamp;
                var performanceCounterTicks =
                    latency.Ticks * Stopwatch.Frequency / TimeSpan.TicksPerMillisecond;
                this.perfCounters.AverageExampleLatency.IncrementBy(performanceCounterTicks);
                this.perfCounters.AverageExampleLatencyBase.Increment();

                // update partition state
                // NOTE(review): the original tested PartitionId for null twice; the second test
                // may have been meant for another member (e.g. Offset) - verify against callers.
                if (example.PartitionId != null)
                {
                    this.state.Partitions[example.PartitionId] = example.Offset;
                    // this.state.PartitionsDateTime[eventHubExample.PartitionKey] = eventHubExample.Offset;
                }

                return new TrainerResult(progressivePrediction, example.Actions, example.Probabilities)
                {
                    Label = label,
                    PartitionId = example.PartitionId,
                    PartitionKey = example.PartitionKey,
                    Latency = latency,
                    ProbabilityOfDrop = example.ProbabilityOfDrop,
                    ActionsTags = example.ActionsTags,
                    EventId = example.EventId,
                    Timestamp = example.Timestamp
                };
            }
            catch (Exception ex)
            {
                // example.Example may be null (the finally block guards for it); use a
                // null-conditional access so the handler cannot throw and mask the original error.
                this.telemetry.TrackException(ex, new Dictionary
                {
                    { "ID", example.EventId },
                    { "VW", example.Example?.VowpalWabbitString },
                    { "JSON", example.JSON }
                });

                this.perfCounters.Stage2_Faulty_ExamplesPerSec.Increment();
                this.perfCounters.Stage2_Faulty_Examples_Total.Increment();
                return null;
            }
            finally
            {
                if (example.Example != null)
                    example.Example.Dispose();
            }
        }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Operations/ThrottledOperation.cs000066400000000000000000000044031332666127000267610ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------

using Microsoft.ApplicationInsights;
using System;
using System.Linq;
using System.Reactive.Linq;
using System.Reactive.Subjects;
using System.Threading.Tasks;

namespace VW.Azure.Trainer
{
    /// <summary>
    /// Base class for operations that are rate limited: values handed to <see cref="Process"/>
    /// are throttled and then passed to <see cref="ProcessInternal"/>. Exceptions thrown by
    /// <see cref="ProcessInternal"/> are reported to telemetry and swallowed.
    /// </summary>
    internal abstract class ThrottledOperation : IDisposable
    {
        private Subject pipeline;

        private IDisposable pipelineDisposable;

        protected readonly TelemetryClient telemetry;

        /// <summary>
        /// Initializes a new instance and connects the throttled pipeline.
        /// </summary>
        /// <param name="dueTime">Throttling duration applied to incoming values.</param>
        protected ThrottledOperation(TimeSpan dueTime)
        {
            this.telemetry = new TelemetryClient();
            this.pipeline = new Subject();

            // Limit the number of events using the caller supplied due time
            // (previously the parameter was ignored and 5 seconds were hard-coded).
            var connectable = this.pipeline
                .Throttle(dueTime)
                .SelectMany(value =>
                    Observable.FromAsync(async () =>
                    {
                        try
                        {
                            await this.ProcessInternal(value);
                        }
                        catch (Exception e)
                        {
                            this.telemetry.TrackException(e);
                        }
                    }
                ))
                // Publish instead of Replay: nothing subscribes to the connectable, so
                // replaying would only retain every produced element for the pipeline's lifetime.
                .Publish();

            this.pipelineDisposable = connectable.Connect();
        }

        /// <summary>
        /// Pushes a value into the throttled pipeline.
        /// </summary>
        internal void Process(T performance)
        {
            this.pipeline.OnNext(performance);
        }

        /// <summary>
        /// Handles a value that passed the throttle.
        /// </summary>
        protected abstract Task ProcessInternal(T value);

        /// <summary>
        /// Disposes the subject and the pipeline connection. Safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            if (this.pipeline != null)
            {
                this.pipeline.Dispose();
                this.pipeline = null;
            }

            if (this.pipelineDisposable != null)
            {
                this.pipelineDisposable.Dispose();
                this.pipelineDisposable = null;
            }
        }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/PerformanceCounters.cs000066400000000000000000000301561332666127000247740ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
// // -------------------------------------------------------------------------------------------------------------------- using Microsoft.ApplicationInsights.Extensibility; using Microsoft.ApplicationInsights.Extensibility.PerfCounterCollector; using System; using System.Linq; using System.Diagnostics; using Microsoft.ApplicationInsights; using System.Collections.Generic; using System.Text.RegularExpressions; using System.Text; namespace VW.Azure.Trainer { /// /// Performance counters reporting various metrics. /// public sealed class PerformanceCounters : IDisposable { /// /// Performance counter attribute to autmatically allow counter creation. /// public class PerformanceCounterTypeAttribute : Attribute { /// /// Initializes a new instance. /// public PerformanceCounterTypeAttribute(PerformanceCounterType type, string name = null) { this.Type = type; this.Name = name; } /// /// The performance counter type. /// public PerformanceCounterType Type { get; private set; } /// /// The desired name for the performance counter. 
/// public string Name { get; private set; } } private const string category = "Online Trainer"; static PerformanceCounters() { try { if (PerformanceCounterCategory.Exists(category)) PerformanceCounterCategory.Delete(category); // order to be sure that *Base follows counter var props = typeof(PerformanceCounters) .GetProperties() .Where(p => p.PropertyType == typeof(PerformanceCounter)) .OrderBy(p => p.Name).ToList(); var counterCollection = new CounterCreationDataCollection(); foreach (var p in props) { var attr = (PerformanceCounterTypeAttribute)p.GetCustomAttributes(typeof(PerformanceCounterTypeAttribute), true).First(); counterCollection.Add(new CounterCreationData() { CounterName = p.Name, CounterHelp = string.Empty, CounterType = attr.Type }); } PerformanceCounterCategory.Create(category, "Online Trainer Perf Counters", PerformanceCounterCategoryType.MultiInstance, counterCollection); } catch (Exception e) { new TelemetryClient().TrackException(e); } } /// /// Initializes a new instance. /// public PerformanceCounters(string instance) { try { var perfCollectorModule = new PerformanceCollectorModule(); var props = typeof(PerformanceCounters) .GetProperties() .Where(p => p.PropertyType == typeof(PerformanceCounter)); var all = new List(); foreach (var p in props) { var counter = new PerformanceCounter(category, p.Name, instance, false); p.SetValue(this, counter); counter.RawValue = 0; all.Add(counter); if (!p.Name.EndsWith("Base", StringComparison.Ordinal)) { var perfCounterSpec = $"\\{category}({instance})\\{p.Name}"; var reportAs = p.Name .Replace('_', ' ') .Replace("Per", "/"); // http://i1.blogs.msdn.com/b/visualstudioalm/archive/2015/04/01/application-insights-choose-your-own-performance-counters.aspx // Currently, metric names may only contain letters, round brackets, forward slashes, hyphens, underscores, spaces and dots. 
var reportAsStringBuilder = new StringBuilder(reportAs);
                        foreach (Match match in Regex.Matches(reportAs, "[0-9]"))
                            reportAsStringBuilder[match.Index] = (char)('A' + (match.Groups[0].Value[0] - '0'));

                        perfCollectorModule.Counters.Add(new PerformanceCounterCollectionRequest(perfCounterSpec, reportAsStringBuilder.ToString()));
                    }
                }

                perfCollectorModule.Initialize(TelemetryConfiguration.Active);

                this.All = all.ToArray();
            }
            catch (Exception e)
            {
                new TelemetryClient().TrackException(e);
            }
        }

        /// <summary>
        /// Disposes performance counter native resources.
        /// </summary>
        /// <remarks>
        /// Every public property whose type implements <see cref="IDisposable"/> is disposed
        /// and reset to null, so calling this method again is a no-op.
        /// </remarks>
        public void Dispose()
        {
            // Match on assignability: the counter properties are declared as PerformanceCounter,
            // so comparing the property type for equality with typeof(IDisposable) never matched
            // and no counter was ever released.
            var props = typeof(PerformanceCounters)
                .GetProperties()
                .Where(p => typeof(IDisposable).IsAssignableFrom(p.PropertyType));

            foreach (var p in props)
            {
                var perfCounter = (IDisposable)p.GetValue(this);

                if (perfCounter != null)
                {
                    perfCounter.Dispose();
                    p.SetValue(this, null);
                }
            }
        }

        ///
        /// List of all online trainer performance counters.
        ///
        public PerformanceCounter[] All { get; private set; }

        ///
        /// Number of active Azure EventHub event processors.
        ///
        [PerformanceCounterType(PerformanceCounterType.NumberOfItems32)]
        public PerformanceCounter EventHub_Processors { get; private set; }

        ///
        /// Number of cached features.
        ///
        [PerformanceCounterType(PerformanceCounterType.NumberOfItems32)]
        public PerformanceCounter Features_Cached { get; private set; }

        ///
        /// Number of pending feature requests. Features referenced by ID, which have not yet occurred in the stream.
        ///
        [PerformanceCounterType(PerformanceCounterType.NumberOfItems64)]
        public PerformanceCounter Feature_Requests_Pending { get; private set; }

        ///
        /// Number of feature requests discarded (e.g. due to timeout hit).
        ///
        [PerformanceCounterType(PerformanceCounterType.NumberOfItems64)]
        public PerformanceCounter Feature_Requests_Discarded { get; private set; }

        ///
        /// Total number of batches received by stage 0.
/// [PerformanceCounterType(PerformanceCounterType.NumberOfItems64)] public PerformanceCounter Stage0_Batches_Total { get; private set; } /// /// Number of batches received per second by stage 0. /// [PerformanceCounterType(PerformanceCounterType.RateOfCountsPerSecond64)] public PerformanceCounter Stage0_BatchesPerSec { get; private set; } /// /// Average size of batches received per second by stage 0. /// [PerformanceCounterType(PerformanceCounterType.AverageCount64)] public PerformanceCounter Stage0_Batches_Size { get; private set; } /// /// Average (base) size of batches received per second by stage 0. /// [PerformanceCounterType(PerformanceCounterType.AverageBase)] public PerformanceCounter Stage0_Batches_SizeBase { get; private set; } /// /// Bytes/sec received by stage 0. /// [PerformanceCounterType(PerformanceCounterType.RateOfCountsPerSecond64)] public PerformanceCounter Stage0_IncomingBytesPerSec { get; private set; } /// /// Number of JSON lines in stage 1. /// [PerformanceCounterType(PerformanceCounterType.NumberOfItems64)] public PerformanceCounter Stage1_JSON_Queue { get; private set; } /// /// Number of JSON lines deserialized per second in stage 1. /// [PerformanceCounterType(PerformanceCounterType.RateOfCountsPerSecond64)] public PerformanceCounter Stage1_JSON_DeserializePerSec { get; private set; } /// /// Number of examples queued up for learning. /// [PerformanceCounterType(PerformanceCounterType.NumberOfItems64)] public PerformanceCounter Stage2_Learn_Queue { get; private set; } /// /// Total number of examples learned so far. /// [PerformanceCounterType(PerformanceCounterType.NumberOfItems64)] public PerformanceCounter Stage2_Learn_Total { get; private set; } /// /// Number of examples learned per second. /// [PerformanceCounterType(PerformanceCounterType.RateOfCountsPerSecond64)] public PerformanceCounter Stage2_Learn_ExamplesPerSec { get; private set; } /// /// Number of features learned per second. 
/// [PerformanceCounterType(PerformanceCounterType.RateOfCountsPerSecond64)] public PerformanceCounter Stage2_Learn_FeaturesPerSec { get; private set; } /// /// Total number of faulty examples encountered so far. /// [PerformanceCounterType(PerformanceCounterType.NumberOfItems64)] public PerformanceCounter Stage2_Faulty_Examples_Total { get; private set; } /// /// Number of faulty examples encountered per second. /// [PerformanceCounterType(PerformanceCounterType.RateOfCountsPerSecond64)] public PerformanceCounter Stage2_Faulty_ExamplesPerSec { get; private set; } /// /// Number of checkpoint requests queued up. /// [PerformanceCounterType(PerformanceCounterType.NumberOfItems64)] public PerformanceCounter Stage3_Checkpoint_Queue { get; private set; } /// /// Total number of evaluation outputs produced. /// [PerformanceCounterType(PerformanceCounterType.NumberOfItems64)] public PerformanceCounter Stage4_Evaluation_Total { get; private set; } /// /// Number of evaluation produced per second. /// [PerformanceCounterType(PerformanceCounterType.RateOfCountsPerSecond64)] public PerformanceCounter Stage4_Evaluation_PerSec { get; private set; } /// /// Total number of evaluation batches produces so far. /// [PerformanceCounterType(PerformanceCounterType.NumberOfItems64)] public PerformanceCounter Stage4_Evaluation_BatchesTotal { get; private set; } /// /// Number of evaluation batches produced per second. /// [PerformanceCounterType(PerformanceCounterType.RateOfCountsPerSecond64)] public PerformanceCounter Stage4_Evaluation_BatchesPerSec { get; private set; } /// /// Average example latency. /// [PerformanceCounterType(PerformanceCounterType.AverageTimer32)] public PerformanceCounter AverageExampleLatency { get; private set; } /// /// Average (base) example latency. 
/// [PerformanceCounterType(PerformanceCounterType.AverageBase)] public PerformanceCounter AverageExampleLatencyBase { get; private set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Util/000077500000000000000000000000001332666127000213715ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Util/ExtensionMethods.cs000066400000000000000000000041241332666127000252210ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using Microsoft.ApplicationInsights; using System; using System.Collections.Generic; using System.Reactive.Linq; using System.Threading.Tasks; namespace VW.Azure.Trainer { internal static class ExtensionMethods { internal static Task Trace(this Task task, TelemetryClient telemetry, string message) { return task.ContinueWith(t => { telemetry.TrackTrace($"{message} completed: {t.Status}"); if (t.IsFaulted) telemetry.TrackException(t.Exception); }); } internal static IObservable> Buffer(this IObservable source, int maxSize, Func measure) { return Observable.Create>(obs => { var state = new List(); var size = 0; return source.Subscribe( onNext: v => { size += measure(v); state.Add(v); if (size >= maxSize) { obs.OnNext(state); state = new List(); size = 0; } }, onError: e => obs.OnError(e), onCompleted: () => { if (state.Count > 0) { obs.OnNext(state); } obs.OnCompleted(); }); }); } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure/Trainer/Util/SafeTimer.cs000066400000000000000000000033241332666127000236010ustar00rootroot00000000000000// 
--------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------

using System;
using System.Threading;
using System.Threading.Tasks;

namespace VW.Azure.Trainer
{
    /// <summary>
    /// Repeatedly invokes an action with a delay between invocations until stopped.
    /// </summary>
    internal class SafeTimer
    {
        private CancellationTokenSource cancellationTokenSource;

        private ManualResetEventSlim finishedEvent;

        /// <summary>
        /// Starts the timer loop. The action is invoked immediately and then again after
        /// each <paramref name="delay"/>.
        /// </summary>
        /// <param name="delay">Pause between two invocations of the action.</param>
        /// <param name="action">The work to run on every iteration.</param>
        internal SafeTimer(TimeSpan delay, Action action)
        {
            this.cancellationTokenSource = new CancellationTokenSource();
            var cancellationToken = cancellationTokenSource.Token;

            finishedEvent = new ManualResetEventSlim();

            Task.Factory
                .StartNew(async () =>
                    {
                        while (true)
                        {
                            cancellationToken.ThrowIfCancellationRequested();

                            action();

                            cancellationToken.ThrowIfCancellationRequested();

                            await Task.Delay(delay, cancellationToken);
                        }
                    },
                    TaskCreationOptions.LongRunning)
                // StartNew with an async delegate yields a nested task whose outer part completes
                // at the first await. Unwrap so the continuation (and therefore Stop) observes the
                // end of the whole loop rather than the end of its first synchronous segment.
                .Unwrap()
                .ContinueWith(t => finishedEvent.Set());
        }

        /// <summary>
        /// Requests cancellation and waits up to <paramref name="timeout"/> for the loop to end.
        /// </summary>
        public void Stop(TimeSpan timeout)
        {
            cancellationTokenSource.Cancel();

            finishedEvent.Wait(timeout);
        }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/azure/app.config000066400000000000000000000150631332666127000210240ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/azure/azure.csproj000066400000000000000000000366121332666127000214300ustar00rootroot00000000000000 Debug AnyCPU 8.0.30703 2.0 {43E32C1D-21D6-4BE3-8FA5-D5BA379CBA53} Library Properties VowpalWabbit.Azure VowpalWabbit.Azure v4.6 512 ..\ true true full false bin\Debug\ DEBUG;TRACE prompt 4 x64 pdbonly true bin\Release\ TRACE prompt 4 true true $(SolutionDir)\x64\Debug\ DEBUG;TRACE full x64 prompt MinimumRecommendedRules.ruleset true $(SolutionDir)\x64\Debug\VowpalWabbit.Azure.xml $(SolutionDir)\x64\Release\ TRACE true pdbonly x64 prompt MinimumRecommendedRules.ruleset true
$(SolutionDir)\x64\Release\VowpalWabbit.Azure.xml true ..\vw_key.snk $(SolutionDir)\packages\gitlink.2.3.0\lib\net45\GitLink.exe True $(SolutionDir)\packages\Microsoft.ApplicationInsights.Agent.Intercept.2.0.6\lib\net45\Microsoft.AI.Agent.Intercept.dll True $(SolutionDir)\packages\Microsoft.ApplicationInsights.DependencyCollector.2.2.0\lib\net45\Microsoft.AI.DependencyCollector.dll True $(SolutionDir)\packages\Microsoft.ApplicationInsights.PerfCounterCollector.2.2.0\lib\net45\Microsoft.AI.PerfCounterCollector.dll True $(SolutionDir)\packages\Microsoft.ApplicationInsights.WindowsServer.TelemetryChannel.2.2.0\lib\net45\Microsoft.AI.ServerTelemetryChannel.dll True $(SolutionDir)\packages\Microsoft.ApplicationInsights.WindowsServer.2.2.0\lib\net45\Microsoft.AI.WindowsServer.dll True $(SolutionDir)\packages\Microsoft.ApplicationInsights.2.2.0\lib\net46\Microsoft.ApplicationInsights.dll True $(SolutionDir)\packages\Microsoft.ApplicationInsights.TraceListener.2.2.0\lib\net45\Microsoft.ApplicationInsights.TraceListener.dll True $(SolutionDir)\packages\Microsoft.Azure.KeyVault.Core.2.0.4\lib\net45\Microsoft.Azure.KeyVault.Core.dll True $(SolutionDir)\packages\Microsoft.Data.Edm.5.8.1\lib\net40\Microsoft.Data.Edm.dll True $(SolutionDir)\packages\Microsoft.Data.OData.5.8.1\lib\net40\Microsoft.Data.OData.dll True $(SolutionDir)\packages\Microsoft.Data.Services.Client.5.8.1\lib\net40\Microsoft.Data.Services.Client.dll True $(SolutionDir)\packages\EnterpriseLibrary.TransientFaultHandling.6.0.1304.0\lib\portable-net45+win+wp8\Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling.dll True $(SolutionDir)\packages\CommonServiceLocator.1.3\lib\portable-net4+sl5+netcore45+wpa81+wp8\Microsoft.Practices.ServiceLocation.dll True $(SolutionDir)\packages\WindowsAzure.ServiceBus.3.4.3\lib\net45-full\Microsoft.ServiceBus.dll True $(SolutionDir)\packages\Microsoft.Azure.ServiceBus.EventProcessorHost.2.2.8\lib\net45-full\Microsoft.ServiceBus.Messaging.EventProcessorHost.dll True True 
False $(SolutionDir)\packages\WindowsAzure.Storage.7.2.1\lib\net40\Microsoft.WindowsAzure.Storage.dll True $(SolutionDir)\packages\Newtonsoft.Json.9.0.1\lib\net45\Newtonsoft.Json.dll True $(SolutionDir)\packages\System.Reactive.Core.3.1.1\lib\net46\System.Reactive.Core.dll True $(SolutionDir)\packages\System.Reactive.Interfaces.3.1.1\lib\net45\System.Reactive.Interfaces.dll True $(SolutionDir)\packages\System.Reactive.Linq.3.1.1\lib\net46\System.Reactive.Linq.dll True $(SolutionDir)\packages\System.Reactive.PlatformServices.3.1.1\lib\net46\System.Reactive.PlatformServices.dll True $(SolutionDir)\packages\System.Reactive.Windows.Threading.3.1.1\lib\net45\System.Reactive.Windows.Threading.dll True $(SolutionDir)\packages\System.Spatial.5.8.1\lib\net40\System.Spatial.dll True $(SolutionDir)\packages\Microsoft.Tpl.Dataflow.4.5.24\lib\portable-net45+win8+wpa81\System.Threading.Tasks.Dataflow.dll True Designer Designer {85e55ae0-3784-4968-9271-c81af560e1c1} vw_clr {e621e022-c1f8-433f-905a-ab9a3de072b7} vw_common {e4e962ae-7056-4eb0-a8c5-8dc824a4b068} cs {9e27fa94-ab34-4736-8427-fb7a2ba90d52} cs_json false vowpal-wabbit-8.6.1.dfsg1/cs/azure/azure.nuspec000066400000000000000000000062041332666127000214170ustar00rootroot00000000000000 Vowpal Wabbit Azure VowpalWabbit.Azure vw vowpal wabbit langford ml machine learning azure John Langford et al $version$ https://github.com/JohnLangford/vowpal_wabbit/wiki/C%23-Binding https://github.com/JohnLangford/vowpal_wabbit/blob/master/LICENSE false Official Vowpal Wabbit library including C# interface Copyright (C) Microsoft Corp 2012-2016, Yahoo! Inc. 2007-2012, and many individual contributors. All rights reserved. 
vowpal-wabbit-8.6.1.dfsg1/cs/azure/packages.config000066400000000000000000000045361332666127000220250ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/azure_service/000077500000000000000000000000001332666127000205705ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/azure_service/DownloadServiceConfiguration.ps1000066400000000000000000000004601332666127000270350ustar00rootroot00000000000000$mcURL = Read-Host -Prompt "Management Center URL" $storageAccountKey = Read-Host "Storage Account Key" $storageAccountKey = [uri]::EscapeDataString($storageAccountKey) Invoke-WebRequest -Uri "$($mcURL)Deployment/GenerateTrainerConfig?key=$storageAccountKey" -OutFile ServiceConfiguration.Local.cscfg vowpal-wabbit-8.6.1.dfsg1/cs/azure_service/OnlineTrainerContent/000077500000000000000000000000001332666127000246745ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/azure_service/OnlineTrainerContent/GCSettingsManagement.ps1000066400000000000000000000171721332666127000313400ustar00rootroot00000000000000<# //********************************************************* // // Copyright (c) Microsoft. All rights reserved. // This code is licensed under the Microsoft Public License. // THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF // ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY // IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR // PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. 
// //********************************************************* #> Param ( $serverGC = $True, $backgroundGC = $True ) [string]$configFilePath = "$(${env:RoleRoot})\base\x64\WaWorkerHost.exe.config" function Create-ConfigFileIfNotExists { # Only create the Xml document if it does not already exist if(-not (Test-Path -Path $configFilePath -PathType Leaf)) { [System.Xml.XmlDocument]$document = New-Object System.Xml.XmlDocument # config file doesn't exist create a now one [System.Xml.XmlDeclaration]$prolog = $document.CreateXmlDeclaration("1.0", "utf-8", $null) [System.Xml.XmlNode]$child = $document.AppendChild($prolog) [System.Xml.XmlElement]$configurationElement = Append-ElementIfNotExists $document $document.DocumentElement "configuration" # Save a copy of the document $document.Save($configFilePath) } } function Load-ConfigFile { [System.Xml.XmlDocument]$document = New-Object System.Xml.XmlDocument #Check if the document already exists and load it if it does not if(Test-Path -Path $configFilePath -PathType Leaf) { $document.Load($configFilePath) } return $document } function Append-ElementIfNotExists { param ( [System.Xml.XmlDocument]$document, [System.Xml.XmlElement]$parent, [string]$elementName ) [System.Xml.XmlElement]$element = $null [System.Xml.XmlNode]$parentNode = $parent if($document -ne $null) { if($parentNode -eq $null) { $parentNode = $document } $element = $parentNode.SelectSingleNode("./$($elementName)") if($element -eq $null) { $element = $document.CreateElement($elementName) [System.Xml.XmlElement]$child = $parentNode.AppendChild($element) } } return $element } function Create-ElementStructureIfNotExists { param ( [System.Xml.XmlDocument]$document ) [bool]$isSuccess = $false if($document -ne $null) { [System.Xml.XmlElement]$configurationElement = Append-ElementIfNotExists $document $null "configuration" if($configurationElement -ne $null) { [System.Xml.XmlElement]$element = Append-ElementIfNotExists $document $configurationElement "runtime" $isSuccess 
= $element -ne $null } } return $isSuccess } # Create the document if required Create-ConfigFileIfNotExists # Load the configuration file into the XML document [System.Xml.XmlDocument]$configurationDocument = Load-ConfigFile if($configurationDocument -ne $null) { if(Create-ElementStructureIfNotExists $configurationDocument) { # All of the entries are on the runtime element [System.Xml.XmlElement]$runtimeElement = $configurationDocument.DocumentElement.SelectSingleNode('./runtime') if($runtimeElement -ne $null) { # Set the Server GC to enabled if requested [System.Xml.XmlElement]$serverGCElement = Append-ElementIfNotExists $configurationDocument $runtimeElement "gcServer" $serverGCElement.SetAttribute("enabled", $serverGC.ToString([System.Globalization.CultureInfo]::InvariantCulture).ToLower()) # Set the concurrent GC to enabled if requested [System.Xml.XmlElement]$concurrentGCElement = Append-ElementIfNotExists $configurationDocument $runtimeElement "gcConcurrent" $concurrentGCElement.SetAttribute("enabled", $backgroundGC.ToString([System.Globalization.CultureInfo]::InvariantCulture).ToLower()) } } # Save the document $configurationDocument.Save($configFilePath) } vowpal-wabbit-8.6.1.dfsg1/cs/azure_service/OnlineTrainerContent/ServerGC.cmd000066400000000000000000000020251332666127000270400ustar00rootroot00000000000000 REM ********************************************************* REM REM Copyright (c) Microsoft. All rights reserved. REM This code is licensed under the Microsoft Public License. REM THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF REM ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY REM IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR REM PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. 
REM REM ********************************************************* REM Check if the script is running in the Azure emulator and if so do not run IF "%IsEmulated%"=="true" goto :EOF If "%UseServerGC%"=="False" GOTO :ValidateBackground If "%UseServerGC%"=="0" GOTO :ValidateBackground SET UseServerGC="True" :ValidateBackground If "%UseBackgroundGC%"=="False" GOTO :CommandExecution If "%UseBackgroundGC%"=="0" GOTO :CommandExecution SET UseBackgroundGC="True" :CommandExecution PowerShell.exe -executionpolicy unrestricted -command ".\GCSettingsManagement.ps1" -serverGC %UseServerGC% -backgroundGC %UseBackgroundGC% Exit /bvowpal-wabbit-8.6.1.dfsg1/cs/azure_service/OnlineTrainerContent/diagnostics.wadcfgx000066400000000000000000000060271332666127000305550ustar00rootroot00000000000000 true vowpal-wabbit-8.6.1.dfsg1/cs/azure_service/OnlineTrainerContent/install.cmd000066400000000000000000000054271332666127000270370ustar00rootroot00000000000000REM Set the value of netfx to install appropriate .NET Framework. REM ***** To install .NET 4.5.2 set the variable netfx to "NDP452" ***** REM ***** To install .NET 4.6 set the variable netfx to "NDP46" ***** REM ***** To install .NET 4.6.1 set the variable netfx to "NDP461" ***** set netfx="NDP461" REM ***** Set script start timestamp ***** set timehour=%time:~0,2% set timestamp=%date:~-4,4%%date:~-10,2%%date:~-7,2%-%timehour: =0%%time:~3,2% set "log=install.cmd started %timestamp%." 
REM ***** Exit script if running in Emulator ***** if %ComputeEmulatorRunning%=="true" goto exit REM ***** Needed to correctly install .NET 4.6.1, otherwise you may see an out of disk space error ***** set TMP=%PathToNETFXInstall% set TEMP=%PathToNETFXInstall% REM ***** Setup .NET filenames and registry keys ***** if %netfx%=="NDP461" goto NDP461 if %netfx%=="NDP46" goto NDP46 set "netfxinstallfile=NDP452-KB2901954-Web.exe" set netfxregkey="0x5cbf5" goto logtimestamp :NDP46 set "netfxinstallfile=NDP46-KB3045560-Web.exe" set netfxregkey="0x60051" goto logtimestamp :NDP461 set "netfxinstallfile=NDP461-KB3102438-Web.exe" set netfxregkey="0x6041f" :logtimestamp REM ***** Setup LogFile with timestamp ***** md "%PathToNETFXInstall%\log" set startuptasklog="%PathToNETFXInstall%log\startuptasklog-%timestamp%.txt" set netfxinstallerlog="%PathToNETFXInstall%log\NetFXInstallerLog-%timestamp%" echo %log% >> %startuptasklog% echo Logfile generated at: %startuptasklog% >> %startuptasklog% echo TMP set to: %TMP% >> %startuptasklog% echo TEMP set to: %TEMP% >> %startuptasklog% REM ***** Check if .NET is installed ***** echo Checking if .NET (%netfx%) is installed >> %startuptasklog% reg query "HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\NET Framework Setup\NDP\v4\Full" /v Release | Find %netfxregkey% if %ERRORLEVEL%== 0 goto installed REM ***** Installing .NET ***** echo Installing .NET with commandline: start /wait %~dp0%netfxinstallfile% /q /serialdownload /log %netfxinstallerlog% /chainingpackage "CloudService Startup Task" >> %startuptasklog% start /wait %~dp0%netfxinstallfile% /q /serialdownload /log %netfxinstallerlog% /chainingpackage "CloudService Startup Task" >> %startuptasklog% 2>>&1 if %ERRORLEVEL%== 0 goto installed echo .NET installer exited with code %ERRORLEVEL% >> %startuptasklog% if %ERRORLEVEL%== 3010 goto restart if %ERRORLEVEL%== 1641 goto restart echo .NET (%netfx%) install failed with Error Code %ERRORLEVEL%. 
Further logs can be found in %netfxinstallerlog% >> %startuptasklog% :restart echo Restarting to complete .NET (%netfx%) installation >> %startuptasklog% goto end :installed echo .NET (%netfx%) is installed >> %startuptasklog% :end echo install.cmd completed: %date:~-4,4%%date:~-10,2%%date:~-7,2%-%timehour: =0%%time:~3,2% >> %startuptasklog% :exit EXIT /B 0vowpal-wabbit-8.6.1.dfsg1/cs/azure_service/RoleProperties.txt000066400000000000000000000001041332666127000243020ustar00rootroot00000000000000EntryPoint=VowpalWabbit.AzureWorker.dll TargetFrameWorkVersion=v4.0 vowpal-wabbit-8.6.1.dfsg1/cs/azure_service/ServiceConfiguration.Cloud.cscfg000066400000000000000000000031221332666127000267720ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/azure_service/ServiceConfiguration.Local.cscfg000066400000000000000000000031151332666127000267600ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/azure_service/ServiceConfiguration.cscfg000066400000000000000000000020431332666127000257260ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/azure_service/ServiceDefinition.csdef000066400000000000000000000043131332666127000252100ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/azure_service/azure_service.ccproj000066400000000000000000000075121332666127000246450ustar00rootroot00000000000000 Debug AnyCPU 2.9 7263f536-3abe-4a8e-ba66-1341d6a53579 Library Properties OnlineTraining OnlineTraining True azure_service False true full false bin\Debug\ DEBUG;TRACE prompt 4 pdbonly true bin\Release\ TRACE prompt 4 OnlineTrainer {9DE727D8-FD1B-4144-8976-5371663D4C22} True Worker OnlineTrainer True Content Content Content Content 10.0 $(MSBuildExtensionsPath)\Microsoft\VisualStudio\v$(VisualStudioVersion)\Windows Azure Tools\2.8\ powershell -Command "if (-not (Test-Path '$(ProjectDir)\OnlineTrainerContent\NDP461-KB3102438-Web.exe')){ Invoke-WebRequest -Uri 'https://download.microsoft.com/download/3/5/9/35980F81-60F4-4DE3-88FC-8F962B97253B/NDP461-KB3102438-Web.exe' 
-OutFile $(ProjectDir)\OnlineTrainerContent\NDP461-KB3102438-Web.exe }" vowpal-wabbit-8.6.1.dfsg1/cs/azure_worker/000077500000000000000000000000001332666127000204415ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/azure_worker/CheckpointController.cs000066400000000000000000000023551332666127000251300ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System.Net; using System.Net.Http; using System.Threading.Tasks; using VW.Azure.Trainer; namespace VW.Azure.Worker { /// /// HTTP Front end to trigger checkpointing /// public sealed class CheckpointController : OnlineTrainerController { public CheckpointController(LearnEventProcessorHost trainProcessorFactory) : base(trainProcessorFactory) { } public async Task Get() { if (!this.TryAuthorize()) return this.Request.CreateResponse(HttpStatusCode.Unauthorized); await this.trainProcessorHost.CheckpointAsync(); return this.Request.CreateResponse(HttpStatusCode.OK); } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure_worker/OnlineTrainerController.cs000066400000000000000000000026771332666127000256210ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using Microsoft.ApplicationInsights; using Microsoft.Azure; using System.Linq; using System.Web.Http; using VW.Azure.Trainer; namespace VW.Azure.Worker { public class OnlineTrainerController : ApiController { // injected through Unity protected readonly LearnEventProcessorHost trainProcessorHost; protected readonly string adminToken; protected readonly TelemetryClient telemetry; public OnlineTrainerController(LearnEventProcessorHost trainProcessorFactory) { this.telemetry = new TelemetryClient(); this.trainProcessorHost = trainProcessorFactory; this.adminToken = CloudConfigurationManager.GetSetting("AdminToken"); } protected bool TryAuthorize() { var header = this.Request.Headers.SingleOrDefault(x => x.Key == "Authorization"); return !(header.Value == null || adminToken != header.Value.First()); } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure_worker/OnlineTrainerSettingsDownloader.cs000066400000000000000000000121021332666127000272750ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using Microsoft.Azure; using Microsoft.WindowsAzure.ServiceRuntime; using Microsoft.WindowsAzure.Storage; using Microsoft.WindowsAzure.Storage.Blob; using System; using System.Diagnostics; using System.IO; using System.Linq; using System.Net; using System.Reactive.Linq; using System.Threading; using System.Threading.Tasks; using VW.Azure.Trainer; namespace VW.Azure.Worker { internal sealed class OnlineTrainerSettingsDownloader : IDisposable { internal static CloudBlob GetSettingsBlockBlob() { var storageAccount = CloudStorageAccount.Parse(CloudConfigurationManager.GetSetting("StorageConnectionString")); var blobClient = storageAccount.CreateCloudBlobClient(); var container = blobClient.GetContainerReference(OnlineTrainerSettings.SettingsContainerName); return container.GetBlobReference(OnlineTrainerSettings.LatestClientSettingsBlobName); } public delegate void DownloadedEventHandler(object sender, byte[] data); public delegate void FailedEventHandler(object sender, Exception e); public event DownloadedEventHandler Downloaded; public event FailedEventHandler Failed; private IDisposable disposable; private CloudBlob settingsBlob; private string blobEtag; public OnlineTrainerSettingsDownloader(TimeSpan interval) { this.settingsBlob = GetSettingsBlockBlob(); RoleEnvironment.Changed += RoleEnvironment_Changed; // run background thread var conn = Observable.Interval(interval) .SelectMany(_ => Observable.FromAsync(this.Execute)) .Replay(); this.disposable = conn.Connect(); } private void RoleEnvironment_Changed(object sender, RoleEnvironmentChangedEventArgs e) { var change = e.Changes .OfType() .FirstOrDefault(c => c.ConfigurationSettingName == "StorageConnectionString"); if (change != null) this.settingsBlob = GetSettingsBlockBlob(); } private async Task Execute(CancellationToken cancellationToken) { var uri = string.Empty; try { uri = 
this.settingsBlob.Uri.ToString(); // avoid not found exception if (!await this.settingsBlob.ExistsAsync(cancellationToken)) return; if (this.settingsBlob.Properties != null) { // if downloadImmediately is set to false, the downloader // will not download the blob on first check, and on second check // onwards, the blob must have changed before a download is triggered. // this is to support caller who manually downloads the blob first for // other purposes and do not want to redownload. // avoid not modified exception if (this.settingsBlob.Properties.ETag == this.blobEtag) return; var currentBlobEtag = this.blobEtag; this.blobEtag = this.settingsBlob.Properties.ETag; } // download using (var ms = new MemoryStream()) { await this.settingsBlob.DownloadToStreamAsync(ms, cancellationToken); Trace.TraceInformation("Retrieved new blob for {0}", this.settingsBlob.Uri); this.Downloaded?.Invoke(this, ms.ToArray()); } } catch (Exception ex) { if (ex is StorageException) { RequestResult result = ((StorageException)ex).RequestInformation; if (result.HttpStatusCode != (int)HttpStatusCode.NotFound) { Trace.TraceError( "Failed to retrieve '{0}': {1}. {2}", uri, ex.Message, result.HttpStatusMessage); } } else Trace.TraceError("Failed to retrieve '{0}': {1}", uri, ex.Message); this.Failed?.Invoke(this, ex); } } public void Dispose() { RoleEnvironment.Changed -= RoleEnvironment_Changed; if (this.disposable != null) { this.disposable.Dispose(); this.disposable = null; } } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure_worker/OnlineTrainerSettingsWatcher.cs000066400000000000000000000172031332666127000266030ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using Microsoft.ApplicationInsights; using Microsoft.ApplicationInsights.DataContracts; using Microsoft.Azure; using Microsoft.ServiceBus; using Microsoft.WindowsAzure.ServiceRuntime; using Microsoft.WindowsAzure.Storage; using Newtonsoft.Json; using System; using System.Collections.Generic; using System.Globalization; using System.IO; using System.Linq; using System.Net; using System.Text; using System.Threading.Tasks; using VW.Azure.Trainer; using VW.Azure.Trainer.Checkpoint; namespace VW.Azure.Worker { internal sealed class OnlineTrainerSettingsWatcher : IDisposable { private readonly TelemetryClient telemetry; private readonly LearnEventProcessorHost trainProcessorHost; private OnlineTrainerSettingsDownloader settingsDownloader; private OnlineTrainerSettings metaData; internal OnlineTrainerSettingsWatcher(LearnEventProcessorHost trainProcessorHost) { this.telemetry = new TelemetryClient(); this.trainProcessorHost = trainProcessorHost; RoleEnvironment.Changed += RoleEnvironment_Changed; this.settingsDownloader = new OnlineTrainerSettingsDownloader(TimeSpan.FromSeconds(5)); this.settingsDownloader.Downloaded += AzureSettingsBlobDownloader_Downloaded; this.settingsDownloader.Failed += AzureSettingsBlobDownloader_Failed; } public void RestartTrainProcessorHost() { if (this.metaData == null) return; var settings = new OnlineTrainerSettingsInternal { StorageConnectionString = CloudConfigurationManager.GetSetting("StorageConnectionString"), JoinedEventHubConnectionString = CloudConfigurationManager.GetSetting("JoinedEventHubConnectionString"), EvalEventHubConnectionString = CloudConfigurationManager.GetSetting("EvalEventHubConnectionString"), Metadata = this.metaData, CheckpointPolicy = ParseCheckpointPolicy(), // make sure we ignore previous events EventHubStartDateTimeUtc = DateTime.UtcNow }; var joinedEventHubConsumerGroup = 
CloudConfigurationManager.GetSetting("JoinedEventHubConsumerGroup"); if (!string.IsNullOrEmpty(joinedEventHubConsumerGroup)) settings.JoinedEventHubConsumerGroup = joinedEventHubConsumerGroup; bool enableExampleTracing; if (bool.TryParse(CloudConfigurationManager.GetSetting("EnableExampleTracing"), out enableExampleTracing)) settings.EnableExampleTracing = enableExampleTracing; ServiceBusConnectionStringBuilder serviceBusConnectionStringBuilder; try { serviceBusConnectionStringBuilder = new ServiceBusConnectionStringBuilder(settings.JoinedEventHubConnectionString); } catch (Exception e) { throw new InvalidDataException($"Invalid JoinedEventHubConnectionString '{settings.JoinedEventHubConnectionString}' found in settings: {e.Message}"); } try { serviceBusConnectionStringBuilder = new ServiceBusConnectionStringBuilder(settings.EvalEventHubConnectionString); } catch (Exception e) { throw new InvalidDataException($"Invalid EventHubEvalConnectionString '{settings.EvalEventHubConnectionString}' found in settings: {e.Message}"); } if (string.IsNullOrEmpty(serviceBusConnectionStringBuilder.EntityPath)) throw new InvalidDataException($"Invalid EventHubEvalConnectionString '{settings.EvalEventHubConnectionString}' found in settings: EntityPath missing"); CloudStorageAccount cloudStorageAccount; if (!CloudStorageAccount.TryParse(settings.StorageConnectionString, out cloudStorageAccount)) throw new InvalidDataException($"Invalid StorageConnectionString '{settings.StorageConnectionString}' found in settings"); // fire and forget var task = this.trainProcessorHost.Restart(settings); } private void AzureSettingsBlobDownloader_Failed(object sender, Exception e) { this.telemetry.TrackException(e); } private void AzureSettingsBlobDownloader_Downloaded(object sender, byte[] data) { try { var json = Encoding.UTF8.GetString(data); this.metaData = JsonConvert.DeserializeObject(json); this.telemetry.TrackTrace( "Metadata update. 
Trigger restart", SeverityLevel.Information, new Dictionary { { "settings", json } }); this.RestartTrainProcessorHost(); } catch (Exception ex) { this.telemetry.TrackException(ex); } } private void RoleEnvironment_Changed(object sender, RoleEnvironmentChangedEventArgs e) { try { var changes = e.Changes.OfType() .Select(c => c.ConfigurationSettingName) .ToList(); if (changes.Count > 0) { this.telemetry.TrackTrace( "Configuration changes. Trigger restart", SeverityLevel.Information, changes.ToDictionary(name => name, name => CloudConfigurationManager.GetSetting(name))); // fire and forget this.RestartTrainProcessorHost(); } } catch (Exception ex) { this.telemetry.TrackException(ex); } } private ICheckpointPolicy ParseCheckpointPolicy() { var checkpointString = CloudConfigurationManager.GetSetting("CheckpointIntervalOrCount"); if (!string.IsNullOrWhiteSpace(checkpointString)) { if (checkpointString.Contains(":")) { TimeSpan interval; if (TimeSpan.TryParse(checkpointString, CultureInfo.InvariantCulture, out interval)) return new IntervalCheckpointPolicy(interval); } else { int syncCount; if (int.TryParse(checkpointString, NumberStyles.Integer, CultureInfo.InvariantCulture, out syncCount)) return new CountingCheckpointPolicy(syncCount); } } this.telemetry.TrackTrace("No valid checkpoint policy found. 
Defaulting to 5 minute wallclock checkpointing."); return new IntervalCheckpointPolicy(TimeSpan.FromMinutes(5)); } public void Dispose() { RoleEnvironment.Changed -= RoleEnvironment_Changed; if (this.settingsDownloader != null) { this.settingsDownloader.Dispose(); this.settingsDownloader = null; } } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure_worker/Properties/000077500000000000000000000000001332666127000225755ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/azure_worker/Properties/AssemblyInfo.cs000066400000000000000000000022131332666127000255150ustar00rootroot00000000000000//------------------------------------------------------------------------------ // // This code was generated by a tool. // Runtime Version:4.0.30319.42000 // // Changes to this file may cause incorrect behavior and will be lost if // the code is regenerated. // //------------------------------------------------------------------------------ [assembly: System.Reflection.AssemblyTitle("Vowpal Wabbit")] [assembly: System.Reflection.AssemblyDescription("Vowpal Wabbit")] [assembly: System.Reflection.AssemblyCompany("Microsoft Corp")] [assembly: System.Reflection.AssemblyProduct("Vowpal Wabbit")] [assembly: System.Reflection.AssemblyCopyright("Copyright (C) Microsoft Corp 2012-2016, Yahoo! Inc. 2007-2012, and many individua" + "l contributors. 
All rights reserved")] [assembly: System.Runtime.InteropServices.ComVisible(false)] [assembly: System.CLSCompliant(false)] [assembly: System.Runtime.InteropServices.Guid("6a577997-af00-4ca0-8453-fdc8bbdf2a57")] [assembly: System.Reflection.AssemblyVersion("8.4.0.1")] [assembly: System.Reflection.AssemblyFileVersion("8.4.0.1")] vowpal-wabbit-8.6.1.dfsg1/cs/azure_worker/ResetController.cs000066400000000000000000000065161332666127000241260ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using Newtonsoft.Json; using System; using System.Collections.Generic; using System.Linq; using System.Net; using System.Net.Http; using System.Text; using System.Threading.Tasks; using System.Web.Http; using VW.Azure.Trainer; namespace VW.Azure.Worker { public sealed class ResetController : OnlineTrainerController { public ResetController(LearnEventProcessorHost trainProcessorFactory) : base(trainProcessorFactory) { } /// /// Vanilla reset. /// [HttpGet] public async Task Get() { if (!this.TryAuthorize()) return this.Request.CreateResponse(HttpStatusCode.Unauthorized); try { await this.trainProcessorHost.ResetModelAsync(); return this.Request.CreateResponse(HttpStatusCode.OK); } catch (Exception ex) { this.telemetry.TrackException(ex); return this.Request.CreateResponse(HttpStatusCode.InternalServerError, ex.Message); } } /// /// Reset optionally include EventHub position. 
/// [HttpPost] public async Task Post() { if (!this.TryAuthorize()) return this.Request.CreateResponse(HttpStatusCode.Unauthorized); try { OnlineTrainerState state = null; var body = await Request.Content.ReadAsStringAsync(); if (!string.IsNullOrWhiteSpace(body)) state = JsonConvert.DeserializeObject(body); await this.trainProcessorHost.ResetModelAsync(state); return this.Request.CreateResponse(HttpStatusCode.OK); } catch (Exception ex) { this.telemetry.TrackException(ex); return this.Request.CreateResponse(HttpStatusCode.InternalServerError, ex.Message); } } /// /// Reset including a warm started model. /// [HttpPut] public async Task Put() { if (!this.TryAuthorize()) return this.Request.CreateResponse(HttpStatusCode.Unauthorized); try { var model = await Request.Content.ReadAsByteArrayAsync(); await this.trainProcessorHost.ResetModelAsync(model: model); return this.Request.CreateResponse(HttpStatusCode.OK); } catch (Exception ex) { this.telemetry.TrackException(ex); return this.Request.CreateResponse(HttpStatusCode.InternalServerError, ex.Message); } } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure_worker/StatusController.cs000066400000000000000000000027171332666127000243260ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using Newtonsoft.Json.Linq; using System.Linq; using System.Web.Http; using VW.Azure.Trainer; namespace VW.Azure.Worker { /// /// HTTP Front end to expose performance statistics. 
/// public class StatusController : ApiController { private LearnEventProcessorHost trainProcessorHost; public StatusController(LearnEventProcessorHost trainProcessorHost) { this.trainProcessorHost = trainProcessorHost; } public IHttpActionResult Get() { var perfCounts = this.trainProcessorHost.PerformanceCounters; if (perfCounts == null) return Json(new { Message = "Not yet initialized." }); var status = new JObject(perfCounts.All.Select(pc => new JProperty(pc.CounterName, pc.RawValue))); status.Add(new JProperty("LastStartDateTimeUtc", this.trainProcessorHost.LastStartDateTimeUtc)); return Json(status); } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure_worker/WorkerRole.cs000066400000000000000000000130431332666127000230640ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using Microsoft.ApplicationInsights; using Microsoft.ApplicationInsights.DataContracts; using Microsoft.ApplicationInsights.Extensibility; using Microsoft.Azure; using Microsoft.Owin.Cors; using Microsoft.Owin.Hosting; using Microsoft.Practices.Unity; using Microsoft.WindowsAzure.ServiceRuntime; using Owin; using System; using System.Diagnostics; using System.Net; using System.Threading; using System.Web.Http; using System.Web.Http.Cors; using Unity.WebApi; using VW.Azure.Trainer; namespace VW.Azure.Worker { public class WorkerRole : RoleEntryPoint { private readonly ManualResetEventSlim stopEvent; private IDisposable webApp; private TelemetryClient telemetry; private OnlineTrainerSettingsWatcher settingsWatcher; private LearnEventProcessorHost trainProcesserHost; public WorkerRole() { this.stopEvent = new ManualResetEventSlim(); } public override bool OnStart() { try { // Set the maximum number of concurrent connections ServicePointManager.DefaultConnectionLimit = 128; // For information on handling configuration changes // see the MSDN topic at http://go.microsoft.com/fwlink/?LinkId=166357. 
bool result = base.OnStart(); TelemetryConfiguration.Active.InstrumentationKey = CloudConfigurationManager.GetSetting("APPINSIGHTS_INSTRUMENTATIONKEY"); //TelemetryConfiguration.Active.TelemetryChannel.DeveloperMode = true; this.telemetry = new TelemetryClient(); try { this.telemetry.TrackTrace("WorkerRole starting", SeverityLevel.Information); this.trainProcesserHost = new LearnEventProcessorHost(); this.settingsWatcher = new OnlineTrainerSettingsWatcher(this.trainProcesserHost); this.StartRESTAdminEndpoint(); } catch (Exception e) { this.telemetry.TrackException(e); // still start to give AppInsights a chance to log } return result; } catch (Exception e) { Debugger.Log(1, "ERROR", $"VowpalWabbit.AzureWorker failed to start: {e.Message} {e.StackTrace}"); throw; } } public override void Run() { try { // wait for OnStop this.stopEvent.Wait(); } catch (Exception e) { this.telemetry.TrackException(e); } } private void StartRESTAdminEndpoint() { // setup REST endpoint var endpoint = RoleEnvironment.CurrentRoleInstance.InstanceEndpoints["OnlineTrainer"]; string baseUri = String.Format("{0}://{1}", endpoint.Protocol, endpoint.IPEndpoint); this.webApp = WebApp.Start(baseUri, app => { var container = new UnityContainer(); // Register controller container.RegisterType(); container.RegisterType(); container.RegisterType(); // Register interface container.RegisterInstance(typeof(LearnEventProcessorHost), this.trainProcesserHost); var config = new HttpConfiguration(); config.DependencyResolver = new UnityDependencyResolver(container); config.EnableCors(new EnableCorsAttribute("*", "*", "*")); config.Routes.MapHttpRoute( "Default", "{controller}/{id}", new { id = RouteParameter.Optional }); // config.Services.Add(typeof(IExceptionLogger), new AiWebApiExceptionLogger()); app.UseCors(CorsOptions.AllowAll); app.UseWebApi(config); }); } public override void OnStop() { this.telemetry.TrackTrace("WorkerRole stopping", SeverityLevel.Information); try { this.stopEvent.Set(); if 
(this.settingsWatcher != null) { this.settingsWatcher.Dispose(); this.settingsWatcher = null; } if (this.trainProcesserHost != null) { this.trainProcesserHost.Dispose(); this.trainProcesserHost = null; } if (this.webApp != null) { this.webApp.Dispose(); this.webApp = null; } base.OnStop(); } catch (Exception e) { this.telemetry.TrackException(e); } this.telemetry.TrackTrace("WorkerRole stopped", SeverityLevel.Information); } } } vowpal-wabbit-8.6.1.dfsg1/cs/azure_worker/app.config000066400000000000000000000141701332666127000224130ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/azure_worker/azure_worker.csproj000066400000000000000000000455071332666127000244150ustar00rootroot00000000000000 Debug x64 {9DE727D8-FD1B-4144-8976-5371663D4C22} Library Properties VowpalWabbit.AzureWorker VowpalWabbit.AzureWorker Worker v4.6 512 ..\..\vowpalwabbit\ true ..\..\vowpalwabbit\x64\DebugRole\ DEBUG;TRACE full x64 prompt MinimumRecommendedRules.ruleset ..\..\vowpalwabbit\x64\ReleaseRole\ TRACE true pdbonly x64 prompt MinimumRecommendedRules.ruleset true ..\vw_key.snk $(SolutionDir)\packages\Microsoft.ApplicationInsights.Agent.Intercept.2.0.6\lib\net45\Microsoft.AI.Agent.Intercept.dll True $(SolutionDir)\packages\Microsoft.ApplicationInsights.DependencyCollector.2.2.0\lib\net45\Microsoft.AI.DependencyCollector.dll True $(SolutionDir)\packages\Microsoft.ApplicationInsights.PerfCounterCollector.2.2.0\lib\net45\Microsoft.AI.PerfCounterCollector.dll True $(SolutionDir)\packages\Microsoft.ApplicationInsights.WindowsServer.TelemetryChannel.2.2.0\lib\net45\Microsoft.AI.ServerTelemetryChannel.dll True $(SolutionDir)\packages\Microsoft.ApplicationInsights.Web.2.2.0\lib\net45\Microsoft.AI.Web.dll True $(SolutionDir)\packages\Microsoft.ApplicationInsights.WindowsServer.2.2.0\lib\net45\Microsoft.AI.WindowsServer.dll True $(SolutionDir)\packages\Microsoft.ApplicationInsights.2.2.0\lib\net46\Microsoft.ApplicationInsights.dll True 
..\..\vowpalwabbit\packages\Microsoft.ApplicationInsights.TraceListener.2.2.0\lib\net45\Microsoft.ApplicationInsights.TraceListener.dll True $(SolutionDir)\packages\Microsoft.Azure.KeyVault.Core.2.0.4\lib\net45\Microsoft.Azure.KeyVault.Core.dll True $(SolutionDir)\packages\Microsoft.Data.Edm.5.8.1\lib\net40\Microsoft.Data.Edm.dll True $(SolutionDir)\packages\Microsoft.Data.OData.5.8.1\lib\net40\Microsoft.Data.OData.dll True $(SolutionDir)\packages\Microsoft.Data.Services.Client.5.8.1\lib\net40\Microsoft.Data.Services.Client.dll True $(SolutionDir)\packages\Microsoft.Owin.3.0.1\lib\net45\Microsoft.Owin.dll True $(SolutionDir)\packages\Microsoft.Owin.Cors.3.0.1\lib\net45\Microsoft.Owin.Cors.dll True $(SolutionDir)\packages\Microsoft.Owin.Host.HttpListener.3.0.1\lib\net45\Microsoft.Owin.Host.HttpListener.dll True $(SolutionDir)\packages\Microsoft.Owin.Hosting.3.0.1\lib\net45\Microsoft.Owin.Hosting.dll True $(SolutionDir)\packages\EnterpriseLibrary.TransientFaultHandling.6.0.1304.0\lib\portable-net45+win+wp8\Microsoft.Practices.EnterpriseLibrary.TransientFaultHandling.dll True $(SolutionDir)\packages\CommonServiceLocator.1.3\lib\portable-net4+sl5+netcore45+wpa81+wp8\Microsoft.Practices.ServiceLocation.dll True $(SolutionDir)\packages\Unity.4.0.1\lib\net45\Microsoft.Practices.Unity.dll True $(SolutionDir)\packages\Unity.4.0.1\lib\net45\Microsoft.Practices.Unity.Configuration.dll True $(SolutionDir)\packages\Unity.4.0.1\lib\net45\Microsoft.Practices.Unity.RegistrationByConvention.dll True $(SolutionDir)\packages\WindowsAzure.ServiceBus.3.4.3\lib\net45-full\Microsoft.ServiceBus.dll True $(SolutionDir)\packages\Microsoft.Azure.ServiceBus.EventProcessorHost.2.2.8\lib\net45-full\Microsoft.ServiceBus.Messaging.EventProcessorHost.dll True $(SolutionDir)\packages\Microsoft.WindowsAzure.ConfigurationManager.3.2.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll True $(SolutionDir)\packages\WindowsAzure.Storage.7.2.1\lib\net40\Microsoft.WindowsAzure.Storage.dll True 
$(SolutionDir)\packages\Newtonsoft.Json.9.0.1\lib\net45\Newtonsoft.Json.dll True $(SolutionDir)\packages\Owin.1.0\lib\net40\Owin.dll True $(SolutionDir)\packages\Microsoft.AspNet.WebApi.Client.5.2.3\lib\net45\System.Net.Http.Formatting.dll True $(SolutionDir)\packages\System.Reactive.Core.3.1.1\lib\net46\System.Reactive.Core.dll True $(SolutionDir)\packages\System.Reactive.Interfaces.3.1.1\lib\net45\System.Reactive.Interfaces.dll True $(SolutionDir)\packages\System.Reactive.Linq.3.1.1\lib\net46\System.Reactive.Linq.dll True $(SolutionDir)\packages\System.Reactive.PlatformServices.3.1.1\lib\net46\System.Reactive.PlatformServices.dll True $(SolutionDir)\packages\System.Reactive.Windows.Threading.3.1.1\lib\net45\System.Reactive.Windows.Threading.dll True $(SolutionDir)\packages\System.Spatial.5.8.1\lib\net40\System.Spatial.dll True $(SolutionDir)\packages\Microsoft.Tpl.Dataflow.4.5.24\lib\portable-net45+win8+wpa81\System.Threading.Tasks.Dataflow.dll True $(SolutionDir)\packages\Microsoft.AspNet.Cors.5.2.3\lib\net45\System.Web.Cors.dll True $(SolutionDir)\packages\Microsoft.AspNet.WebApi.Core.5.2.3\lib\net45\System.Web.Http.dll True $(SolutionDir)\packages\Microsoft.AspNet.WebApi.Cors.5.2.3\lib\net45\System.Web.Http.Cors.dll True $(SolutionDir)\packages\Microsoft.AspNet.WebApi.Owin.5.2.3\lib\net45\System.Web.Http.Owin.dll True $(SolutionDir)\packages\Unity.WebAPI.5.2.3\lib\net45\Unity.WebApi.dll True {43e32c1d-21d6-4be3-8fa5-d5ba379cba53} azure {85e55ae0-3784-4968-9271-c81af560e1c1} vw_clr {e621e022-c1f8-433f-905a-ab9a3de072b7} vw_common {e4e962ae-7056-4eb0-a8c5-8dc824a4b068} cs {9e27fa94-ab34-4736-8427-fb7a2ba90d52} cs_json vowpal-wabbit-8.6.1.dfsg1/cs/azure_worker/packages.config000066400000000000000000000072541332666127000234160ustar00rootroot00000000000000 
vowpal-wabbit-8.6.1.dfsg1/cs/cli/000077500000000000000000000000001332666127000164715ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/cli/AssemblyInfo.cpp000066400000000000000000000014611332666127000215720ustar00rootroot00000000000000using namespace System; using namespace System::Reflection; using namespace System::Runtime::CompilerServices; using namespace System::Runtime::InteropServices; using namespace System::Security::Permissions; [assembly:AssemblyTitleAttribute("Vowpal Wabbit")]; [assembly:AssemblyDescriptionAttribute("")]; [assembly:AssemblyConfigurationAttribute("")]; [assembly:AssemblyCompanyAttribute("Microsoft Corp")]; [assembly:AssemblyProductAttribute("Vowpal Wabbit")]; [assembly:AssemblyCopyrightAttribute("Copyright (C) 2014")]; [assembly:AssemblyTrademarkAttribute("Copyright (C) Microsoft Corp 2012-2016, Yahoo! Inc. 2007-2012, and many individual contributors. All rights reserved")]; [assembly:AssemblyCultureAttribute("")]; [assembly:AssemblyVersionAttribute("8.4.0.1")]; [assembly:AssemblyFileVersion("8.4.0.1")]; vowpal-wabbit-8.6.1.dfsg1/cs/cli/Resource.rc000066400000000000000000000113661332666127000206150ustar00rootroot00000000000000// Microsoft Visual C++ generated resource script. // #include "resource.h" #define APSTUDIO_READONLY_SYMBOLS ///////////////////////////////////////////////////////////////////////////// // // Generated from the TEXTINCLUDE 2 resource. 
// #include "winres.h" ///////////////////////////////////////////////////////////////////////////// #undef APSTUDIO_READONLY_SYMBOLS ///////////////////////////////////////////////////////////////////////////// // English (United States) resources #if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US #pragma code_page(1252) #ifdef APSTUDIO_INVOKED ///////////////////////////////////////////////////////////////////////////// // // TEXTINCLUDE // 1 TEXTINCLUDE BEGIN "resource.h\0" END 2 TEXTINCLUDE BEGIN "#include ""winres.h""\r\n" "\0" END 3 TEXTINCLUDE BEGIN "\r\n" "\0" END #endif // APSTUDIO_INVOKED ///////////////////////////////////////////////////////////////////////////// // // Version // VS_VERSION_INFO VERSIONINFO FILEVERSION 8,4,0,1 PRODUCTVERSION 8,4,0,1 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L #else FILEFLAGS 0x0L #endif FILEOS 0x40004L FILETYPE 0x2L FILESUBTYPE 0x0L BEGIN BLOCK "StringFileInfo" BEGIN BLOCK "040904b0" BEGIN VALUE "CompanyName", "Microsoft Corp" VALUE "FileDescription", "Vowpal Wabbit" VALUE "FileVersion", "8.4.0.1" VALUE "InternalName", "VowpalWabbit.Core.dll" VALUE "LegalCopyright", "Copyright (C) Microsoft Corp 2012-2016, Yahoo! Inc. 2007-2012, and many individual contributors. All rights reserved" VALUE "OriginalFilename", "VowpalWabbit.Core.dll" VALUE "ProductName", "Vowpal Wabbit" VALUE "ProductVersion", "8.4.0.1" END END BLOCK "VarFileInfo" BEGIN VALUE "Translation", 0x409, 1200 END END #endif // English (United States) resources ///////////////////////////////////////////////////////////////////////////// #ifndef APSTUDIO_INVOKED ///////////////////////////////////////////////////////////////////////////// // // Generated from the TEXTINCLUDE 3 resource. 
// ///////////////////////////////////////////////////////////////////////////// #endif // not APSTUDIO_INVOKED vowpal-wabbit-8.6.1.dfsg1/cs/cli/clr_io.cpp000066400000000000000000000032241332666127000204450ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #include #include "clr_io.h" using namespace System; using namespace System::Runtime::InteropServices; namespace VW { clr_io_buf::clr_io_buf(Stream^ stream) : m_stream(stream), m_buffer(nullptr) { if (stream == nullptr) throw gcnew ArgumentNullException("stream"); files.push_back(0); } void clr_io_buf::ensure_buffer_size(size_t nbytes) { if (m_buffer != nullptr && m_buffer->Length >= nbytes) return; m_buffer = gcnew array((int)nbytes); } int clr_io_buf::open_file(const char* name, bool stdin_off, int flag) { return 0; } void clr_io_buf::reset_file(int f) { m_stream->Seek(0, SeekOrigin::Begin); head = space.begin(); space.end() = space.begin(); } ssize_t clr_io_buf::read_file(int f, void* buf, size_t nbytes) { ensure_buffer_size(nbytes); int readBytes = m_stream->Read(m_buffer, 0, (int)nbytes); Marshal::Copy(m_buffer, 0, IntPtr(buf), (int)nbytes); return readBytes; } size_t clr_io_buf::num_files() { return 1; } ssize_t clr_io_buf::write_file(int file, const void* buf, size_t nbytes) { ensure_buffer_size(nbytes); Marshal::Copy(IntPtr((void*)buf), m_buffer, 0, (int)nbytes); m_stream->Write(m_buffer, 0, (int)nbytes); return nbytes; } bool clr_io_buf::compressed() { return false; } void clr_io_buf::flush() { io_buf::flush(); m_stream->Flush(); } bool clr_io_buf::close_file() { // don't close stream on purpose. Caller of SaveModel should have control when to close. 
return true; } } vowpal-wabbit-8.6.1.dfsg1/cs/cli/clr_io.h000066400000000000000000000020011332666127000201020ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #include "io_buf.h" using namespace System; using namespace System::IO; namespace VW { /// /// C++ wrapper for managed . /// class clr_io_buf : public io_buf { private: gcroot m_stream; gcroot^> m_buffer; void ensure_buffer_size(size_t nbytes); public: /// /// Initializes a new instance. /// clr_io_buf(Stream^ stream); virtual int open_file(const char* name, bool stdin_off, int flag = READ); virtual void reset_file(int f); virtual ssize_t read_file(int f, void* buf, size_t nbytes); virtual size_t num_files(); virtual ssize_t write_file(int file, const void* buf, size_t nbytes); virtual bool compressed(); virtual void flush(); virtual bool close_file(); }; } vowpal-wabbit-8.6.1.dfsg1/cs/cli/clr_io_memory.cpp000066400000000000000000000022141332666127000220330ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. 
*/ #include "clr_io_memory.h" namespace VW { clr_io_memory_buf::clr_io_memory_buf() { files.push_back(0); m_iterator = m_data.begin(); } int clr_io_memory_buf::open_file(const char* name, bool stdin_off, int flag) { m_iterator = m_data.begin(); return 0; } void clr_io_memory_buf::reset_file(int f) { size_t count = m_data.size(); m_iterator = m_data.begin(); } ssize_t clr_io_memory_buf::read_file(int f, void* buf, size_t nbytes) { size_t left_over = min(nbytes, m_data.end() - m_iterator); if (left_over == 0) return 0; memcpy_s(buf, nbytes, &*m_iterator, left_over); m_iterator += left_over; return left_over; } size_t clr_io_memory_buf::num_files() { return 1; } ssize_t clr_io_memory_buf::write_file(int file, const void* buf, size_t nbytes) { m_data.insert(m_data.end(), (char*)buf, (char*)buf + nbytes); return nbytes; } bool clr_io_memory_buf::compressed() { return false; } bool clr_io_memory_buf::close_file() { return true; } } vowpal-wabbit-8.6.1.dfsg1/cs/cli/clr_io_memory.h000066400000000000000000000016661332666127000215120ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #include "io_buf.h" #include namespace VW { /// /// IO Buffer keeping data in memory. Used by VowpalWabbit::Reload. /// class clr_io_memory_buf : public io_buf { private: std::vector m_data; std::vector::const_iterator m_iterator; public: /// /// Initializes a new instance. 
/// clr_io_memory_buf(); virtual int open_file(const char* name, bool stdin_off, int flag = READ); virtual void reset_file(int f); virtual ssize_t read_file(int f, void* buf, size_t nbytes); virtual size_t num_files(); virtual ssize_t write_file(int file, const void* buf, size_t nbytes); virtual bool compressed(); virtual bool close_file(); }; } vowpal-wabbit-8.6.1.dfsg1/cs/cli/cs_vld.vcxproj000066400000000000000000000207321332666127000213640ustar00rootroot00000000000000 DebugLeakCheck x64 Debug x64 Release x64 {3D57A6AF-DE8C-40DC-ABDE-E4CE1B9C0D20} v4.5.2 ManagedCProj cs_vld ..\vowpalwabbit\ ..\..\sdl\SDL-7.0-Recommended.ruleset true DynamicLibrary true v140 true Unicode DynamicLibrary true v140 true Unicode true DynamicLibrary false v140 true Unicode true $(Platform)\$(Configuration)\$(ProjectName)\ true $(Platform)\$(Configuration)\$(ProjectName)\ false $(Platform)\$(Configuration)\$(ProjectName)\ Level3 Disabled WIN32;_DEBUG;%(PreprocessorDefinitions) NotUsing %(AdditionalIncludeDirectories) true %(AdditionalLibraryDirectories) %(AdditionalManifestFiles) Level3 Disabled LEAKCHECK;WIN32;_DEBUG;%(PreprocessorDefinitions) NotUsing %(AdditionalIncludeDirectories) true %(AdditionalLibraryDirectories) %(AdditionalManifestFiles) Level3 WIN32;NDEBUG;%(PreprocessorDefinitions) NotUsing %(AdditionalIncludeDirectories) true %(AdditionalLibraryDirectories) This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
vowpal-wabbit-8.6.1.dfsg1/cs/cli/cs_vld.vcxproj.filters000066400000000000000000000021441332666127000230300ustar00rootroot00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hh;hpp;hxx;hm;inl;inc;xsd {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms Header Files Source Files vowpal-wabbit-8.6.1.dfsg1/cs/cli/packages.config000066400000000000000000000003731332666127000214410ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/cli/resource.h000066400000000000000000000006031332666127000204700ustar00rootroot00000000000000//{{NO_DEPENDENCIES}} // Microsoft Visual C++ generated include file. // Used by Resource.rc // Next default values for new objects // #ifdef APSTUDIO_INVOKED #ifndef APSTUDIO_READONLY_SYMBOLS #define _APS_NEXT_RESOURCE_VALUE 101 #define _APS_NEXT_COMMAND_VALUE 40001 #define _APS_NEXT_CONTROL_VALUE 1001 #define _APS_NEXT_SYMED_VALUE 101 #endif #endif vowpal-wabbit-8.6.1.dfsg1/cs/cli/spanning_tree_clr.cpp000066400000000000000000000013021332666127000226650ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. 
*/ #include "spanning_tree_clr.h" #include "vw_clr.h" using namespace std; namespace VW { SpanningTreeClr::SpanningTreeClr() { m_spanningTree = new SpanningTree; } SpanningTreeClr::~SpanningTreeClr() { try { delete m_spanningTree; } CATCHRETHROW } void SpanningTreeClr::Start() { try { m_spanningTree->Start(); } CATCHRETHROW } void SpanningTreeClr::Stop() { try { m_spanningTree->Stop(); } CATCHRETHROW } void SpanningTreeClr::Run() { try { m_spanningTree->Run(); } CATCHRETHROW } } vowpal-wabbit-8.6.1.dfsg1/cs/cli/spanning_tree_clr.h000066400000000000000000000015231332666127000223370ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #pragma once #include "spanning_tree.h" namespace VW { /// /// Managed wrapper for AllReduce spanning tree implementation. /// public ref class SpanningTreeClr { private: SpanningTree* m_spanningTree; public: /// /// Initializes a new instance. /// SpanningTreeClr(); ~SpanningTreeClr(); /// /// Starts the server on a background thread. /// void Start(); /// /// Runs the server on the calling thread. /// void Run(); /// /// Stops the background thread. 
/// void Stop(); }; } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vld_clr.cpp000066400000000000000000000022561332666127000206270ustar00rootroot00000000000000#include "vld_clr.h" namespace VLD { int VldReportHook(int reportType, wchar_t *message, int *returnValue) { auto msg = gcnew String(message); System::Diagnostics::Debug::Write(msg); if (VisualLeakDetector::Instance) VisualLeakDetector::Instance->ReportInternal(reportType, msg); *returnValue = 0; /* don't debug break */ return 1; /* handled */ } VisualLeakDetector::VisualLeakDetector() : m_messages(gcnew List^>) { if (Instance != nullptr) { throw gcnew NotSupportedException("Only a single instance is supported."); } Instance = this; VLDSetReportHook(VLD_RPTHOOK_INSTALL, VldReportHook); } VisualLeakDetector::~VisualLeakDetector() { this->!VisualLeakDetector(); } VisualLeakDetector::!VisualLeakDetector() { VLDSetReportHook(VLD_RPTHOOK_REMOVE, VldReportHook); Instance = nullptr; } void VisualLeakDetector::ReportInternal(int reportType, String^ msg) { m_messages->Add(Tuple::Create(reportType, msg)); } void VisualLeakDetector::ReportLeaks() { VLDReportLeaks(); } List^>^ VisualLeakDetector::Messages::get() { return m_messages; } void VisualLeakDetector::MarkAllLeaksAsReported() { VLDMarkAllLeaksAsReported(); } } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vld_clr.h000066400000000000000000000012641332666127000202720ustar00rootroot00000000000000#include #include using namespace System; using namespace System::Collections::Generic; using namespace System::Runtime::InteropServices; namespace VLD { int VldReportHook(int reportType, wchar_t *message, int *returnValue); public ref class VisualLeakDetector { private: initonly List^>^ m_messages; !VisualLeakDetector(); public: VisualLeakDetector(); ~VisualLeakDetector(); static VisualLeakDetector^ Instance; void ReportInternal(int reportType, String^ msg); property List^>^ Messages { List^>^ get(); } void ReportLeaks(); void MarkAllLeaksAsReported(); }; } 
vowpal-wabbit-8.6.1.dfsg1/cs/cli/vowpalwabbit.cpp000066400000000000000000000515001332666127000216770ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #include "vw_clr.h" #include "vowpalwabbit.h" #include "best_constant.h" #include "parser.h" #include "hash.h" #include "vw_example.h" #include "vw_allreduce.h" #include "vw_builder.h" #include "clr_io.h" #include "lda_core.h" #include "parse_example.h" #include "parse_example_json.h" using namespace System; using namespace System::Collections::Generic; using namespace System::Text; namespace VW { VowpalWabbit::VowpalWabbit(VowpalWabbitSettings^ settings) : VowpalWabbitBase(settings) { if (settings == nullptr) { throw gcnew ArgumentNullException("settings"); } if (settings->ParallelOptions != nullptr) { m_vw->all_reduce_type = AllReduceType::Thread; auto total = settings->ParallelOptions->MaxDegreeOfParallelism; if (settings->Root == nullptr) { m_vw->all_reduce = new AllReduceThreads(total, settings->Node); } else { auto parent_all_reduce = (AllReduceThreads*)settings->Root->m_vw->all_reduce; m_vw->all_reduce = new AllReduceThreads(parent_all_reduce, total, settings->Node); } } try { m_hasher = GetHasher(); } CATCHRETHROW } VowpalWabbit::VowpalWabbit(String^ args) : VowpalWabbit(gcnew VowpalWabbitSettings(args)) { } void VowpalWabbit::Driver() { try { LEARNER::generic_driver(*m_vw); } CATCHRETHROW } void VowpalWabbit::RunMultiPass() { if (m_vw->numpasses > 1) { try { adjust_used_index(*m_vw); m_vw->do_reset_source = true; VW::start_parser(*m_vw); LEARNER::generic_driver(*m_vw); VW::end_parser(*m_vw); } CATCHRETHROW } } VowpalWabbitPerformanceStatistics^ VowpalWabbit::PerformanceStatistics::get() { // see parse_args.cc:finish(...) 
auto stats = gcnew VowpalWabbitPerformanceStatistics(); if (m_vw->current_pass == 0) { stats->NumberOfExamplesPerPass = m_vw->sd->example_number; } else { stats->NumberOfExamplesPerPass = m_vw->sd->example_number / m_vw->current_pass; } stats->WeightedExampleSum = m_vw->sd->weighted_examples(); stats->WeightedLabelSum = m_vw->sd->weighted_labels; if (m_vw->holdout_set_off) if (m_vw->sd->weighted_labeled_examples > 0) stats->AverageLoss = m_vw->sd->sum_loss / m_vw->sd->weighted_labeled_examples; else stats->AverageLoss = System::Double::NaN; else if ((m_vw->sd->holdout_best_loss == FLT_MAX) || (m_vw->sd->holdout_best_loss == FLT_MAX * 0.5)) stats->AverageLoss = System::Double::NaN; else stats->AverageLoss = m_vw->sd->holdout_best_loss; float best_constant; float best_constant_loss; if (get_best_constant(*m_vw, best_constant, best_constant_loss)) { stats->BestConstant = best_constant; if (best_constant_loss != FLT_MIN) { stats->BestConstantLoss = best_constant_loss; } } stats->TotalNumberOfFeatures = m_vw->sd->total_features; return stats; } uint64_t VowpalWabbit::HashSpace(String^ s) { auto newHash = m_hasher(s, 0); #ifdef _DEBUG auto oldHash = HashSpaceNative(s); assert(newHash == oldHash); #endif return (uint32_t)newHash; } uint64_t VowpalWabbit::HashFeature(String^ s, size_t u) { auto newHash = m_hasher(s, u) & m_vw->parse_mask; #ifdef _DEBUG auto oldHash = HashFeatureNative(s, u); assert(newHash == oldHash); #endif return (uint64_t)newHash; } uint64_t VowpalWabbit::HashSpaceNative(String^ s) { auto bytes = System::Text::Encoding::UTF8->GetBytes(s); auto handle = GCHandle::Alloc(bytes, GCHandleType::Pinned); try { return VW::hash_space(*m_vw, reinterpret_cast(handle.AddrOfPinnedObject().ToPointer())); } CATCHRETHROW finally { handle.Free(); } } uint64_t VowpalWabbit::HashFeatureNative(String^ s, uint64_t u) { auto bytes = System::Text::Encoding::UTF8->GetBytes(s); auto handle = GCHandle::Alloc(bytes, GCHandleType::Pinned); try { return VW::hash_feature(*m_vw, 
reinterpret_cast(handle.AddrOfPinnedObject().ToPointer()), u); } CATCHRETHROW finally { handle.Free(); } } void VowpalWabbit::Learn(List^ examples) { multi_ex ex_coll; try { for each (auto ex in examples) { example* pex = ex->m_example; ex_coll.push_back(pex); } m_vw->learn(ex_coll); // as this is not a ring-based example it is not freed as_multiline(m_vw->l)->finish_example(*m_vw, ex_coll); } CATCHRETHROW finally{ } } void VowpalWabbit::Predict(List^ examples) { multi_ex ex_coll; try { for each (auto ex in examples) { example* pex = ex->m_example; ex_coll.push_back(pex); } as_multiline(m_vw->l)->predict(ex_coll); // as this is not a ring-based example it is not freed as_multiline(m_vw->l)->finish_example(*m_vw, ex_coll); } CATCHRETHROW finally{ } } void VowpalWabbit::Learn(VowpalWabbitExample^ ex) { #if _DEBUG if (ex == nullptr) { throw gcnew ArgumentNullException("ex"); } #endif try { m_vw->learn(*ex->m_example); // as this is not a ring-based example it is not free'd as_singleline(m_vw->l)->finish_example(*m_vw, *ex->m_example); } CATCHRETHROW } generic T VowpalWabbit::Learn(VowpalWabbitExample^ ex, IVowpalWabbitPredictionFactory^ predictionFactory) { #if _DEBUG if (ex == nullptr) throw gcnew ArgumentNullException("ex"); if (nullptr == predictionFactory) throw gcnew ArgumentNullException("predictionFactory"); #endif try { m_vw->learn(*ex->m_example); auto prediction = predictionFactory->Create(m_vw, ex->m_example); // as this is not a ring-based example it is not free'd as_singleline(m_vw->l)->finish_example(*m_vw, *ex->m_example); return prediction; } CATCHRETHROW } void VowpalWabbit::Predict(VowpalWabbitExample^ ex) { #if _DEBUG if (ex == nullptr) throw gcnew ArgumentNullException("ex"); #endif try { as_singleline(m_vw->l)->predict(*ex->m_example); // as this is not a ring-based example it is not free'd as_singleline(m_vw->l)->finish_example(*m_vw, *ex->m_example); } CATCHRETHROW } generic T VowpalWabbit::Predict(VowpalWabbitExample^ ex, 
IVowpalWabbitPredictionFactory^ predictionFactory) { #if _DEBUG if (ex == nullptr) throw gcnew ArgumentNullException("ex"); #endif try { as_singleline(m_vw->l)->predict(*ex->m_example); auto prediction = predictionFactory->Create(m_vw, ex->m_example); // as this is not a ring-based example it is not free'd as_singleline(m_vw->l)->finish_example(*m_vw, *ex->m_example); return prediction; } CATCHRETHROW } public ref struct ParseJsonState { VowpalWabbit^ vw; List^ examples; }; example& get_example_from_pool(void* v) { interior_ptr state = (interior_ptr)v; auto ex = (*state)->vw->GetOrCreateNativeExample(); (*state)->examples->Add(ex); return *ex->m_example; } List^ VowpalWabbit::ParseDecisionServiceJson(cli::array^ json, int offset, int length, bool copyJson, [Out] VowpalWabbitDecisionServiceInteractionHeader^% header) { #if _DEBUG if (json == nullptr) throw gcnew ArgumentNullException("json"); #endif try { header = gcnew VowpalWabbitDecisionServiceInteractionHeader(); ParseJsonState^ state = gcnew ParseJsonState(); state->vw = this; state->examples = gcnew List(); try { auto ex = GetOrCreateNativeExample(); state->examples->Add(ex); v_array examples = v_init(); example* native_example = ex->m_example; examples.push_back(native_example); interior_ptr state_ptr = &state; pin_ptr data = &json[0]; data += offset; DecisionServiceInteraction interaction; if (m_vw->audit) VW::read_line_decision_service_json(*m_vw, examples, reinterpret_cast(data), length, copyJson, get_example_from_pool, &state, &interaction); else VW::read_line_decision_service_json(*m_vw, examples, reinterpret_cast(data), length, copyJson, get_example_from_pool, &state, &interaction); // finalize example VW::setup_examples(*m_vw, examples); // delete native array of pointers, keep examples examples.delete_v(); header->EventId = gcnew String(interaction.eventId.c_str()); header->Actions = gcnew cli::array((int)interaction.actions.size()); int index = 0; for (auto a : interaction.actions) 
header->Actions[index++] = (int)a; header->Probabilities = gcnew cli::array((int)interaction.probabilities.size()); index = 0; for (auto p : interaction.probabilities) header->Probabilities[index++] = p; header->ProbabilityOfDrop = interaction.probabilityOfDrop; return state->examples; } catch (...) { // cleanup for each (auto ex in state->examples) delete ex; throw; } } CATCHRETHROW } List^ VowpalWabbit::ParseJson(String^ line) { #if _DEBUG if (line == nullptr) throw gcnew ArgumentNullException("line"); #endif auto bytes = System::Text::Encoding::UTF8->GetBytes(line); auto valueHandle = GCHandle::Alloc(bytes, GCHandleType::Pinned); try { ParseJsonState^ state = gcnew ParseJsonState(); state->vw = this; state->examples = gcnew List(); try { auto ex = GetOrCreateNativeExample(); state->examples->Add(ex); v_array examples = v_init(); example* native_example = ex->m_example; examples.push_back(native_example); interior_ptr state_ptr = &state; if (m_vw->audit) VW::read_line_json(*m_vw, examples, reinterpret_cast(valueHandle.AddrOfPinnedObject().ToPointer()), get_example_from_pool, &state); else VW::read_line_json(*m_vw, examples, reinterpret_cast(valueHandle.AddrOfPinnedObject().ToPointer()), get_example_from_pool, &state); // finalize example VW::setup_examples(*m_vw, examples); // remember the input string for debugging purposes ex->VowpalWabbitString = line; return state->examples; } catch (...) 
{ // cleanup for each (auto ex in state->examples) delete ex; throw; } } CATCHRETHROW finally { valueHandle.Free(); } } VowpalWabbitExample^ VowpalWabbit::ParseLine(String^ line) { #if _DEBUG if (line == nullptr) throw gcnew ArgumentNullException("line"); #endif auto ex = GetOrCreateNativeExample(); auto bytes = System::Text::Encoding::UTF8->GetBytes(line); auto valueHandle = GCHandle::Alloc(bytes, GCHandleType::Pinned); try { try { VW::read_line(*m_vw, ex->m_example, reinterpret_cast(valueHandle.AddrOfPinnedObject().ToPointer())); // finalize example VW::setup_example(*m_vw, ex->m_example); // remember the input string for debugging purposes ex->VowpalWabbitString = line; return ex; } catch (...) { delete ex; throw; } } CATCHRETHROW finally { valueHandle.Free(); } } void VowpalWabbit::Learn(String^ line) { #if _DEBUG if (String::IsNullOrEmpty(line)) throw gcnew ArgumentException("lines must not be empty. For multi-line examples use Learn(IEnumerable) overload."); #endif VowpalWabbitExample^ example = nullptr; try { example = ParseLine(line); Learn(example); } finally { delete example; } } void VowpalWabbit::Predict(String^ line) { #if _DEBUG if (String::IsNullOrEmpty(line)) throw gcnew ArgumentException("lines must not be empty. For multi-line examples use Predict(IEnumerable) overload."); #endif VowpalWabbitExample^ example = nullptr; try { example = ParseLine(line); Predict(example); } finally { delete example; } } generic TPrediction VowpalWabbit::Learn(String^ line, IVowpalWabbitPredictionFactory^ predictionFactory) { #if _DEBUG if (String::IsNullOrEmpty(line)) throw gcnew ArgumentException("lines must not be empty. 
For multi-line examples use Learn(IEnumerable) overload."); #endif VowpalWabbitExample^ example = nullptr; try { example = ParseLine(line); return Learn(example, predictionFactory); } finally { delete example; } } generic T VowpalWabbit::Predict(String^ line, IVowpalWabbitPredictionFactory^ predictionFactory) { #if _DEBUG if (String::IsNullOrEmpty(line)) throw gcnew ArgumentException("lines must not be empty. For multi-line examples use Learn(IEnumerable) overload."); #endif VowpalWabbitExample^ example = nullptr; try { example = ParseLine(line); return Predict(example, predictionFactory); } finally { delete example; } } void VowpalWabbit::CacheEmptyLine() { auto empty = GetOrCreateNativeExample(); empty->MakeEmpty(this); ReturnExampleToPool(empty); } void VowpalWabbit::Learn(IEnumerable^ lines) { #if _DEBUG if (lines == nullptr) throw gcnew ArgumentNullException("lines"); #endif auto examples = gcnew List; try { for each (auto line in lines) { auto ex = ParseLine(line); examples->Add(ex); } // Need to add an empty line to cache file CacheEmptyLine(); Learn(examples); } finally { for each (auto ex in examples) { delete ex; } } } void VowpalWabbit::Predict(IEnumerable^ lines) { #if _DEBUG if (lines == nullptr) throw gcnew ArgumentNullException("lines"); #endif auto examples = gcnew List; try { for each (auto line in lines) { auto ex = ParseLine(line); examples->Add(ex); } // Need to add an empty line to cache file CacheEmptyLine(); Predict(examples); } finally { for each (auto ex in examples) { delete ex; } } } generic T VowpalWabbit::Learn(IEnumerable^ lines, IVowpalWabbitPredictionFactory^ predictionFactory) { #if _DEBUG if (lines == nullptr) throw gcnew ArgumentNullException("lines"); #endif auto examples = gcnew List; try { for each (auto line in lines) { auto ex = ParseLine(line); examples->Add(ex); Learn(ex); } auto empty = GetOrCreateNativeExample(); examples->Add(empty); empty->MakeEmpty(this); Learn(empty); return examples[0]->GetPrediction(this, 
predictionFactory); } finally { for each (auto ex in examples) { delete ex; } } } generic T VowpalWabbit::Predict(IEnumerable^ lines, IVowpalWabbitPredictionFactory^ predictionFactory) { #if _DEBUG if (lines == nullptr) throw gcnew ArgumentNullException("lines"); #endif auto examples = gcnew List; try { for each (auto line in lines) { auto ex = ParseLine(line); examples->Add(ex); Predict(ex); } auto empty = GetOrCreateNativeExample(); examples->Add(empty); empty->MakeEmpty(this); Predict(empty); return examples[0]->GetPrediction(this, predictionFactory); } finally { for each (auto ex in examples) { delete ex; } } } void VowpalWabbit::EndOfPass() { try { m_vw->l->end_pass(); sync_stats(*m_vw); } CATCHRETHROW } /// /// Hashes the given value . /// /// String to be hashed. /// Hash offset. /// The resulting hash code. //template uint64_t hashall(String^ s, int offset, int count, uint64_t u) { // get raw bytes from string auto keys = gcnew cli::array(Encoding::UTF8->GetMaxByteCount(count)); int length = Encoding::UTF8->GetBytes(s, offset, count, keys, 0); // TOOD: benchmark and verify correctness //if (replaceSpace) //{ // for (int j = 0; j < length;) // { // var k = keys[j]; // if (k == ' ') // { // keys[j] = '_'; // } // j++; // // take care of UTF-8 multi-byte characters // while (k & 0xC == 0xC) // { // j++; // k <<= 1; // } // } //} uint32_t h1 = (uint32_t)u; uint32_t k1 = 0; const uint32_t c1 = 0xcc9e2d51; const uint32_t c2 = 0x1b873593; int i = 0; while (i <= length - 4) { // convert byte array to integer k1 = (uint32_t)(keys[i] | keys[i + 1] << 8 | keys[i + 2] << 16 | keys[i + 3] << 24); k1 *= c1; k1 = ROTL32(k1, 15); k1 *= c2; h1 ^= k1; h1 = ROTL32(h1, 13); h1 = h1 * 5 + 0xe6546b64; i += 4; } k1 = 0; int tail = length - length % 4; switch (length & 3) { case 3: k1 ^= (uint32_t)(keys[tail + 2] << 16); case 2: k1 ^= (uint32_t)(keys[tail + 1] << 8); case 1: k1 ^= (uint32_t)(keys[tail]); k1 *= c1; k1 = ROTL32(k1, 15); k1 *= c2; h1 ^= k1; break; } // finalization 
h1 ^= (uint32_t)length; return MURMUR_HASH_3::fmix(h1); } uint64_t hashall(String^ s, uint64_t u) { return hashall(s, 0, s->Length, u); } /// /// Hashes the given value . /// /// String to be hashed. /// Hash offset. /// The resulting hash code. size_t hashstring(String^ s, size_t u) { int offset = 0; int end = s->Length; if (end == 0) return u; //trim leading whitespace but not UTF-8 for (; offset < s->Length && s[offset] <= 0x20; offset++); for (; end >= offset && s[end - 1] <= 0x20; end--); int sInt = 0; for (int i = offset; i < end; i++) { auto c = s[i]; if (c >= '0' && c <= '9') sInt = 10 * sInt + (c - '0'); else return hashall(s, offset, end - offset, u); } return sInt + u; } Func^ VowpalWabbit::GetHasher() { //feature manipulation string hash_function("strings"); if (m_vw->opts_n_args.vm.count("hash")) { hash_function = m_vw->opts_n_args.vm["hash"].as(); } if (hash_function == "strings") { return gcnew Func(&hashstring); } else if (hash_function == "all") { return gcnew Func(&hashall); } else { THROW("Unsupported hash function: " << hash_function); } } VowpalWabbit^ VowpalWabbit::Native::get() { return this; } VowpalWabbitExample^ VowpalWabbit::GetOrCreateNativeExample() { auto ex = m_examples->Remove(); if (ex == nullptr) { try { auto ex = VW::alloc_examples(0, 1); m_vw->p->lp.default_label(&ex->l); return gcnew VowpalWabbitExample(this, ex); } CATCHRETHROW } try { VW::empty_example(*m_vw, *ex->m_example); m_vw->p->lp.default_label(&ex->m_example->l); return ex; } CATCHRETHROW } void VowpalWabbit::ReturnExampleToPool(VowpalWabbitExample^ ex) { #if _DEBUG if (m_vw == nullptr) throw gcnew ObjectDisposedException("VowpalWabbitExample was not properly disposed as the owner is already disposed"); #endif if (ex == nullptr) throw gcnew ArgumentNullException("ex"); // make sure we're not a ring based example assert(!VW::is_ring_example(*m_vw, ex->m_example)); // the bag might have reached it's limit if (m_examples != nullptr) { if (!m_examples->TryAdd(ex)) 
DisposeExample(ex); } #if _DEBUG else // this should not happen as m_vw is already set to null throw gcnew ObjectDisposedException("VowpalWabbitExample was disposed after the owner is disposed"); #endif } cli::array^>^ VowpalWabbit::GetTopicAllocation(int top) { uint64_t length = (uint64_t)1 << m_vw->num_bits; // using jagged array to enable LINQ auto K = (int)m_vw->lda; auto allocation = gcnew cli::array^>(K); // TODO: better way of peaking into lda? auto lda_rho = m_vw->opts_n_args.vm["lda_rho"].as(); std::vector top_weights; // over topics for (int topic = 0; topic < K; topic++) { get_top_weights(m_vw, top, topic, top_weights); auto clr_weights = gcnew List(top); allocation[topic] = clr_weights; for (auto& pair : top_weights) clr_weights->Add(gcnew VowpalWabbitFeature(this, pair.x, pair.weight_index)); } return allocation; } template cli::array^>^ VowpalWabbit::FillTopicAllocation(T& weights) { uint64_t length = (uint64_t)1 << m_vw->num_bits; // using jagged array to enable LINQ auto K = (int)m_vw->lda; auto allocation = gcnew cli::array^>(K); for (int k = 0; k < K; k++) allocation[k] = gcnew cli::array((int)length); // TODO: better way of peaking into lda? auto lda_rho = m_vw->opts_n_args.vm["lda_rho"].as(); for (auto iter = weights.begin(); iter != weights.end(); ++iter) { // over topics weight* wp = &(*iter); for (uint64_t k = 0; k < K; k++) allocation[(int)k][(int)iter.index()] = wp[k] + lda_rho; } return allocation; } cli::array^>^ VowpalWabbit::GetTopicAllocation() { // over weights if (m_vw->weights.sparse) return FillTopicAllocation(m_vw->weights.sparse_weights); else return FillTopicAllocation(m_vw->weights.dense_weights); } } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vowpalwabbit.h000066400000000000000000000253411332666127000213500ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. 
*/ #pragma once #include "vw_clr.h" #include "vw_base.h" #include "vw_model.h" #include "vw_prediction.h" #include "vw_interface.h" namespace VW { ref class VowpalWabbitExampleBuilder; ref struct VowpalWabbitFeature; /// /// Simple string example based wrapper for vowpal wabbit. /// /// If possible use VowpalWabbit{T} types as this wrapper suffers from marshalling performance wise. public ref class VowpalWabbit : VowpalWabbitBase, IVowpalWabbitExamplePool { private: /// /// Select the right hash method based on args. /// Func^ GetHasher(); /// /// The selected hasher method. /// /// /// Avoiding if-else for hash function selection. Delegates outperform function pointers according to http://stackoverflow.com/questions/13443250/performance-of-c-cli-function-pointers-versus-net-delegates /// initonly Func^ m_hasher; template cli::array^>^ FillTopicAllocation(T& weights); /// /// Write and empty line example to vw cache file. /// /// /// This is used to emit empty lines to cache while handling multiline examples. /// Used internally by Learn(IEnumerable lines) /// void CacheEmptyLine(); public: /// /// Initializes a new instance. /// /// The settings. VowpalWabbit(VowpalWabbitSettings^ settings); /// /// Initializes a new instance. /// /// Command line arguments. VowpalWabbit(String^ args); /// /// Run multi-passe training. /// void RunMultiPass(); /// /// Gets Collected performance statistics. /// property VowpalWabbitPerformanceStatistics^ PerformanceStatistics { VowpalWabbitPerformanceStatistics^ get(); } /// /// Parses using the C++ parser. /// /// /// Returns a ready to be used for or . /// VowpalWabbitExample^ ParseLine(String^ line); /// /// Parses using the C++ parser. /// TODO: this should return VowpalWabbitExampleCollection, but that would require moving VowpalWaabitExampleCollection to C++/CLI /// /// /// Returns a ready to be used for or . 
/// List^ ParseJson(String^ line); /// /// Parses using the C++ parser and supports the extra wrapping introduced by Decision Service. /// TODO: this should return VowpalWabbitExampleCollection, but that would require moving VowpalWaabitExampleCollection to C++/CLI /// TODO: the header should be passed along with the List of VowpalWabbit examples, but that requires additional care wrt disposing items. /// /// This needs to be null-terminated string. /// If true the json array is copied prior to destructive parsing /// /// Returns a ready to be used for or . /// List^ VowpalWabbit::ParseDecisionServiceJson(cli::array^ json, int offset, int length, bool copyJson, [Out] VowpalWabbitDecisionServiceInteractionHeader^% header); /// /// Hashes the given namespace . /// /// String to be hashed. /// The resulting hash code. /// The hash code depends on the vowpal wabbit instance as different has functions can be configured. uint64_t HashSpaceNative(String^ s); /// /// Hashes the given namespace . /// /// String to be hashed. /// The resulting hash code. /// The hash code depends on the vowpal wabbit instance as different has functions can be configured. uint64_t HashSpace(String^ s); /// /// Hash the given feature . /// /// String to be hashed. /// Hash offset. /// The resulting hash code. /// The hash code depends on the vowpal wabbit instance as different has functions can be configured. uint64_t HashFeatureNative(String^ s, size_t u); /// /// Hash the given feature . /// /// String to be hashed. /// Hash offset. /// The resulting hash code. /// The hash code depends on the vowpal wabbit instance as different has functions can be configured. uint64_t HashFeature(String^ s, size_t u); /// /// Return full topic allocation [topic, feature]. /// cli::array^>^ GetTopicAllocation(); /// /// Return the topic weights. /// cli::array^>^ GetTopicAllocation(int top); /// /// The associated instance learns from this example and returns the prediction result for this example. 
/// /// The prediction result. /// The prediction result type. generic T Learn(VowpalWabbitExample^ example, IVowpalWabbitPredictionFactory^ predictionFactory); /// /// Predicts for the given example. /// /// The prediction type. /// Example to predict for. /// The prediction factory to be used. See . /// The prediction for the given . generic T Predict(VowpalWabbitExample^ example, IVowpalWabbitPredictionFactory^ predictionFactory); /// /// Learns from the given example. /// /// Example to learn from. void Learn(VowpalWabbitExample^ example); /// /// Learns from the given multiline example. /// /// Example to learn from. void Learn(List^ examples); /// /// Predicts for the given example. /// /// Example to predict for. void Predict(VowpalWabbitExample^ example); /// /// Predicts for the given multiline example. /// /// Example to predict for. void Predict(List^ examples); /// /// Learns from string data. /// /// Data in vw string format. void Learn(String^ line); /// /// Predicts for string data. /// /// Data in vw string format. void Predict(String^ line); /// /// Learns from string data. /// /// The prediction type. /// Data in vw string format. /// The prediction factory to be used. See . /// The prediction for the given . generic T Learn(String^ line, IVowpalWabbitPredictionFactory^ predictionFactory); /// /// Predicts for string data. /// /// The prediction type. /// Data in vw string format. /// The prediction factory to be used. See . /// The prediction for the given . generic T Predict(String^ line, IVowpalWabbitPredictionFactory^ predictionFactory); /// /// Learns from multi-line examples. /// /// Data in vw string format. void Learn(IEnumerable^ lines); /// /// Predicts for multi-line examples. /// /// Data in vw string format. void Predict(IEnumerable^ lines); /// /// Learns from multi-line examples. /// /// The prediction type. /// Data in vw string format. /// The prediction factory to be used. See . /// The prediction for the given . 
generic T Learn(IEnumerable^ lines, IVowpalWabbitPredictionFactory^ predictionFactory); /// /// Predicts for the given lines. /// /// The prediction type. /// Data in vw string format. /// The prediction factory to be used. See . /// The prediction for the given . generic T Predict(IEnumerable^ lines, IVowpalWabbitPredictionFactory^ predictionFactory); /// /// Signals the end of a pass. /// void EndOfPass(); /// /// Invokes the driver. /// void Driver(); virtual property VowpalWabbit^ Native { virtual VowpalWabbit^ get() sealed; } /// /// Gets or creates a native example from a CLR maintained, but natively allocated pool. /// /// A ready to use cleared native example data structure. virtual VowpalWabbitExample^ GetOrCreateNativeExample() sealed; /// /// Puts a native example data structure back into the pool. /// /// The example to be returned. virtual void ReturnExampleToPool(VowpalWabbitExample^ example) sealed; }; } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_arguments.h000066400000000000000000000054451332666127000213730ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #pragma once #include #include "vw.h" #include using namespace std; using namespace System; using namespace System::Text; using namespace System::Collections::Generic; namespace VW { /// /// command line arguments extracted from native C++. 
/// public ref class VowpalWabbitArguments { private: initonly String^ m_data; initonly String^ m_finalRegressor; const bool m_testonly; const int m_passes; List^ m_regressors; String^ m_commandLine; int m_numberOfActions; float m_learning_rate; float m_power_t; internal: VowpalWabbitArguments(vw* vw) : m_data(gcnew String(vw->data_filename.c_str())), m_finalRegressor(gcnew String(vw->final_regressor_name.c_str())), m_testonly(!vw->training), m_passes((int)vw->numpasses) { po::variables_map& vm = vw->opts_n_args.vm; if (vm.count("initial_regressor") || vm.count("i")) { m_regressors = gcnew List; vector regs = vm["initial_regressor"].as< vector >(); for (auto& r : regs) m_regressors->Add(gcnew String(r.c_str())); } StringBuilder^ sb = gcnew StringBuilder(); for (auto& s : vw->opts_n_args.args) sb->AppendFormat("{0} ", gcnew String(s.c_str())); m_commandLine = sb->ToString()->TrimEnd(); if (vw->opts_n_args.vm.count("cb")) m_numberOfActions = (int)vw->opts_n_args.vm["cb"].as(); m_learning_rate = vw->eta; m_power_t = vw->power_t; } public: /// /// The input data file. /// property String^ Data { String^ get() { return m_data; } } /// /// True if "-t" for test only mode supplied as part of arguments. /// property bool TestOnly { bool get() { return m_testonly; } } /// /// Number of passes. /// property int NumPasses { int get() { return m_passes; } } /// /// The output model filename. /// property String^ FinalRegressor { String^ get() { return m_finalRegressor; } } /// ///The list of input model filenames. 
/// property List^ InitialRegressors { List^ get() { return m_regressors; } } property String^ CommandLine { String^ get() { return m_commandLine; } } property int ContextualBanditNumberOfActions { int get() { return m_numberOfActions; } } property float LearningRate { float get() { return m_learning_rate; } } property float PowerT { float get() { return m_power_t; } } }; } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_base.cpp000066400000000000000000000156361332666127000206360ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #include "vw_clr.h" #include "vw_base.h" #include "vw_model.h" #include "vw_prediction.h" #include "vw_example.h" #include "clr_io.h" #include "clr_io_memory.h" #include "vw_exception.h" #include "parse_args.h" #include "parse_regressor.h" using namespace System; using namespace System::Collections::Generic; using namespace System::Text; namespace VW { void trace_listener_cli(void* context, const std::string& message) { auto listener = (Action^)GCHandle::FromIntPtr(IntPtr(context)).Target; auto str = gcnew String(message.c_str()); listener(str->TrimEnd()); } VowpalWabbitBase::VowpalWabbitBase(VowpalWabbitSettings^ settings) : m_examples(nullptr), m_vw(nullptr), m_model(nullptr), m_settings(settings != nullptr ? 
settings : gcnew VowpalWabbitSettings), m_instanceCount(0) { if (m_settings->EnableThreadSafeExamplePooling) m_examples = Bag::CreateLockFree(m_settings->MaxExamples); else m_examples = Bag::Create(m_settings->MaxExamples); try { try { std::string string; if (settings->Arguments != nullptr) string = msclr::interop::marshal_as(settings->Arguments); trace_message_t trace_listener = nullptr; void* trace_context = nullptr; if (settings->TraceListener != nullptr) { m_traceListener = GCHandle::Alloc(settings->TraceListener); trace_context = GCHandle::ToIntPtr(m_traceListener).ToPointer(); trace_listener = trace_listener_cli; } if (settings->Model != nullptr) { m_model = settings->Model; if (!settings->Verbose && !settings->Arguments->Contains("--quiet") && !m_model->Arguments->CommandLine->Contains("--quiet")) string.append(" --quiet"); m_vw = VW::seed_vw_model(m_model->m_vw, string, trace_listener, trace_context); m_model->IncrementReference(); } else { if (settings->ModelStream == nullptr) { if (!settings->Verbose && !settings->Arguments->Contains("--quiet")) string.append(" --quiet"); m_vw = VW::initialize(string, nullptr, false, trace_listener, trace_context); } else { clr_io_buf model(settings->ModelStream); if (!settings->Arguments->Contains("--no_stdin")) string += " --no_stdin"; m_vw = VW::initialize(string, &model, false, trace_listener, trace_context); delete settings->ModelStream; settings->ModelStream = nullptr; } } } catch (...) 
/// <summary>
/// Deterministic cleanup: runs the finalizer logic and then releases the trace listener handle.
/// </summary>
VowpalWabbitBase::~VowpalWabbitBase()
{
    this->!VowpalWabbitBase();

    if (m_traceListener.IsAllocated)
        m_traceListener.Free();
}

/// <summary>
/// Finalizer: releases the native data structures unless the reference count is still positive.
/// </summary>
VowpalWabbitBase::!VowpalWabbitBase()
{
    if (m_instanceCount <= 0)
    {
        this->InternalDispose();
    }
}

/// <summary>
/// Thread-safe increment of the reference count.
/// </summary>
void VowpalWabbitBase::IncrementReference()
{
    // thread-safe increase of model reference counter
    System::Threading::Interlocked::Increment(m_instanceCount);
}

/// <summary>
/// Thread-safe decrement of the reference count; disposes the native data structures
/// once the count drops to zero or below.
/// </summary>
void VowpalWabbitBase::DecrementReference()
{
    // thread-safe decrease of model reference counter
    if (System::Threading::Interlocked::Decrement(m_instanceCount) <= 0)
    {
        this->InternalDispose();
    }
}

/// <summary>
/// Frees the native example held by <paramref name="ex"/> and detaches it, and every
/// example reachable through its InnerExample chain, from the native pointer and the owner.
/// </summary>
/// <param name="ex">The example whose native memory is released.</param>
void VowpalWabbitBase::DisposeExample(VowpalWabbitExample^ ex)
{
    VW::dealloc_example(m_vw->p->lp.delete_label, *ex->m_example);
    ::free_it(ex->m_example);

    // cleanup pointers in example chain
    auto inner = ex;
    while ((inner = inner->InnerExample) != nullptr)
    {
        inner->m_owner = nullptr;
        inner->m_example = nullptr;
    }

    ex->m_example = nullptr;

    // avoid that this example is returned again
    ex->m_owner = nullptr;
}

/// <summary>
/// Releases the pooled examples, drops the reference to a shared model (if any) and
/// finishes the native instance. Does nothing if the native instance is already gone.
/// </summary>
void VowpalWabbitBase::InternalDispose()
{
    if (m_vw != nullptr)
    {
        // de-allocate example pools that are managed for each even shared instances
        if (m_examples != nullptr)
        {
            for each (auto ex in m_examples->RemoveAll())
                DisposeExample(ex);

            m_examples = nullptr;
        }

        if (m_model != nullptr)
        {
            // this object doesn't own the VW instance
            m_model->DecrementReference();
            m_model = nullptr;
        }
    }

    try
    {
        if (m_vw != nullptr)
        {
            reset_source(*m_vw, m_vw->num_bits);
            release_parser_datastructures(*m_vw);

            // make sure don't try to free m_vw twice in case VW::finish throws.
            vw* vw_tmp = m_vw;
            m_vw = nullptr;
            VW::finish(*vw_tmp);
        }

        // don't add code here as in the case of VW::finish throws an exception it won't be called
    }
    CATCHRETHROW
}
vw* vw_tmp = m_vw; m_vw = nullptr; VW::finish(*vw_tmp); } // don't add code here as in the case of VW::finish throws an exception it won't be called } CATCHRETHROW } VowpalWabbitSettings^ VowpalWabbitBase::Settings::get() { return m_settings; } VowpalWabbitArguments^ VowpalWabbitBase::Arguments::get() { if (m_arguments == nullptr) { m_arguments = gcnew VowpalWabbitArguments(m_vw); } return m_arguments; } void VowpalWabbitBase::Reload([System::Runtime::InteropServices::Optional] String^ args) { if (m_settings->ParallelOptions != nullptr) { throw gcnew NotSupportedException("Cannot reload model if AllRecude is enabled."); } clr_io_memory_buf mem_buf; if (args == nullptr) args = String::Empty; auto stringArgs = msclr::interop::marshal_as(args); try { reset_source(*m_vw, m_vw->num_bits); VW::save_predictor(*m_vw, mem_buf); mem_buf.flush(); release_parser_datastructures(*m_vw); // make sure don't try to free m_vw twice in case VW::finish throws. vw* vw_tmp = m_vw; m_vw = nullptr; VW::finish(*vw_tmp); // reload from model // seek to beginning mem_buf.reset_file(0); m_vw = VW::initialize(stringArgs.c_str(), &mem_buf); } CATCHRETHROW } String^ VowpalWabbitBase::AreFeaturesCompatible(VowpalWabbitBase^ other) { auto diff = VW::are_features_compatible(*m_vw, *other->m_vw); return diff == nullptr ? 
nullptr : gcnew String(diff); } String^ VowpalWabbitBase::ID::get() { return gcnew String(m_vw->id.c_str()); } void VowpalWabbitBase::ID::set(String^ value) { m_vw->id = msclr::interop::marshal_as(value); } void VowpalWabbitBase::SaveModel() { string name = m_vw->final_regressor_name; if (name.empty()) { return; } // this results in extra marshaling but should be fine here this->SaveModel(gcnew String(name.c_str())); } void VowpalWabbitBase::SaveModel(String^ filename) { if (String::IsNullOrEmpty(filename)) throw gcnew ArgumentException("Filename must not be null or empty"); String^ directoryName = System::IO::Path::GetDirectoryName(filename); if (!String::IsNullOrEmpty(directoryName)) { System::IO::Directory::CreateDirectory(directoryName); } auto name = msclr::interop::marshal_as(filename); try { VW::save_predictor(*m_vw, name); } CATCHRETHROW } void VowpalWabbitBase::SaveModel(Stream^ stream) { if (stream == nullptr) throw gcnew ArgumentException("stream"); try { VW::clr_io_buf buf(stream); VW::save_predictor(*m_vw, buf); } CATCHRETHROW } } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_base.h000066400000000000000000000101261332666127000202700ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #pragma once #include "vw_clr.h" #include #include "vw_interface.h" #include "vw_arguments.h" using namespace std; using namespace System::Collections::Generic; namespace VW { ref class VowpalWabbitPrediction; ref class VowpalWabbitModel; /// /// A base wrapper around vowpal wabbit machine learning instance. /// /// /// Since the model class must delay diposal of until all referencing /// VowpalWabbit instances are disposed, the base class does not dispose . /// public ref class VowpalWabbitBase abstract { private: /// /// The settings used for this instance. 
/// initonly VowpalWabbitSettings^ m_settings; /// /// Handle to trace listener delegate, required to keep safe from garbage collection. /// GCHandle m_traceListener; /// /// An optional shared model. /// VowpalWabbitModel^ m_model; /// /// Extracted command line arguments. /// VowpalWabbitArguments^ m_arguments; /// /// Reference count to native data structure. /// System::Int32 m_instanceCount; internal: /// /// The native vowpal wabbit data structure. /// vw* m_vw; /// /// Thread-safe increment of reference count. /// void IncrementReference(); /// /// Thread-safe decrement of reference count. /// void DecrementReference(); protected: /// /// True if all nativedata structures are disposed. /// bool m_isDisposed; /// /// Example pool. Kept in base to simplify deallocation. /// IBag^ m_examples; /// /// Initializes a new instance. /// /// Command line arguments. VowpalWabbitBase(VowpalWabbitSettings^ settings); /// /// Cleanup. /// !VowpalWabbitBase(); /// /// Internal dipose using reference counting to delay disposal of shared native data structures. /// void InternalDispose(); void DisposeExample(VowpalWabbitExample^ ex); public: /// /// Cleanup. /// virtual ~VowpalWabbitBase(); /// /// The settings used for this instance. /// property VowpalWabbitSettings^ Settings { VowpalWabbitSettings^ get(); } /// /// Extracted command line arguments. /// property VowpalWabbitArguments^ Arguments { VowpalWabbitArguments^ get(); } /// /// The read/writable model id. /// property String^ ID { String^ get(); void set(String^ id); } /// /// Performs the following steps to reset the learning state: /// /// - Save model to in-memory buffer /// - Dispose existing instance /// - Initialize new instance from in-memory buffer /// void Reload([System::Runtime::InteropServices::Optional] String^ args); /// /// Compares features created by current instance are compatible to features created by . 
/// /// /// Null if compatible, otherwise the difference /// String^ AreFeaturesCompatible(VowpalWabbitBase^ other); /// /// Persist model to file specified by -i. /// void SaveModel(); /// /// Persist model to . /// /// The destination filename for the model. void SaveModel(String^ filename); /// /// Persist model to . /// /// The destination stream for the model. /// The stream is not closed to support embedded schemes. void SaveModel(Stream^ stream); }; } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_builder.cpp000066400000000000000000000062471332666127000213500ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #include "vw_builder.h" #include "parser.h" namespace VW { VowpalWabbitExampleBuilder::VowpalWabbitExampleBuilder(IVowpalWabbitExamplePool^ vw) : m_vw(vw), m_example(nullptr) { if (vw == nullptr) throw gcnew ArgumentNullException("vw"); m_example = vw->GetOrCreateNativeExample(); } VowpalWabbitExampleBuilder::~VowpalWabbitExampleBuilder() { this->!VowpalWabbitExampleBuilder(); } VowpalWabbitExampleBuilder::!VowpalWabbitExampleBuilder() { if (m_example != nullptr) { // in case CreateExample is not getting called delete m_example; m_example = nullptr; } } VowpalWabbitExample^ VowpalWabbitExampleBuilder::CreateExample() { if (m_example == nullptr) return nullptr; try { // finalize example VW::setup_example(*m_vw->Native->m_vw, m_example->m_example); } CATCHRETHROW // hand memory management off to VowpalWabbitExample auto ret = m_example; m_example = nullptr; return ret; } void VowpalWabbitExampleBuilder::ApplyLabel(ILabel^ label) { if (label == nullptr) return; label->UpdateExample(m_vw->Native->m_vw, m_example->m_example); } VowpalWabbitNamespaceBuilder^ VowpalWabbitExampleBuilder::AddNamespace(Char featureGroup) { return AddNamespace((Byte)featureGroup); } VowpalWabbitNamespaceBuilder^ 
VowpalWabbitExampleBuilder::AddNamespace(Byte featureGroup) { uint32_t index = featureGroup; example* ex = m_example->m_example; return gcnew VowpalWabbitNamespaceBuilder(ex->feature_space + index, featureGroup, m_example->m_example); } VowpalWabbitNamespaceBuilder::VowpalWabbitNamespaceBuilder(features* features, unsigned char index, example* example) : m_features(features), m_index(index), m_example(example) { } VowpalWabbitNamespaceBuilder::~VowpalWabbitNamespaceBuilder() { this->!VowpalWabbitNamespaceBuilder(); } VowpalWabbitNamespaceBuilder::!VowpalWabbitNamespaceBuilder() { if (m_features->size() > 0) { unsigned char temp = m_index; // avoid duplicate insertion // can't check at the beginning, because multiple builders can be open // at the same time for (unsigned char ns : m_example->indices) if (ns == temp) return; m_example->indices.push_back(temp); } } void VowpalWabbitNamespaceBuilder::AddFeaturesUnchecked(uint64_t weight_index_base, float* begin, float* end) { for (; begin != end; begin++) { float x = *begin; if (x != 0) { m_features->values.push_back_unchecked(x); m_features->indicies.push_back_unchecked(weight_index_base); } weight_index_base++; } } void VowpalWabbitNamespaceBuilder::AddFeature(uint64_t weight_index, float x) { // filter out 0-values if (x == 0) return; m_features->push_back(x, weight_index); } void VowpalWabbitNamespaceBuilder::PreAllocate(int size) { m_features->values.resize(m_features->values.end() - m_features->values.begin() + size); m_features->indicies.resize(m_features->indicies.end() - m_features->indicies.begin() + size); } size_t VowpalWabbitNamespaceBuilder::FeatureCount::get() { return m_features->size(); } } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_builder.h000066400000000000000000000072011332666127000210040ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. 
/// <summary>
/// Helper class to ease construction of native vowpal wabbit namespace data structure.
/// </summary>
public ref class VowpalWabbitNamespaceBuilder sealed
{
private:
    /// <summary>
    /// Features.
    /// </summary>
    features* m_features;

    /// <summary>
    /// The namespace index.
    /// </summary>
    unsigned char m_index;

    /// <summary>
    /// The native example.
    /// </summary>
    example* m_example;

    // float(*m_sum_of_squares)(float*, float*);

    /// <summary>
    /// Cleanup.
    /// </summary>
    !VowpalWabbitNamespaceBuilder();

internal:
    /// <summary>
    /// Initializes a new instance.
    /// </summary>
    /// <param name="features">Pointer into features owned by <paramref name="example"/>.</param>
    /// <param name="index">The namespace index.</param>
    /// <param name="example">The native example to build up.</param>
    VowpalWabbitNamespaceBuilder(features* features, unsigned char index, example* example);

public:
    /// <summary>
    /// Cleanup.
    /// </summary>
    ~VowpalWabbitNamespaceBuilder();

    /// <summary>
    /// Add feature entry.
    /// </summary>
    /// <param name="weight_index">The weight index.</param>
    /// <param name="x">The value.</param>
    void AddFeature(uint64_t weight_index, float x);

    /// <summary>
    /// Adds a dense array to the example.
    /// </summary>
    /// <param name="weight_index_base">The base weight index. Each element is then placed relative to this index.</param>
    /// <param name="begin">The start pointer of the float array.</param>
    /// <param name="end">The end pointer of the float array.</param>
    void AddFeaturesUnchecked(uint64_t weight_index_base, float* begin, float* end);

    /// <summary>
    /// Pre-allocate features of <paramref name="size"/>.
    /// </summary>
    /// <param name="size">The number of features to pre-allocate.</param>
    void PreAllocate(int size);

    /// <summary>
    /// The number of features in this namespace.
    /// </summary>
    property size_t FeatureCount
    {
        size_t get();
    }
};

/// <summary>
/// Helper class to ease construction of native vowpal wabbit example data structure.
/// </summary>
public ref class VowpalWabbitExampleBuilder sealed
{
private:
    /// <summary>
    /// The pool the example is taken from.
    /// </summary>
    IVowpalWabbitExamplePool^ m_vw;

    /// <summary>
    /// The produced CLR example data structure.
    /// </summary>
    VowpalWabbitExample^ m_example;

protected:
    /// <summary>
    /// Cleanup.
    /// </summary>
    !VowpalWabbitExampleBuilder();

public:
    /// <summary>
    /// Initializes a new instance.
    /// </summary>
    /// <param name="vw">The parent vowpal wabbit instance.</param>
    VowpalWabbitExampleBuilder(IVowpalWabbitExamplePool^ vw);

    /// <summary>
    /// Cleanup.
    /// </summary>
    ~VowpalWabbitExampleBuilder();

    /// <summary>
    /// Creates the managed example representation.
    /// </summary>
    /// <returns>Creates the managed example.</returns>
    VowpalWabbitExample^ CreateExample();

    /// <summary>
    /// Sets the label for the resulting example.
    /// </summary>
    void ApplyLabel(ILabel^ label);

    /// <summary>
    /// Creates and adds a new namespace to this example.
    /// </summary>
    VowpalWabbitNamespaceBuilder^ AddNamespace(Byte featureGroup);

    /// <summary>
    /// Creates and adds a new namespace to this example.
    /// </summary>
    /// <param name="featureGroup">The feature group of the new namespace.</param>
    /// <remarks>Casts to System::Byte.</remarks>
    VowpalWabbitNamespaceBuilder^ AddNamespace(Char featureGroup);
};
/// /// Creates the managed example. VowpalWabbitExample^ CreateExample(); /// /// Sets the label for the resulting example. /// void ApplyLabel(ILabel^ label); /// /// Creates and adds a new namespace to this example. /// VowpalWabbitNamespaceBuilder^ AddNamespace(Byte featureGroup); /// /// Creates and adds a new namespace to this example. /// /// The feature group of the new namespace. /// Casts to System::Byte. VowpalWabbitNamespaceBuilder^ AddNamespace(Char featureGroup); }; } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_cbutil.cpp000066400000000000000000000010221332666127000211660ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #include "vw_cbutil.h" #include "cb_algs.h" namespace VW { float VowpalWabbitContextualBanditUtil::GetUnbiasedCost(uint32_t actionObservered, uint32_t actionTaken, float cost, float probability) { CB::cb_class observation = { cost, actionObservered, probability }; return CB_ALGS::get_unbiased_cost(&observation, actionTaken); } } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_cbutil.h000066400000000000000000000006661332666127000206500ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #pragma once #include "vw_clr.h" namespace VW { public ref class VowpalWabbitContextualBanditUtil abstract sealed { public: static float GetUnbiasedCost(uint32_t actionObservered, uint32_t actionTaken, float cost, float probability); }; } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_clr.h000066400000000000000000000102631332666127000201400ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. 
/// <summary>
/// Collected performance statistics.
/// </summary>
public ref class VowpalWabbitPerformanceStatistics
{
public:
    /// <summary>
    /// The total number of features seen since instance creation.
    /// </summary>
    property uint64_t TotalNumberOfFeatures;

    /// <summary>
    /// The weighted sum of examples.
    /// </summary>
    property double WeightedExampleSum;

    /// <summary>
    /// The total number of examples per pass.
    /// </summary>
    property uint64_t NumberOfExamplesPerPass;

    /// <summary>
    /// The weighted sum of labels.
    /// </summary>
    property double WeightedLabelSum;

    /// <summary>
    /// The average loss since instance creation.
    /// </summary>
    property double AverageLoss;

    /// <summary>
    /// The best constant since instance creation.
    /// </summary>
    property double BestConstant;

    /// <summary>
    /// The best constant loss since instance creation.
    /// </summary>
    property double BestConstantLoss;
};

/// <summary>
/// A managed wrapper for native vowpal wabbit exceptions.
/// </summary>
/// <remarks>
/// As the default managed exception wrapping any native exception doesn't even capture exception::what()
/// this wrapper was created.
/// </remarks>
[Serializable]
public ref class VowpalWabbitException : Exception
{
private:
    /// <summary>
    /// The source filename in which the wrapped exception occurred.
    /// </summary>
    initonly String^ m_filename;

    /// <summary>
    /// The line number in which the wrapped exception occurred.
    /// </summary>
    initonly Int32 m_lineNumber;

public:
    /// <summary>
    /// Initializes a new instance of VowpalWabbitException.
    /// </summary>
    /// <param name="ex">The native vowpal wabbit exception</param>
    VowpalWabbitException(const vw_exception& ex);

    /// <summary>
    /// Gets the source filename in which the wrapped exception occurred.
    /// </summary>
    property String^ Filename
    {
        String^ get();
    }

    /// <summary>
    /// Gets the line number in which the wrapped exception occurred.
    /// </summary>
    property Int32 LineNumber
    {
        Int32 get();
    }
};
/// /// /// As the default managed exception wrapping any native exception doesn't even capture exception::what() /// this wrapper was created. /// [Serializable] public ref class VowpalWabbitArgumentDisagreementException : VowpalWabbitException { public: /// /// Initializes a new instance of . /// /// The native vowpal wabbit exception VowpalWabbitArgumentDisagreementException(const vw_argument_disagreement_exception& ex); }; #ifdef _DEBUG [System::ComponentModel::Browsable(false)] [System::ComponentModel::EditorBrowsable(System::ComponentModel::EditorBrowsableState::Never)] public ref class VowpalWabbitLeakTest abstract sealed { public: static void Leak() { new float[123]; } static void NoLeak() { void* ptr = calloc(128, 2); ptr = realloc(ptr, 128 * 3); free(ptr); } }; #endif } #define CATCHRETHROW \ catch (VW::vw_exception const& ex) \ { throw gcnew VW::VowpalWabbitException(ex); } \ catch (std::exception const& ex) \ { throw gcnew System::Exception(gcnew System::String(ex.what())); } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_clr.vcxproj000066400000000000000000001170541332666127000214120ustar00rootroot00000000000000 DebugLeakCheck Win32 DebugLeakCheck x64 Debug Win32 Debug x64 Release Win32 Release x64 {85E55AE0-3784-4968-9271-C81AF560E1C1} v4.5.2 ManagedCProj vw_clr ..\..\vowpalwabbit\ ..\..\sdl\SDL-7.0-Recommended.ruleset true DynamicLibrary true v140 true Unicode DynamicLibrary true v140 true Unicode DynamicLibrary true v140 true Unicode DynamicLibrary true v140 true Unicode true DynamicLibrary false v140 true Unicode DynamicLibrary false v140 true Unicode 8.1 true ..\vw_key.snk true ..\vw_key.snk false VowpalWabbit.Core false ..\vw_key.snk $(Platform)\$(Configuration)\$(ProjectName)\ true VowpalWabbit.Core false ..\vw_key.snk $(Platform)\$(Configuration)\$(ProjectName)\ false ..\vw_key.snk false VowpalWabbit.Core ..\vw_key.snk $(Platform)\$(Configuration)\$(ProjectName)\ Level3 Disabled WIN32;_DEBUG;%(PreprocessorDefinitions) NotUsing 
$(SolutionDir)win32;$(ProjectDir)\..\..\vowpalwabbit;%(AdditionalIncludeDirectories);$(ProjectDir)\..\..\rapidjson\include;$(ProjectDir)\..\..\explore true $(BoostLibDir);%(AdditionalLibraryDirectories) kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;ws2_32.lib;$(SolutionDir)$(PlatformShortName)\$(Configuration)\vw_dynamic.lib;%(AdditionalDependencies) Level3 Disabled WIN32;_DEBUG;%(PreprocessorDefinitions) NotUsing $(SolutionDir)win32;$(ProjectDir)\..\..\vowpalwabbit;%(AdditionalIncludeDirectories);$(ProjectDir)\..\..\rapidjson\include;$(ProjectDir)\..\..\explore true $(BoostLibDir);%(AdditionalLibraryDirectories) kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;ws2_32.lib;$(SolutionDir)$(PlatformShortName)\$(Configuration)\vw_dynamic.lib;%(AdditionalDependencies) Level3 Disabled WIN32;_DEBUG;%(PreprocessorDefinitions) NotUsing $(SolutionDir)win32;$(ProjectDir)\..\..\vowpalwabbit;%(AdditionalIncludeDirectories);$(ProjectDir)\..\..\rapidjson\include;$(ProjectDir)\..\..\explore true $(OutDir)$(TargetName).pdb false true false true kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;ws2_32.lib;%(AdditionalDependencies) true Level3 Disabled WIN32;_DEBUG;%(PreprocessorDefinitions) NotUsing $(SolutionDir)win32;$(ProjectDir)\..\..\vowpalwabbit;%(AdditionalIncludeDirectories);$(ProjectDir)\..\..\rapidjson\include;$(ProjectDir)\..\..\explore true $(OutDir)$(TargetName).pdb false true false true kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;ws2_32.lib;%(AdditionalDependencies) Level3 WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions) NotUsing 
$(SolutionDir)win32;$(ProjectDir)\..\..\vowpalwabbit;%(AdditionalIncludeDirectories);$(ProjectDir)\..\..\rapidjson\include;$(ProjectDir)\..\..\explore true $(BoostLibDir);%(AdditionalLibraryDirectories) kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;ws2_32.lib;$(SolutionDir)$(PlatformShortName)\$(Configuration)\vw_dynamic.lib;%(AdditionalDependencies) Level3 WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions) NotUsing $(SolutionDir)win32;$(ProjectDir)\..\..\vowpalwabbit;%(AdditionalIncludeDirectories);$(ProjectDir)\..\..\rapidjson\include;$(ProjectDir)\..\..\explore true $(OutDir)$(TargetName).pdb true false true kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;ws2_32.lib;%(AdditionalDependencies) true false $(SolutionDir)\packages\Newtonsoft.Json.8.0.3\lib\net45\Newtonsoft.Json.dll {1e205806-7f80-47dd-a38d-fc08083f3593} {e621e022-c1f8-433f-905a-ab9a3de072b7} Designer This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_clr.vcxproj.filters000066400000000000000000000073151332666127000230570ustar00rootroot00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hh;hpp;hxx;hm;inl;inc;xsd {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Resource Files vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_example.cpp000066400000000000000000000324341332666127000213520ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. 
*/ #include "vowpalwabbit.h" #include "vw_example.h" #include "vw_prediction.h" #include "gd.h" namespace VW { using namespace Labels; VowpalWabbitExample::VowpalWabbitExample(IVowpalWabbitExamplePool^ owner, example* example) : m_owner(owner), m_example(example), m_innerExample(nullptr) { } VowpalWabbitExample::VowpalWabbitExample(IVowpalWabbitExamplePool^ owner, VowpalWabbitExample^ example) : m_owner(owner), m_example(example->m_example), m_innerExample(example), m_string(example->m_string) { } VowpalWabbitExample::!VowpalWabbitExample() { if (m_owner != nullptr) m_owner->ReturnExampleToPool(this); } VowpalWabbitExample::~VowpalWabbitExample() { this->!VowpalWabbitExample(); } VowpalWabbitExample^ VowpalWabbitExample::InnerExample::get() { return m_innerExample; } IVowpalWabbitExamplePool^ VowpalWabbitExample::Owner::get() { return m_owner; } size_t VowpalWabbitExample::NumberOfFeatures::get() { return m_example->num_features; } generic T VowpalWabbitExample::GetPrediction(VowpalWabbit^ vw, IVowpalWabbitPredictionFactory^ factory) { #ifdef _DEBUG if (vw == nullptr) throw gcnew ArgumentNullException("vw"); #endif return factory->Create(vw->m_vw, m_example); } String^ VowpalWabbitExample::VowpalWabbitString::get() { return m_string; } void VowpalWabbitExample::VowpalWabbitString::set(String^ value) { m_string = value; } bool VowpalWabbitExample::IsNewLine::get() { return example_is_newline(*m_example) != 0; } ILabel^ VowpalWabbitExample::Label::get() { ILabel^ label; auto lp = m_owner->Native->m_vw->p->lp; if (!memcmp(&lp, &simple_label, sizeof(lp))) label = gcnew SimpleLabel(); else if (!memcmp(&lp, &CB::cb_label, sizeof(lp))) label = gcnew ContextualBanditLabel(); else if (!memcmp(&lp, &CB_EVAL::cb_eval, sizeof(lp))) label = gcnew SimpleLabel(); else if (!memcmp(&lp, &COST_SENSITIVE::cs_label, sizeof(lp))) label = gcnew SimpleLabel(); else return nullptr; // TODO: //else if (!memcmp(&lp, &MULTICLASS::multilabel, sizeof(lp))) // label = gcnew MulticlassLabel; 
//else if (!memcmp(&lp, &MC::multilabel, sizeof(lp))) label->ReadFromExample(this->m_example); return label; } void VowpalWabbitExample::Label::set(ILabel^ label) { if (label == nullptr) return; label->UpdateExample(m_owner->Native->m_vw, m_example); // we need to update the example weight as setup_example() can be called prior to this call. m_example->weight = m_owner->Native->m_vw->p->lp.get_weight(&m_example->l); } void VowpalWabbitExample::MakeEmpty(VowpalWabbit^ vw) { char empty = '\0'; VW::read_line(*vw->m_vw, m_example, &empty); VW::setup_example(*vw->m_vw, m_example); } void FormatIndices(example* a, System::Text::StringBuilder^ sb) { for (auto ns : a->indices) { if (ns == 0) sb->Append("NULL:0,"); else sb->AppendFormat("'{0}':{1},", gcnew System::Char(ns), (int)ns); } } System::String^ FormatIndices(example* a, example *b) { auto sb = gcnew System::Text::StringBuilder(); sb->AppendFormat("Namespace indicies differ: {0} vs {1}. this.indices: [", a->indices.size(), b->indices.size()); FormatIndices(a, sb); sb->Append("] other.indices: ["); FormatIndices(b, sb); sb->Append("]"); return sb->ToString(); } System::String^ FormatFeature(vw* vw, feature_value& f1, feature_index& i1) { uint64_t masked_weight_index1 = i1 & vw->weights.mask(); return System::String::Format( "weight_index = {0}/{1}, x = {2}", masked_weight_index1, i1, gcnew System::Single(f1)); } System::String^ FormatFeature(vw* vw, feature_value& f1, feature_index& i1, feature_value& f2, feature_index& i2) { return System::String::Format( "Feature differ: this({0}) vs other({1})", FormatFeature(vw, f1, i1), FormatFeature(vw, f2, i2)); } bool FloatEqual(float a, float b) { if ((abs(a) < 1e-20 && abs(b) < 1e-20) || (isinf(a) && isinf(b))) { return true; } return abs(a - b) / max(a, b) < 1e-6; } System::String^ FormatFeatures(vw* vw, features& arr) { auto sb = gcnew System::Text::StringBuilder(); for (size_t i = 0; i < arr.values.size(); i++) { sb->Append(FormatFeature(vw, arr.values[i], 
arr.indicies[i]))->Append(" "); } return sb->ToString(); } System::String^ CompareFeatures(vw* vw, features& fa, features& fb, unsigned char ns) { vector fa_missing; for (size_t ia = 0, ib = 0; ia < fa.values.size(); ia++) { auto masked_weight_index = fa.indicies[ia] & vw->weights.mask(); auto other_masked_weight_index = fb.indicies[ib] & vw->weights.mask(); /*System::Diagnostics::Debug::WriteLine(System::String::Format("{0} -> {1} vs {2} -> {3}", fa.indicies[ia], masked_weight_index, fb.indicies[ib], other_masked_weight_index ));*/ if (masked_weight_index == other_masked_weight_index && FloatEqual(fa.values[ia], fb.values[ib])) ib++; else { // fallback to search size_t ib_old = ib; bool found = false; for (ib = 0; ib < fb.values.size(); ib++) { auto other_masked_weight_index = fb.indicies[ib] & vw->weights.mask(); if (masked_weight_index == other_masked_weight_index) { if (!FloatEqual(fa.values[ia], fb.values[ib])) { return FormatFeature(vw, fa.values[ia], fa.indicies[ia], fb.values[ib], fb.indicies[ib]); } else { found = true; break; } } } if (!found) { fa_missing.push_back(ia); } ib = ib_old + 1; } } if (!fa_missing.empty()) { auto diff = gcnew System::Text::StringBuilder(); diff->AppendFormat("missing features in ns '{0}'/'{1}': ", ns, gcnew Char(ns)); for (size_t& ia : fa_missing) { diff->AppendFormat("this.weight_index = {0}, x = {1}, ", fa.indicies[ia] & vw->weights.mask(), fa.values[ia]); } return diff->ToString(); } return nullptr; } System::String^ VowpalWabbitExample::Diff(VowpalWabbit^ vw, VowpalWabbitExample^ other, IVowpalWabbitLabelComparator^ labelComparator) { auto a = this->m_example; auto b = other->m_example; if (a->indices.size() != b->indices.size()) { return FormatIndices(a, b); } for (auto i = a->indices.begin(), j = b->indices.begin(); i != a->indices.end(); i++) { if (*i == *j) j++; else { // fall back on search auto j_old = j; j = b->indices.begin(); bool found = false; for (; j != b->indices.end(); j++) { if (*i == *j) { found = true; 
break; } } if (!found) return FormatIndices(a, b); j = j_old + 1; } // compare features features& fa = a->feature_space[*i]; features& fb = b->feature_space[*i]; if (fa.size() != fb.size()) return System::String::Format("Feature length differ {0} vs {1}. this({2}) vs other({3})", fa.size(), fb.size(), FormatFeatures(vw->m_vw, fa), FormatFeatures(vw->m_vw, fb)); auto diff = CompareFeatures(vw->m_vw, fa, fb, *i); if (diff != nullptr) return diff; diff = CompareFeatures(vw->m_vw, fb, fa, *i); if (diff != nullptr) return diff; } if (labelComparator != nullptr) { // Compare the label auto diff = labelComparator->Diff(this, other); if (diff != nullptr) return diff; } return nullptr; } String^ VowpalWabbitSimpleLabelComparator::Diff(VowpalWabbitExample^ ex1, VowpalWabbitExample^ ex2) { auto s1 = ex1->m_example->l.simple; auto s2 = ex2->m_example->l.simple; if (!(FloatEqual(s1.initial, s2.initial) && FloatEqual(s1.label, s2.label) && FloatEqual(s1.weight, s2.weight))) { return System::String::Format("Label differ. label {0} vs {1}. initial {2} vs {3}. 
weight {4} vs {5}", s1.label, s2.label, s1.initial, s2.initial, s1.weight, s2.weight); } return nullptr; } String^ VowpalWabbitContextualBanditLabelComparator::Diff(VowpalWabbitExample^ ex1, VowpalWabbitExample^ ex2) { auto s1 = ex1->m_example->l.cb; auto s2 = ex2->m_example->l.cb; if (s1.costs.size() != s2.costs.size()) { return System::String::Format("Cost size differ: {0} vs {1}", s1.costs.size(), s2.costs.size()); } for (size_t i = 0; i < s1.costs.size(); i++) { auto c1 = s1.costs[i]; auto c2 = s2.costs[i]; if (c1.action != c2.action) { return System::String::Format("Action differ: {0} vs {1}", c1.action, c2.action); } if (c1.cost != c2.cost) { return System::String::Format("Cost differ: {0} vs {1}", c1.cost, c2.cost); } if (abs(c1.probability - c2.probability) / max(c1.probability, c2.probability) > 0.01) { return System::String::Format("Probability differ: {0} vs {1}", c1.probability, c2.probability); } } return nullptr; } System::Collections::IEnumerator^ VowpalWabbitExample::EnumerableGetEnumerator::get() { return GetEnumerator(); } IEnumerator^ VowpalWabbitExample::GetEnumerator() { return gcnew NamespaceEnumerator(this); } VowpalWabbitExample::NamespaceEnumerator::NamespaceEnumerator(VowpalWabbitExample^ example) : m_example(example) { Reset(); } VowpalWabbitExample::NamespaceEnumerator::~NamespaceEnumerator() { } bool VowpalWabbitExample::NamespaceEnumerator::MoveNext() { m_current++; return m_current < m_example->m_example->indices.end(); } void VowpalWabbitExample::NamespaceEnumerator::Reset() { // position before the beginning. 
m_current = m_example->m_example->indices.begin() - 1; } VowpalWabbitNamespace^ VowpalWabbitExample::NamespaceEnumerator::Current::get() { if (m_current < m_example->m_example->indices.begin() || m_current >= m_example->m_example->indices.end()) throw gcnew InvalidOperationException(); return gcnew VowpalWabbitNamespace(m_example, *m_current, &m_example->m_example->feature_space[*m_current]); } System::Object^ VowpalWabbitExample::NamespaceEnumerator::IEnumeratorCurrent::get() { return Current; } VowpalWabbitFeature::VowpalWabbitFeature(VowpalWabbitExample^ example, feature_value x, uint64_t weight_index) : m_example(example), m_vw(m_example->Owner->Native), m_x(x), m_weight_index(weight_index) { } VowpalWabbitFeature::VowpalWabbitFeature(VowpalWabbit^ vw, feature_value x, uint64_t weight_index) : m_vw(vw), m_x(x), m_weight_index(weight_index) { } float VowpalWabbitFeature::X::get() { return m_x; } uint64_t VowpalWabbitFeature::FeatureIndex::get() { return m_weight_index; } uint64_t VowpalWabbitFeature::WeightIndex::get() { if (m_example == nullptr) throw gcnew InvalidOperationException("VowpalWabbitFeature must be initialized with example"); vw* vw = m_example->Owner->Native->m_vw; return ((m_weight_index + m_example->m_example->ft_offset) >> vw->weights.stride_shift()) & vw->parse_mask; } float VowpalWabbitFeature::Weight::get() { if (m_example == nullptr) throw gcnew InvalidOperationException("VowpalWabbitFeature must be initialized with example"); vw* vw = m_example->Owner->Native->m_vw; uint64_t weightIndex = m_weight_index + m_example->m_example->ft_offset; return vw->weights[weightIndex]; } float VowpalWabbitFeature::AuditWeight::get() { vw* vw = m_vw->m_vw; return GD::trunc_weight(Weight, (float)vw->sd->gravity) * (float)vw->sd->contraction; } bool VowpalWabbitFeature::Equals(Object^ o) { VowpalWabbitFeature^ other = dynamic_cast(o); return other != nullptr && other->m_x == m_x && other->m_weight_index == m_weight_index; } int 
VowpalWabbitFeature::GetHashCode() { return (int)(m_x + m_weight_index); } VowpalWabbitNamespace::VowpalWabbitNamespace(VowpalWabbitExample^ example, namespace_index ns, features* features) : m_example(example), m_ns(ns), m_features(features) { } VowpalWabbitNamespace::~VowpalWabbitNamespace() { } namespace_index VowpalWabbitNamespace::Index::get() { return m_ns; } System::Collections::IEnumerator^ VowpalWabbitNamespace::EnumerableGetEnumerator::get() { return GetEnumerator(); } IEnumerator^ VowpalWabbitNamespace::GetEnumerator() { return gcnew FeatureEnumerator(m_example, m_features); } VowpalWabbitNamespace::FeatureEnumerator::FeatureEnumerator(VowpalWabbitExample^ example, features* features) : m_example(example), m_features(features), m_iterator(nullptr) { m_end = new Holder { features->end() }; } VowpalWabbitNamespace::FeatureEnumerator::~FeatureEnumerator() { delete m_end; delete m_iterator; } void VowpalWabbitNamespace::FeatureEnumerator::Reset() { delete m_iterator; m_iterator = nullptr; } bool VowpalWabbitNamespace::FeatureEnumerator::MoveNext() { if (m_iterator) ++m_iterator->value; else m_iterator = new Holder { m_features->begin() }; return m_iterator->value != m_end->value; } System::Object^ VowpalWabbitNamespace::FeatureEnumerator::IEnumeratorCurrent::get() { return Current; } VowpalWabbitFeature^ VowpalWabbitNamespace::FeatureEnumerator::Current::get() { if (!m_iterator || m_iterator->value == m_end->value) throw gcnew InvalidOperationException(); return gcnew VowpalWabbitFeature(m_example, m_iterator->value.value(), m_iterator->value.index()); } } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_example.h000066400000000000000000000147731332666127000210250ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. 
*/ #pragma once #include "vw_clr.h" #include "vw_interface.h" #include "vw_labelcomparator.h" #include "vw_label.h" namespace VW { using namespace System::Collections::Generic; using namespace VW::Labels; ref class VowpalWabbitExample; ref class VowpalWabbit; [System::Diagnostics::DebuggerDisplay("{m_weight_index}:{m_x}")] public ref struct VowpalWabbitFeature { private: feature_value m_x; uint64_t m_weight_index; VowpalWabbitExample^ m_example; VowpalWabbit^ m_vw; public: VowpalWabbitFeature(VowpalWabbitExample^ example, feature_value x, uint64_t weight_index); VowpalWabbitFeature(VowpalWabbit^ vw, feature_value x, uint64_t weight_index); property feature_value X { float get(); } property uint64_t FeatureIndex { uint64_t get(); } property uint64_t WeightIndex { uint64_t get(); } property float Weight { float get(); } property float AuditWeight { float get(); } virtual bool Equals(Object^ o) override; virtual int GetHashCode() override; }; template struct Holder { T value; }; [System::Diagnostics::DebuggerDisplay("{Index} = '{(char)Index}'")] public ref struct VowpalWabbitNamespace : public IEnumerable { private: ref class FeatureEnumerator : public IEnumerator { private: VowpalWabbitExample^ m_example; features* m_features; Holder* m_iterator; Holder* m_end; internal: FeatureEnumerator(VowpalWabbitExample^ example, features* features); ~FeatureEnumerator(); property System::Object^ IEnumeratorCurrent { virtual System::Object^ get() sealed = System::Collections::IEnumerator::Current::get; } public: virtual bool MoveNext(); virtual void Reset(); property VowpalWabbitFeature^ Current { virtual VowpalWabbitFeature^ get(); } }; namespace_index m_ns; features* m_features; VowpalWabbitExample^ m_example; property System::Collections::IEnumerator^ EnumerableGetEnumerator { virtual System::Collections::IEnumerator^ get() sealed = System::Collections::IEnumerable::GetEnumerator; } public: VowpalWabbitNamespace(VowpalWabbitExample^ m_example, namespace_index ns, features* 
features); ~VowpalWabbitNamespace(); property namespace_index Index { namespace_index get(); } virtual IEnumerator^ GetEnumerator(); }; /// /// A CLR representation of a vowpal wabbit example. /// /// /// Underlying memory is allocated by native code, but examples are not part of the ring. /// [System::Diagnostics::DebuggerDisplay("{m_example}: '{m_string}'")] public ref class VowpalWabbitExample : public IEnumerable { private: /// /// Reference to an optional underlying example. /// /// If this instance owns this is null. initonly VowpalWabbitExample^ m_innerExample; ref class NamespaceEnumerator : public IEnumerator { private: VowpalWabbitExample^ m_example; namespace_index* m_current; internal: NamespaceEnumerator(VowpalWabbitExample^ example); ~NamespaceEnumerator(); property System::Object^ IEnumeratorCurrent { virtual System::Object^ get() sealed = System::Collections::IEnumerator::Current::get; } public: virtual bool MoveNext(); virtual void Reset(); property VowpalWabbitNamespace^ Current { virtual VowpalWabbitNamespace^ get(); } }; protected: /// /// Returns native example data structure to owning instance. /// !VowpalWabbitExample(); internal: /// /// Initializes a new instance of . /// /// The parent instance. Examples cannot be shared between vw instances. /// The already allocated example structure VowpalWabbitExample(IVowpalWabbitExamplePool^ owner, example* example); /// /// The native example data structure. /// example* m_example; /// /// The owner of this example. /// IVowpalWabbitExamplePool^ m_owner; /// /// The optional string version of the example. /// String^ m_string; public: /// /// Initializes a new instance of . /// /// The parent instance. Examples cannot be shared between instances. /// The inner example this instance wraps. VowpalWabbitExample(IVowpalWabbitExamplePool^ owner, VowpalWabbitExample^ example); /// /// Returns native example data structure to owning pool. 
/// ~VowpalWabbitExample(); /// /// Extracts the prediction from this example using the given prediction factory. /// /// The prediction stored in this example. generic T GetPrediction(VowpalWabbit^ vw, IVowpalWabbitPredictionFactory^ factory); /// /// An optional inner example this example wraps. /// property VowpalWabbitExample^ InnerExample { VowpalWabbitExample^ get(); } /// /// The owner of this example. /// property IVowpalWabbitExamplePool^ Owner { IVowpalWabbitExamplePool^ get(); } /// /// The corresponding VowpalWabbitString for this example. /// property String^ VowpalWabbitString { String^ get(); void set(String^ value); } /// /// True if this is a new line example, otherwise false. /// /// A example without features is considered a new line example. property bool IsNewLine { bool get(); } String^ Diff(VowpalWabbit^ vw, VowpalWabbitExample^ other, IVowpalWabbitLabelComparator^ labelComparator); void MakeEmpty(VowpalWabbit^ vw); property System::Collections::IEnumerator^ EnumerableGetEnumerator { virtual System::Collections::IEnumerator^ get() sealed = System::Collections::IEnumerable::GetEnumerator; } virtual IEnumerator^ GetEnumerator(); property size_t NumberOfFeatures { size_t get(); } property ILabel^ Label { ILabel^ get(); void set(ILabel^ label); } }; } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_exception.cpp000066400000000000000000000013461332666127000217130ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. 
*/ #include "vw_clr.h" namespace VW { VowpalWabbitException::VowpalWabbitException(const vw_exception& ex) : Exception(gcnew System::String(ex.what())), m_filename(gcnew System::String(ex.Filename())), m_lineNumber(ex.LineNumber()) { } String^ VowpalWabbitException::Filename::get() { return m_filename; } Int32 VowpalWabbitException::LineNumber::get() { return m_lineNumber; } VowpalWabbitArgumentDisagreementException::VowpalWabbitArgumentDisagreementException(const vw_argument_disagreement_exception& ex) : VowpalWabbitException(ex) { } } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_interface.h000066400000000000000000000016351332666127000213230ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #pragma once #include "vw.h" #include "vw_clr.h" #include "vw_prediction.h" namespace VW { ref class VowpalWabbitExample; ref class VowpalWabbitBase; /// /// Owners of example must implement this interface. /// public interface class IVowpalWabbitExamplePool : public System::IDisposable { /// /// Gets or creates a new native examples. /// VowpalWabbitExample^ GetOrCreateNativeExample(); property VowpalWabbit^ Native { VowpalWabbit^ get(); } /// /// Puts a native example data structure back into the pool. /// /// The example to be returned. void ReturnExampleToPool(VowpalWabbitExample^ example); }; } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_label.h000066400000000000000000000170611332666127000204420ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. 
*/ #pragma once #include "vw.h" #include "vw_clr.h" #include "cb.h" #include "best_constant.h" #include "multiclass.h" namespace VW { namespace Labels { // The label classes are a replication of the parse_label function pointers provided by individual // modules. Main reason for creation is thread-saftey. The C++ version use a shared v_array in parser // and thus need to be synchronized. // These label classes are thread-safe and even more efficient as they avoid marshalling. using namespace System; using namespace System::Collections::Generic; using namespace System::Globalization; using namespace System::Text; using namespace CB; using namespace MULTICLASS; using namespace Newtonsoft::Json; public interface class ILabel { void UpdateExample(vw* vw, example* ex); void ReadFromExample(example* ex); }; public ref class ContextualBanditLabel sealed : ILabel { private: uint32_t m_action; float m_cost; float m_probability; public: ContextualBanditLabel() : m_action(0), m_cost(0), m_probability(0) { } ContextualBanditLabel(uint32_t action, float cost, float probability) : m_action(action), m_cost(cost), m_probability(0) { Probability = probability; } [JsonProperty] property uint32_t Action { uint32_t get() { return m_action; } void set(uint32_t value) { m_action = value; } } [JsonProperty] property float Probability { float get() { return m_probability; } void set(float value) { if (value < 0 || value >1) throw gcnew ArgumentOutOfRangeException("invalid probability: " + value); m_probability = value; } } [JsonProperty] property float Cost { float get() { return m_cost; } void set(float value) { m_cost = value; } } [JsonIgnore] property bool IsShared { bool get() { return m_cost == FLT_MAX && m_probability == -1.f; } } virtual void ReadFromExample(example* ex) { CB::label* ld = (CB::label*)&ex->l; if (ld->costs.size() > 0) { cb_class& f = ld->costs[0]; m_action = f.action; m_cost = f.cost; m_probability = f.probability; } } virtual void UpdateExample(vw* vw, example* ex) 
{ CB::label* ld = (CB::label*)&ex->l; cb_class f; f.partial_prediction = 0.; f.action = m_action; f.cost = m_cost; f.probability = m_probability; ld->costs.push_back(f); } virtual String^ ToString() override { auto sb = gcnew StringBuilder; sb->Append(m_action.ToString(CultureInfo::InvariantCulture)); sb->Append(L':'); sb->Append(m_cost.ToString(CultureInfo::InvariantCulture)); sb->Append(L':'); sb->Append(m_probability.ToString(CultureInfo::InvariantCulture)); return sb->ToString(); } }; /// /// In multi-line scenarios the first example can contain a set of shared features. This first example must be /// marked using a 'shared' label. /// public ref class SharedLabel sealed : ILabel { private: uint32_t m_action; SharedLabel() : m_action((uint32_t)uniform_hash("shared", 6, 0)) { } public: static SharedLabel^ Instance = gcnew SharedLabel; virtual void UpdateExample(vw* vw, example* ex) { CB::label* ld = (CB::label*)&ex->l; cb_class f; f.partial_prediction = 0.; f.action = m_action; f.cost = FLT_MAX; f.probability = -1.f; ld->costs.push_back(f); } virtual String^ ToString() override { return "shared"; } virtual void ReadFromExample(example* ex) { } }; public ref class SimpleLabel sealed : ILabel { private: float m_label; Nullable m_weight; Nullable m_initial; public: SimpleLabel() : m_label(0) { } [JsonProperty] property float Label { float get() { return m_label; } void set(float value) { m_label = value; } } [JsonProperty(NullValueHandling = NullValueHandling::Ignore)] property Nullable Weight { Nullable get() { return m_weight; } void set(Nullable value) { m_weight = value; } } [JsonProperty(NullValueHandling = NullValueHandling::Ignore)] property Nullable Initial { Nullable get() { return m_initial; } void set(Nullable value) { m_initial = value; } } virtual void ReadFromExample(example* ex) { label_data* ld = (label_data*)&ex->l; m_label = ld->label; m_weight = ld->weight; m_initial = ld->initial; } virtual void UpdateExample(vw* vw, example* ex) { label_data* 
ld = (label_data*)&ex->l; ld->label = m_label; if (m_weight.HasValue) ld->weight = m_weight.Value; if (m_initial.HasValue) ld->initial = m_initial.Value; count_label(vw->sd, ld->label); } virtual String^ ToString() override { auto sb = gcnew StringBuilder; sb->Append(m_label.ToString(CultureInfo::InvariantCulture)); if (m_weight.HasValue) { sb->Append(L' '); sb->Append(m_weight.Value.ToString(CultureInfo::InvariantCulture)); if (m_initial.HasValue) { sb->Append(L' '); sb->Append(m_initial.Value.ToString(CultureInfo::InvariantCulture)); } } return sb->ToString(); } }; public ref class MulticlassLabel sealed : ILabel { public: ref class Label sealed { private: uint32_t m_class; Nullable m_weight; public: property uint32_t Class { uint32_t get() { return m_class; } void set(uint32_t value) { m_class = value; } } [JsonProperty(NullValueHandling = NullValueHandling::Ignore)] property Nullable Weight { Nullable get() { return m_weight; } void set(Nullable value) { m_weight = value; } } }; private: List^ m_classes; public: [JsonProperty] property List^ Classes { List^ get() { return m_classes; } void set(List^ value) { m_classes = value; } } virtual void ReadFromExample(example* ex) { throw gcnew NotImplementedException("to be done..."); } virtual void UpdateExample(vw* vw, example* ex) { throw gcnew NotImplementedException("to be done..."); } virtual String^ ToString() override { auto sb = gcnew StringBuilder; for each (Label^ label in m_classes) { sb->Append(L' '); sb->Append(label->Class.ToString(CultureInfo::InvariantCulture)); if (label->Weight.HasValue) { sb->Append(L' '); sb->Append(label->Weight.Value.ToString(CultureInfo::InvariantCulture)); } } // strip first space if (sb->Length > 0) sb->Remove(0, 1); return sb->ToString(); } }; public ref class StringLabel sealed : ILabel { private: String^ m_label; public: StringLabel() { } StringLabel(String^ label) : m_label(label) { } [JsonProperty] property String^ Label { String^ get() { return m_label; } void 
set(String^ value) { m_label = value; } } virtual void ReadFromExample(example* ex) { throw gcnew NotImplementedException("to be done..."); } virtual void UpdateExample(vw* vw, example* ex) { auto bytes = System::Text::Encoding::UTF8->GetBytes(m_label); auto valueHandle = GCHandle::Alloc(bytes, GCHandleType::Pinned); try { VW::parse_example_label(*vw, *ex, reinterpret_cast(valueHandle.AddrOfPinnedObject().ToPointer())); } CATCHRETHROW finally { valueHandle.Free(); } } virtual String^ ToString() override { return m_label; } }; } } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_labelcomparator.h000066400000000000000000000041571332666127000225340ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #pragma once #include "vw_example.h" namespace VW { ref class VowpalWabbitExample; /// /// Interface for label comparators. /// public interface class IVowpalWabbitLabelComparator { public: /// /// Compares labels of and . /// /// Returns null if labels are equivalent, otherwise returns the difference description. String^ Diff(VowpalWabbitExample^ ex1, VowpalWabbitExample^ ex2); }; /// /// A label comparer for simple labels. /// public ref class VowpalWabbitSimpleLabelComparator sealed : IVowpalWabbitLabelComparator { public: /// /// Compares labels of and . /// /// Returns null if labels are equivalent, otherwise returns the difference description. virtual String^ Diff(VowpalWabbitExample^ ex1, VowpalWabbitExample^ ex2) sealed; }; /// /// A label comparer for contextual bandit label. /// public ref class VowpalWabbitContextualBanditLabelComparator sealed : IVowpalWabbitLabelComparator { public: /// /// Compares labels of and . /// /// Returns null if labels are equivalent, otherwise returns the difference description. 
virtual String^ Diff(VowpalWabbitExample^ ex1, VowpalWabbitExample^ ex2) sealed; }; /// /// Label comparator factory. /// public ref class VowpalWabbitLabelComparator sealed abstract { public: /// /// Simple label comparator. /// static initonly IVowpalWabbitLabelComparator^ Simple = gcnew VowpalWabbitSimpleLabelComparator; /// /// Contextual bandit label comparator. /// static initonly IVowpalWabbitLabelComparator^ ContextualBandit = gcnew VowpalWabbitContextualBanditLabelComparator; }; } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_model.cpp000066400000000000000000000024131332666127000210110ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #include "vw_clr.h" #include "vw_model.h" #include "parse_regressor.h" #include "parse_args.h" #include "clr_io.h" namespace VW { VowpalWabbitSettings^ AddTestOnly(VowpalWabbitSettings^ settings) { // VowpalWabbitModel and VowpalWabbit instances seeded from VowpalWabbitModel // need to have the same "test" setting, otherwise the stride shift is different // and all hell breaks loose. 
if (!settings->Arguments->Contains("-t ") && !settings->Arguments->Contains("--testonly ") && !settings->Arguments->EndsWith("-t") && !settings->Arguments->EndsWith("--testonly")) { settings->Arguments += " -t"; } return settings; } VowpalWabbitModel::VowpalWabbitModel(VowpalWabbitSettings^ settings) : VowpalWabbitBase(AddTestOnly(settings)) { if (settings == nullptr) throw gcnew ArgumentNullException("settings"); if (settings->Model != nullptr) throw gcnew ArgumentNullException("VowpalWabbitModel cannot be initialized from another model"); } VowpalWabbitModel::VowpalWabbitModel(String^ args) : VowpalWabbitModel(gcnew VowpalWabbitSettings(args)) { } } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_model.h000066400000000000000000000014151332666127000204570ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #pragma once #include "vw_clr.h" #include "vw_base.h" namespace VW { /// /// VowpalWabbit model wrapper used in multi-threaded scenarios. /// public ref class VowpalWabbitModel : public VowpalWabbitBase { public: /// /// Initializes a new instance. /// /// Arguments passed to native instance. VowpalWabbitModel(VowpalWabbitSettings^ settings); /// Command line arguments passed to native instance. VowpalWabbitModel(String^ args); }; } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_prediction.cpp000066400000000000000000000121051332666127000220500ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. 
*/ #include "vw_prediction.h" #include "vw_example.h" #include "vw_base.h" #include "vowpalwabbit.h" namespace VW { void CheckExample(vw* vw, example* ex, prediction_type::prediction_type_t type) { if (vw == nullptr) throw gcnew ArgumentNullException("vw"); if (ex == nullptr) throw gcnew ArgumentNullException("ex"); auto ex_pred_type = vw->l->pred_type; if (ex_pred_type != type) { auto sb = gcnew StringBuilder(); sb->Append("Prediction type must be "); sb->Append(gcnew String(prediction_type::to_string(type))); sb->Append(" but is "); sb->Append(gcnew String(prediction_type::to_string(ex_pred_type))); throw gcnew ArgumentException(sb->ToString()); } } float VowpalWabbitScalarPredictionFactory::Create(vw* vw, example* ex) { CheckExample(vw, ex, PredictionType); try { return VW::get_prediction(ex); } CATCHRETHROW } VowpalWabbitScalar VowpalWabbitScalarConfidencePredictionFactory::Create(vw* vw, example* ex) { CheckExample(vw, ex, PredictionType); try { VowpalWabbitScalar ret; ret.Value = VW::get_prediction(ex); ret.Confidence = ex->confidence; return ret; } CATCHRETHROW } cli::array^ VowpalWabbitScalarsPredictionFactory::Create(vw* vw, example* ex) { CheckExample(vw, ex, PredictionType); try { auto& scalars = ex->pred.scalars; auto values = gcnew cli::array((int)scalars.size()); int index = 0; for (float s : scalars) values[index++] = s; return values; } CATCHRETHROW } float VowpalWabbitProbabilityPredictionFactory::Create(vw* vw, example* ex) { CheckExample(vw, ex, PredictionType); return ex->pred.prob; } float VowpalWabbitCostSensitivePredictionFactory::Create(vw* vw, example* ex) { CheckExample(vw, ex, PredictionType); try { return VW::get_cost_sensitive_prediction(ex); } CATCHRETHROW } Dictionary^ VowpalWabbitMulticlassProbabilitiesPredictionFactory::Create(vw* vw, example* ex) { #if _DEBUG if (ex == nullptr) throw gcnew ArgumentNullException("ex"); #endif v_array confidence_scores; try { confidence_scores = 
VW::get_cost_sensitive_prediction_confidence_scores(ex); } CATCHRETHROW auto values = gcnew Dictionary(); int i = 0; for (auto& val : confidence_scores) { values->Add(++i, val); } return values; } uint32_t VowpalWabbitMulticlassPredictionFactory::Create(vw* vw, example* ex) { CheckExample(vw, ex, PredictionType); return ex->pred.multiclass; } cli::array^ VowpalWabbitMultilabelPredictionFactory::Create(vw* vw, example* ex) { CheckExample(vw, ex, prediction_type::multilabels); size_t length; uint32_t* labels; try { labels = VW::get_multilabel_predictions(ex, length); } CATCHRETHROW if (length > Int32::MaxValue) throw gcnew ArgumentOutOfRangeException("Multi-label predictions too large"); auto values = gcnew cli::array((int)length); if (length > 0) Marshal::Copy(IntPtr(labels), values, 0, (int)length); return values; } cli::array^ VowpalWabbitActionScoreBasePredictionFactory::Create(vw* vw, example* ex) { CheckExample(vw, ex, PredictionType); auto& a_s = ex->pred.a_s; auto values = gcnew cli::array((int)a_s.size()); auto index = 0; for (auto& as : a_s) { values[index].Action = as.action; values[index].Score = as.score; index++; } return values; } cli::array^ VowpalWabbitTopicPredictionFactory::Create(vw* vw, example* ex) { if (ex == nullptr) throw gcnew ArgumentNullException("ex"); auto values = gcnew cli::array(vw->lda); Marshal::Copy(IntPtr(ex->pred.scalars.begin()), values, 0, vw->lda); return values; } System::Object^ VowpalWabbitDynamicPredictionFactory::Create(vw* vw, example* ex) { if (ex == nullptr) throw gcnew ArgumentNullException("ex"); switch (vw->l->pred_type) { case prediction_type::scalar: return VowpalWabbitPredictionType::Scalar->Create(vw, ex); case prediction_type::scalars: return VowpalWabbitPredictionType::Scalars->Create(vw, ex); case prediction_type::multiclass: return VowpalWabbitPredictionType::Multiclass->Create(vw, ex); case prediction_type::multilabels: return VowpalWabbitPredictionType::Multilabel->Create(vw, ex); case 
prediction_type::action_scores: return VowpalWabbitPredictionType::ActionScore->Create(vw, ex); case prediction_type::action_probs: return VowpalWabbitPredictionType::ActionProbabilities->Create(vw, ex); case prediction_type::prob: return VowpalWabbitPredictionType::Probability->Create(vw, ex); case prediction_type::multiclassprobs: return VowpalWabbitPredictionType::MultiClassProbabilities->Create(vw, ex); default: { auto sb = gcnew StringBuilder(); sb->Append("Unsupported prediction type: "); sb->Append(gcnew String(prediction_type::to_string(vw->l->pred_type))); throw gcnew ArgumentException(sb->ToString()); } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_prediction.h000066400000000000000000000262331332666127000215240ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #pragma once #include "vw_clr.h" namespace VW { ref class VowpalWabbitExample; ref class VowpalWabbit; using namespace System::Collections::Generic; /// /// Interface for prediction factories enabling read-out of various prediction results in an extendable manner. /// generic public interface class IVowpalWabbitPredictionFactory { public: /// /// Creates a new prediction result from an example and the associated VW instance. /// /// A prediction result. /// Implementation must be thread-safe. T Create(vw* vw, example* ex); /// /// Returns the supported prediction type. /// property prediction_type::prediction_type_t PredictionType { prediction_type::prediction_type_t get(); } }; /// /// A scalar prediction result. /// public ref class VowpalWabbitDynamicPredictionFactory sealed : IVowpalWabbitPredictionFactory { public: /// /// Extracts prediction results from example. /// virtual System::Object^ Create(vw* vw, example* ex) sealed; /// /// Returns the supported prediction type. 
/// property prediction_type::prediction_type_t PredictionType { virtual prediction_type::prediction_type_t get() sealed { throw gcnew NotSupportedException("Prediction type is not available."); } } }; /// /// A scalar prediction result. /// public ref class VowpalWabbitScalarPredictionFactory sealed : IVowpalWabbitPredictionFactory { public: /// /// Extracts prediction results from example. /// virtual float Create(vw* vw, example* ex) sealed; /// /// Returns the supported prediction type. /// property prediction_type::prediction_type_t PredictionType { virtual prediction_type::prediction_type_t get() sealed { return prediction_type::scalar; } } }; public value struct VowpalWabbitScalar { public: float Value; float Confidence; }; /// /// A scalar prediction result. /// public ref class VowpalWabbitScalarConfidencePredictionFactory sealed : IVowpalWabbitPredictionFactory { public: /// /// Extracts prediction results from example. /// virtual VowpalWabbitScalar Create(vw* vw, example* ex) sealed; /// /// Returns the supported prediction type. /// property prediction_type::prediction_type_t PredictionType { virtual prediction_type::prediction_type_t get() sealed { return prediction_type::scalar; } } }; /// /// A scalar prediction result. /// public ref class VowpalWabbitScalarsPredictionFactory sealed : IVowpalWabbitPredictionFactory^> { public: /// /// Extracts prediction results from example. /// virtual cli::array^ Create(vw* vw, example* ex) sealed; /// /// Returns the supported prediction type. /// property prediction_type::prediction_type_t PredictionType { virtual prediction_type::prediction_type_t get() sealed { return prediction_type::scalars; } } }; /// /// A scalar prediction result. /// public ref class VowpalWabbitProbabilityPredictionFactory sealed : IVowpalWabbitPredictionFactory { public: /// /// Extracts prediction results from example. /// virtual float Create(vw* vw, example* ex) sealed; /// /// Returns the supported prediction type. 
/// property prediction_type::prediction_type_t PredictionType { virtual prediction_type::prediction_type_t get() sealed { return prediction_type::prob; } } }; /// /// A cost sensitive prediction result. /// public ref class VowpalWabbitCostSensitivePredictionFactory sealed : IVowpalWabbitPredictionFactory { public: /// /// Extracts cost sensitive prediction results from example. /// virtual float Create(vw* vw, example* ex) sealed; /// /// Returns the supported prediction type. /// property prediction_type::prediction_type_t PredictionType { virtual prediction_type::prediction_type_t get() sealed { return prediction_type::multiclass; } } }; /// /// A cost sensitive prediction result. /// public ref class VowpalWabbitMulticlassPredictionFactory sealed : IVowpalWabbitPredictionFactory { public: /// /// Extracts cost sensitive prediction results from example. /// virtual uint32_t Create(vw* vw, example* ex) sealed; /// /// Returns the supported prediction type. /// property prediction_type::prediction_type_t PredictionType { virtual prediction_type::prediction_type_t get() sealed { return prediction_type::multiclass; } } }; /// /// A cost sensitive prediction result with associated confidence score /// For -oaa --probabilities /// public ref class VowpalWabbitMulticlassProbabilitiesPredictionFactory sealed : IVowpalWabbitPredictionFactory^> { public: /// /// Extracts cost sensitive prediction results from example, including confidence score. /// virtual Dictionary^ Create(vw* vw, example* ex) sealed; /// /// Returns the supported prediction type. /// property prediction_type::prediction_type_t PredictionType { virtual prediction_type::prediction_type_t get() sealed { return prediction_type::multiclassprobs; } } }; /// /// A multi label prediction result. /// public ref class VowpalWabbitMultilabelPredictionFactory sealed : IVowpalWabbitPredictionFactory^> { public: /// /// Extracts multilabel prediction results from example. 
/// virtual cli::array^ Create(vw* vw, example* ex) sealed; /// /// Returns the supported prediction type. /// property prediction_type::prediction_type_t PredictionType { virtual prediction_type::prediction_type_t get() sealed { return prediction_type::multilabels; } } }; [System::Diagnostics::DebuggerDisplay("{Action}:{Score}")] public value struct ActionScore sealed { public: property uint32_t Action; property float Score; }; /// /// A action score/probability result. /// public ref class VowpalWabbitActionScoreBasePredictionFactory abstract : IVowpalWabbitPredictionFactory^> { public: /// /// Extracts multilabel prediction results from example. /// virtual cli::array^ Create(vw* vw, example* ex) sealed; /// /// Returns the supported prediction type. /// property prediction_type::prediction_type_t PredictionType { virtual prediction_type::prediction_type_t get() abstract; } }; /// /// A action score prediction result. /// public ref class VowpalWabbitActionScorePredictionFactory sealed : public VowpalWabbitActionScoreBasePredictionFactory { public: /// /// Returns the supported prediction type. /// property prediction_type::prediction_type_t PredictionType { virtual prediction_type::prediction_type_t get() override sealed { return prediction_type::action_scores; } } }; /// /// A multi label prediction result. /// public ref class VowpalWabbitActionProbabilitiesPredictionFactory sealed : public VowpalWabbitActionScoreBasePredictionFactory { public: /// /// Returns the supported prediction type. /// property prediction_type::prediction_type_t PredictionType { virtual prediction_type::prediction_type_t get() override sealed { return prediction_type::action_probs; } } }; /// /// A topic prediction result. /// public ref class VowpalWabbitTopicPredictionFactory sealed : IVowpalWabbitPredictionFactory^> { public: /// /// Extracts prediction results from example. The predicted topics. 
/// virtual cli::array^ Create(vw* vw, example* ex) sealed; /// /// Returns the supported prediction type. /// property prediction_type::prediction_type_t PredictionType { virtual prediction_type::prediction_type_t get() sealed { throw gcnew NotSupportedException("Prediction type is not available."); } } }; /// /// Provides convenient collection of all prediction types. /// public ref class VowpalWabbitPredictionType sealed abstract { public: /// /// Use for scalar predictions. /// static initonly VowpalWabbitScalarPredictionFactory^ Scalar = gcnew VowpalWabbitScalarPredictionFactory; /// /// Use for scalar predictions. /// static initonly VowpalWabbitScalarConfidencePredictionFactory^ ScalarConfidence = gcnew VowpalWabbitScalarConfidencePredictionFactory; /// /// Use for scalar predictions. /// static initonly VowpalWabbitScalarsPredictionFactory^ Scalars = gcnew VowpalWabbitScalarsPredictionFactory; /// /// Use for cost sensitive predictions. /// static initonly VowpalWabbitCostSensitivePredictionFactory^ CostSensitive = gcnew VowpalWabbitCostSensitivePredictionFactory; /// /// Use for multi label predictions. /// static initonly VowpalWabbitMultilabelPredictionFactory^ Multilabel = gcnew VowpalWabbitMultilabelPredictionFactory; /// /// Use for multi class predictions. /// static initonly VowpalWabbitMulticlassPredictionFactory^ Multiclass = gcnew VowpalWabbitMulticlassPredictionFactory; /// /// Use for action score predictions. /// static initonly VowpalWabbitActionScorePredictionFactory^ ActionScore = gcnew VowpalWabbitActionScorePredictionFactory; /// /// Use for action score predictions. /// static initonly VowpalWabbitActionProbabilitiesPredictionFactory^ ActionProbabilities = gcnew VowpalWabbitActionProbabilitiesPredictionFactory; /// /// Use for LDA topic predictions. /// static initonly VowpalWabbitTopicPredictionFactory^ Topic = gcnew VowpalWabbitTopicPredictionFactory; /// /// Use for dynamicially determined predictions. 
/// static initonly VowpalWabbitDynamicPredictionFactory^ Dynamic = gcnew VowpalWabbitDynamicPredictionFactory; /// /// Use for dynamicially determined predictions. /// static initonly VowpalWabbitProbabilityPredictionFactory^ Probability = gcnew VowpalWabbitProbabilityPredictionFactory; /// /// Use for multiclass predictions with probabilities /// static initonly VowpalWabbitMulticlassProbabilitiesPredictionFactory^ MultiClassProbabilities = gcnew VowpalWabbitMulticlassProbabilitiesPredictionFactory; }; }vowpal-wabbit-8.6.1.dfsg1/cs/cli/vw_settings.h000066400000000000000000000077311332666127000212260ustar00rootroot00000000000000/* Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD (revised) license as described in the file LICENSE. */ #pragma once #include using namespace System; using namespace System::Collections::Generic; using namespace System::IO; using namespace System::Threading::Tasks; using namespace VW::Serializer; namespace VW { ref class VowpalWabbit; ref class VowpalWabbitModel; ref class VowpalWabbitSettings; public enum class VowpalWabbitExampleDistribution { /// /// Statistically safer option. /// UniformRandom = 0, /// /// Better runtime performance. /// RoundRobin = 1 }; public interface class ITypeInspector { public: Schema^ CreateSchema(VowpalWabbitSettings^ settings, Type^ type); }; /// /// Settings for wrapper. /// /// Constructor with optional arguments was dropped as it broke version remapping (signature changed with the introduction of new options). 
public ref class VowpalWabbitSettings : public ICloneable { public: VowpalWabbitSettings() { Arguments = String::Empty; ExampleCountPerRun = 1000; MaxExampleCacheSize = UINT32_MAX; MaxExampleQueueLengthPerInstance = UINT32_MAX; EnableExampleCaching = false; // default to the statistically more safe option ExampleDistribution = VowpalWabbitExampleDistribution::UniformRandom; EnableStringExampleGeneration = false; EnableStringFloatCompact = false; PropertyConfiguration = ::PropertyConfiguration::Default; EnableThreadSafeExamplePooling = false; MaxExamples = INT32_MAX; Verbose = false; } VowpalWabbitSettings(String^ arguments) : VowpalWabbitSettings() { if (arguments != nullptr) Arguments = arguments; } /// /// Command line arguments. /// property String^ Arguments; /// /// Model used for initialization. /// property Stream^ ModelStream; /// /// Shared native vowpwal wabbit data structure. /// property VowpalWabbitModel^ Model; property ParallelOptions^ ParallelOptions; /// /// Set to true to disable example caching when used with a serializer. Defaults to true. /// property bool EnableExampleCaching; /// /// Maximum number of serialized examples cached. Defaults to UINT32_MAX. /// property uint32_t MaxExampleCacheSize; /// /// Maximum number of examples accepted by VowpalWabbitManager until Learn/Predict/... start to block. Defaults to UINT32_MAX. /// property uint32_t MaxExampleQueueLengthPerInstance; property uint32_t Node; property VowpalWabbit^ Root; property VowpalWabbitExampleDistribution ExampleDistribution; /// /// In multi-threaded mode, this is the number of examples processed per run. /// After ecah run the models are synchronized. /// Defaults to 1000. /// property uint32_t ExampleCountPerRun; /// /// Enable Vowpal Wabbit native string generation. /// property bool EnableStringExampleGeneration; /// /// Enable compact float serialization for Vowpal Wabbit native string generation. 
/// property bool EnableStringFloatCompact; property VW::Serializer::Schema^ Schema; property VW::Serializer::Schema^ ActionDependentSchema; property List^ CustomFeaturizer; property ITypeInspector^ TypeInspector; property PropertyConfiguration^ PropertyConfiguration; property bool EnableThreadSafeExamplePooling; property int MaxExamples; property bool Verbose; /// /// Action invoked for each trace message. /// /// /// The trace listener obeys the Verbose property, which defaults to false. /// property Action^ TraceListener; virtual Object^ Clone() { return MemberwiseClone(); } }; } vowpal-wabbit-8.6.1.dfsg1/cs/common/000077500000000000000000000000001332666127000172125ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/common/Bag.cs000066400000000000000000000120011332666127000202240ustar00rootroot00000000000000using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading; using System.Threading.Tasks; namespace VW { /// /// Interface to model simple bag. /// /// public interface IBag { /// /// Try add to this bag. /// /// The item to add. /// True if succesful, false otherwise. bool TryAdd(T item); /// /// Remove and return one item from this bag. /// /// The item removed from the bag or default(T) if there is no item available. T Remove(); /// /// Remove and return all items from this bag. /// /// The items removed from the gag. IEnumerable RemoveAll(); /// /// The number of items this bag contains. /// int Count { get; } } /// /// Factory class for various bag implementations. /// public static class Bag { /// /// Creates a simple bound or unbound, not thread-safe bag object. /// /// The type of the items. /// The maximum number of items this bag should hold. /// A new bag instance. public static IBag Create(int max = int.MaxValue) { return max == int.MaxValue ? (IBag)new BagImpl() : new BoundedBagImpl(max); } /// /// Creates an unbound thread-safe, lock free bag. 
/// /// The type of the items. /// A new bag instance. public static IBag CreateLockFree(int max = int.MaxValue) { return new LockFreeBagImpl(max); } private abstract class BaseBagImpl { protected readonly Stack stack; internal BaseBagImpl() { this.stack = new Stack(); } public T Remove() { return this.stack.Count == 0 ? default(T) : this.stack.Pop(); } public IEnumerable RemoveAll() { var ret = this.stack.ToArray(); this.stack.Clear(); return ret; } public int Count { get { return this.stack.Count; } } } private sealed class BagImpl : BaseBagImpl, IBag { public bool TryAdd(T item) { this.stack.Push(item); return true; } } private sealed class BoundedBagImpl : BaseBagImpl, IBag { private readonly int max; internal BoundedBagImpl(int max) { this.max = max; } public bool TryAdd(T item) { if (this.stack.Count >= this.max) return false; this.stack.Push(item); return true; } } /// /// This is a good read on performance: http://msdn.microsoft.com/en-us/concurrency/ee851578.aspx /// For streaming training we are seeking good performance for a single producer and multiple consumers. /// /// private sealed class LockFreeBagImpl : IBag { private readonly int max; private readonly ConcurrentQueue queue; private int count; internal LockFreeBagImpl(int max) { this.queue = new ConcurrentQueue(); this.max = max; } public bool TryAdd(T item) { if (this.count < this.max) { this.queue.Enqueue(item); Interlocked.Increment(ref this.count); return true; } return false; } public T Remove() { T result; if (this.queue.TryDequeue(out result)) { Interlocked.Decrement(ref this.count); return result; } return default(T); } public IEnumerable RemoveAll() { // TODO: violates the lock constraint. 
though this is just used at disposable time return this.queue; } public int Count { get { return this.count; } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/common/Properties/000077500000000000000000000000001332666127000213465ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/common/Properties/AssemblyInfo.cs000066400000000000000000000022311332666127000242660ustar00rootroot00000000000000//------------------------------------------------------------------------------ // // This code was generated by a tool. // Runtime Version:4.0.30319.42000 // // Changes to this file may cause incorrect behavior and will be lost if // the code is regenerated. // //------------------------------------------------------------------------------ [assembly: System.Reflection.AssemblyTitle("Vowpal Wabbit Common")] [assembly: System.Reflection.AssemblyDescription("Vowpal Wabbit Common")] [assembly: System.Reflection.AssemblyCompany("Microsoft Corp")] [assembly: System.Reflection.AssemblyProduct("Vowpal Wabbit")] [assembly: System.Reflection.AssemblyCopyright("Copyright (C) Microsoft Corp 2012-2016, Yahoo! Inc. 2007-2012, and many individua" + "l contributors. All rights reserved")] [assembly: System.Runtime.InteropServices.ComVisible(false)] [assembly: System.CLSCompliant(false)] [assembly: System.Runtime.InteropServices.Guid("091c7906-1f69-44d5-a15f-fb29847a68ef")] [assembly: System.Reflection.AssemblyVersion("8.4.0.1")] [assembly: System.Reflection.AssemblyFileVersion("8.4.0.1")] vowpal-wabbit-8.6.1.dfsg1/cs/common/Reflection/000077500000000000000000000000001332666127000213045ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/common/Reflection/InspectionHelper.cs000066400000000000000000000060011332666127000251030ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. 
Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; namespace VW.Reflection { /// /// Utilitiy class supporting feature type inspection. /// public static class InspectionHelper { /// /// Determines if the is a supported numeric type. /// /// The type to be inspected. /// True if numeric, false otherwise. public static bool IsNumericType(Type elemType) { return IsNumericTypeInternal(elemType) || (elemType != null && elemType.IsGenericType && elemType.GetGenericTypeDefinition() == typeof(Nullable<>) && IsNumericTypeInternal(elemType.GetGenericArguments()[0])); } private static bool IsNumericTypeInternal(Type elemType) { return elemType == typeof(double) || elemType == typeof(float) || elemType == typeof(byte) || elemType == typeof(sbyte) || elemType == typeof(char) || elemType == typeof(decimal) || elemType == typeof(UInt16) || elemType == typeof(UInt32) || elemType == typeof(UInt64) || elemType == typeof(Int16) || elemType == typeof(Int32) || elemType == typeof(Int64); } /// /// If is an enumerable type (such as array or ), this method will /// return the element type. /// /// The type to be inspected. /// If is an enumerable type the element type is returned, otherwise null. 
public static Type GetEnumerableElementType(Type type) { Contract.Requires(type != null); if (type.IsArray) return type.GetElementType(); var enumerableType = type.GetInterfaces().Union(new[] { type }) .FirstOrDefault(it => it.IsGenericType && it.GetGenericTypeDefinition() == typeof(IEnumerable<>)); // let's get T of IEnumerable if (enumerableType != null) return enumerableType.GetGenericArguments()[0]; return null; } } } vowpal-wabbit-8.6.1.dfsg1/cs/common/Reflection/ReflectionHelper.cs000066400000000000000000000307731332666127000250770ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.Contracts; using System.IO; using System.Linq; using System.Linq.Expressions; using System.Reflection; using System.Reflection.Emit; namespace VW.Reflection { /// /// Reflection helper to find methods on visitors. /// public static class ReflectionHelper { /// /// Compiles the supplied to a callable function. /// /// The source expression to be compiled. /// A callable function. /// Can't constraint on Func (or would have to have 11 overloads) nor is it possible to constaint on delegate. 
public static System.Delegate CompileToFunc(this Expression sourceExpression) { // inspect T to be Func<...> var funcType = typeof(T); if (!funcType.Name.StartsWith("Func`")) throw new ArgumentException("T must be one of the System.Func<...> type."); var genericArguments = funcType.GetGenericArguments(); var returnType = genericArguments.Last(); var paramTypes = genericArguments.Take(genericArguments.Length - 1); // sign serializer so we can get access to internal members. var asmName = new AssemblyName("VowpalWabbitSerializer"); StrongNameKeyPair kp; using (var stream = typeof(ReflectionHelper).Assembly.GetManifestResourceStream("VW.vw_key.snk")) using (var memStream = new MemoryStream()) { stream.CopyTo(memStream, 1024); kp = new StrongNameKeyPair(memStream.ToArray()); } asmName.KeyPair = kp; var dynAsm = AppDomain.CurrentDomain.DefineDynamicAssembly(asmName, AssemblyBuilderAccess.RunAndSave); // Create a dynamic module and type //#if !DEBUG //var moduleBuilder = dynAsm.DefineDynamicModule("VowpalWabbitSerializerModule", asmName.Name + ".dll", true); //#else var moduleBuilder = dynAsm.DefineDynamicModule("VowpalWabbitSerializerModule"); var typeBuilder = moduleBuilder.DefineType("VowpalWabbitSerializer" + Guid.NewGuid().ToString().Replace('-', '_')); // Create our method builder for this type builder const string methodName = "Method"; var methodBuilder = typeBuilder.DefineMethod( methodName, MethodAttributes.Public | MethodAttributes.Static, returnType, paramTypes.ToArray()); // compared to Compile this looks rather ugly, but there is a feature-bug // that adds a security check to every call of the Serialize method //#if !DEBUG //var debugInfoGenerator = DebugInfoGenerator.CreatePdbGenerator(); //visit.CompileToMethod(methodBuilder, debugInfoGenerator); //#else sourceExpression.CompileToMethod(methodBuilder); //#endif var dynType = typeBuilder.CreateType(); // for debugging only // dynAsm.Save(@"my.dll"); return Delegate.CreateDelegate(typeof(T), 
dynType.GetMethod(methodName)); } /// /// TODO: replace me with Roslyn once it's released and just generate string code. This way the overload resolution is properly done. /// /// This is a simple heuristic for overload resolution, not the full thing. public static MethodInfo FindMethod(Type objectType, string name, params Type[] parameterTypes) { Contract.Requires(objectType != null); Contract.Requires(name != null); Contract.Requires(parameterTypes != null); // let's find the "best" match: // order by // 1. distance (0 = assignable, 1 = using generic) --> ascending // 2. # of interfaces implemented. the more the better (the more specific we are) --> descending // 3. # of open generics. the less the better (the more specific we are) --> ascending var methods = from m in objectType.GetMethods(BindingFlags.Instance | BindingFlags.Public) where m.Name == name let parameters = m.GetParameters() where parameters.Length == parameterTypes.Length let output = parameterTypes.Zip(parameters, (valueType, methodParameter) => Distance(methodParameter.ParameterType, valueType)).ToArray() where output.All(o => o != null) let distance = output.Sum(o => o.Distance) let interfacesImplemented = output.Sum(o => o.InterfacesImplemented) let generics = output.Sum(o => o.GenericTypes.Count) orderby distance, generics, interfacesImplemented descending select new { Method = m, Distance = distance, InterfacesImplemented = interfacesImplemented, GenericTypes = output.Select(o => o.GenericTypes) }; var bestCandidate = methods.FirstOrDefault(); if (bestCandidate == null) { return null; } MethodInfo method = bestCandidate.Method; //Debug.WriteLine("Method Search"); //foreach (var item in methods) //{ // Debug.WriteLine(string.Format("Distance={0} Interfaces={1} OpenGenerics={2} Method={3}", // item.Distance, // item.InterfacesImplemented, // item.GenericTypes.Count(gt => gt.Count > 0), // item.Method)); //} if (method.IsGenericMethod) { var mergedGenericTypes = 
bestCandidate.GenericTypes.SelectMany(d => d).ToLookup(kvp => kvp.Key, kvp => kvp.Value); // consistency check foreach (var gt in mergedGenericTypes) { var refElem = gt.First(); if (gt.Any(t => t != refElem)) { throw new NotSupportedException("Inconsistent generic argument mapping: " + string.Join(",", gt)); } } // map generic arguments to actual argument var actualTypes = method.GetGenericArguments().Select(t => mergedGenericTypes[t].First()).ToArray(); method = method.MakeGenericMethod(actualTypes); //Debug.WriteLine("\t specializing: " + method); } // Debug.WriteLine("Method: {0} for {1} {2}", method, name, string.Join(",", parameterTypes.Select(t => t.ToString()))); return method; } internal static TypeMatch Distance(Type candidate, Type valueType) { if (candidate == valueType) { return new TypeMatch(0) { InterfacesImplemented = candidate.GetInterfaces().Count() }; } if (candidate.IsAssignableFrom(valueType)) { return new TypeMatch(1) { InterfacesImplemented = candidate.GetInterfaces().Count() }; } if (candidate.IsGenericParameter && candidate.GetGenericParameterConstraints().All(c => c.IsAssignableFrom(valueType))) { return new TypeMatch(2, candidate, valueType) { InterfacesImplemented = candidate.GetInterfaces().Count() }; } if (candidate.IsGenericType) { // try to find a match that is assignable... // var genericCandidate = candidate.GetGenericTypeDefinition(); var bestMatches = from typeDistance in valueType.GetInterfaces().Select(it => new TypeDistance { Distance = 1, Type = it }) .Union(GetBaseTypes(valueType)) let type = typeDistance.Type where type.IsGenericType && type.GetGenericTypeDefinition() == genericCandidate let distances = candidate.GetGenericArguments().Zip(type.GetGenericArguments(), (a, b) => Distance(a, b)).ToList() where distances.All(d => d != null) let output = new TypeMatch(typeDistance.Distance, distances) { InterfacesImplemented = distances.Sum(d => d.InterfacesImplemented) + (candidate.IsInterface ? 
candidate.GetInterfaces().Count() : 0) } orderby output.Distance, output.InterfacesImplemented descending, output.GenericTypes.Count select output; return bestMatches.FirstOrDefault(); } return null; } internal static IEnumerable GetBaseTypes(Type type, int depth = 0) { if (type == typeof(object) || type == null) { yield break; } yield return new TypeDistance { Type = type, Distance = depth }; foreach (var item in GetBaseTypes(type.BaseType, depth + 1)) { yield return item; } } /// /// Gets the member info in a sort of type safe manner - it's better than using strings, but some runtime errors are still possbile. /// public static MemberInfo GetInfo(Expression> expression) { Contract.Requires(expression != null); return GetInfo(expression.Body); } /// /// Gets the member info in a sort of type safe manner - it's better than using strings, but some runtime errors are still possbile. /// public static MemberInfo GetInfo(Expression> expression) { Contract.Requires(expression != null); return GetInfo(expression.Body); } /// /// Gets the member info in a sort of type safe manner - it's better than using strings, but some runtime errors are still possbile. 
/// public static MemberInfo GetInfo(Expression expression) { Contract.Requires(expression != null); var binaryExpression = expression as BinaryExpression; if (binaryExpression != null) { if (binaryExpression.Method != null) { return binaryExpression.Method; } throw new NotSupportedException(); } var methodExpression = expression as MemberExpression; if (methodExpression != null) { return methodExpression.Member; } var methodCallExpression = expression as MethodCallExpression; if (methodCallExpression != null) { return methodCallExpression.Method; } var newExpression = expression as NewExpression; if (newExpression != null) { return newExpression.Constructor; } var unaryExpression = expression as UnaryExpression; if (unaryExpression != null) { if (unaryExpression.Method != null) { return unaryExpression.Method; } } throw new NotSupportedException(); } } } vowpal-wabbit-8.6.1.dfsg1/cs/common/Reflection/TypeDistance.cs000066400000000000000000000013571332666127000242350ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; namespace VW.Reflection { /// /// Models a distance to a given type. /// internal sealed class TypeDistance { internal int Distance { get; set; } internal Type Type { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/common/Reflection/TypeMatch.cs000066400000000000000000000035461332666127000235410ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. 
All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; namespace VW.Reflection { /// /// Models a distance and additional matching constraints for a type. /// internal sealed class TypeMatch { internal TypeMatch(int distance) { this.Distance = distance; this.GenericTypes = new Dictionary { }; } internal TypeMatch(int distance, Type genericType, Type actualType) : this(distance) { Contract.Requires(genericType != null); Contract.Requires(actualType != null); this.GenericTypes = new Dictionary { { genericType, actualType } }; } internal TypeMatch(int distance, IEnumerable typeMatches) : this(distance) { Contract.Requires(typeMatches != null); this.GenericTypes = typeMatches .Where(tm => tm.GenericTypes != null) .SelectMany(tm => tm.GenericTypes) .ToDictionary(tm => tm.Key, tm => tm.Value); } internal int Distance { get; private set; } internal int InterfacesImplemented { get; set; } internal IDictionary GenericTypes { get; private set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/common/Serializer/000077500000000000000000000000001332666127000213235ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/common/Serializer/FeatureExpression.cs000066400000000000000000000202631332666127000253300ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.Contracts; using System.Linq.Expressions; using System.Reflection; using VW.Reflection; namespace VW.Serializer { /// /// Delegate defintion for feature object creation expressions. /// /// An expression resolving to a VowpalWabbit instance. /// An expression resolving to a Namespace instance. /// An expression constructing a new Feature object. public delegate Expression NewFeatureExpressionDelegate(Expression vw, Expression @namespace); /// /// Feature data composed during compilation step. /// [DebuggerDisplay("FeatureExpression({Name})")] public sealed class FeatureExpression { /// /// Initializes a new instance of the class. /// /// The type of the feature. /// The name of the feature. /// Factory to extract the value for a given feature from the example object (input argument). /// Factories to provide validation before invoking the expression created through . /// The expression must create new Feature instances. /// The namespace this feature belongs to. /// The feature group this feature belongs to. /// If true the marshaller enumerates the feature (as in creates a 1-hot encoding). /// The variable name to be used in the generated code. /// Used to order feature serialization. /// True if an anchor element should be added at the beginning of a dense feature array. /// Configures string pre-processing for this feature. /// An optional method overriding the otherwise auto-resolved serialization method. /// True if a dictionary should be build for this feature. /// The parent feature expression. public FeatureExpression(Type featureType, string name, Func valueExpressionFactory, List> valueValidExpressionFactories = null, NewFeatureExpressionDelegate featureExpressionFactory = null, string @namespace = null, char? 
featureGroup = null, bool enumerize = false, string variableName = null, int? order = null, bool addAnchor = false, StringProcessing stringProcessing = StringProcessing.Split, MethodInfo overrideSerializeMethod = null, bool? dictify = null, FeatureExpression parent = null) { if (featureType == null) throw new ArgumentNullException("featureType"); if (valueExpressionFactory == null) throw new ArgumentNullException("valueExpressionFactory"); Contract.EndContractBlock(); if(featureType.IsGenericType && featureType.GetGenericTypeDefinition() == typeof(Nullable<>)) { this.IsNullable = true; this.FeatureType = featureType.GetGenericArguments()[0]; } else { this.IsNullable = false; this.FeatureType = featureType; } this.Name = name; this.ValueExpressionFactory = valueExpressionFactory; this.ValueValidExpressionFactories = valueValidExpressionFactories; this.FeatureExpressionFactory = featureExpressionFactory; this.Namespace = @namespace; this.FeatureGroup = featureGroup; this.Enumerize = enumerize; this.VariableName = variableName ?? name; this.Order = order ?? 1; this.AddAnchor = addAnchor; this.Dictify = dictify ?? false; this.StringProcessing = stringProcessing; this.OverrideSerializeMethod = overrideSerializeMethod; this.Dictify = dictify ?? false; this.Parent = parent; this.DenseFeatureValueElementType = InspectionHelper.GetEnumerableElementType(featureType); if (!InspectionHelper.IsNumericType(this.DenseFeatureValueElementType)) this.DenseFeatureValueElementType = null; } /// /// The parent feature expression. /// public FeatureExpression Parent { get; private set; } /// /// True if the type is nullable. /// public bool IsNullable { get; set; } /// /// Serializer variable name. /// /// Useful to debug public string VariableName { get; set; } /// /// The type of the feature. /// public Type FeatureType { get; private set; } internal Type IntermediateFeatureType { get; set; } /// /// The name of the feature. /// public string Name { get; set; } /// /// The namespace. 
/// public string Namespace { get; set; } /// /// The feature group. /// public char? FeatureGroup { get; set; } /// /// An optional method overriding the otherwise auto-resolved serialization method. /// public MethodInfo OverrideSerializeMethod { get; set; } /// /// True if this feature should be enumerized. /// public bool Enumerize { get; set; } /// /// True if an anchor element should be added at the beginning of a dense feature array. /// public bool AddAnchor { get; set; } /// /// True if a dictionary should be build for this feature. /// public bool Dictify { get; set; } /// /// Factory to extract the value for a given feature from the example object (input argument). /// public Func ValueExpressionFactory { get; set; } /// /// Factories to provide validation before invoking the expression created through . /// public List> ValueValidExpressionFactories { get; set; } /// /// The expression must create new Feature instances. /// public NewFeatureExpressionDelegate FeatureExpressionFactory { get; set; } /// /// The element type of an enumerable feature type. /// public Type DenseFeatureValueElementType { get; set; } /// /// Used to order feature serialization. /// public int Order { get; set; } /// /// Configures string pre-processing for this feature. /// public StringProcessing StringProcessing { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/common/Serializer/LabelExpression.cs000066400000000000000000000027471332666127000247630ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Linq.Expressions; namespace VW.Serializer { /// /// Defines access to the label for an user-specified example type. /// public sealed class LabelExpression { /// /// The name of the label. /// public string Name { get; set; } /// /// The type of the feature. /// public Type LabelType { get; set; } /// /// Factory to extract the value for a given feature from the example object (input argument). /// public Func ValueExpressionFactory { get; set; } /// /// Factories to provide validation before invoking the expression created through . /// public List> ValueValidExpressionFactories { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/common/Serializer/PropertyConfiguration.cs000066400000000000000000000105351332666127000262320ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; namespace VW.Serializer { /// /// Constants used throughout C# wrapper. /// public sealed class PropertyConfiguration { /// /// Default value for feature ignore prefix: '_'. /// public const string FeatureIgnorePrefixDefault = "_"; /// /// Default value for text property: '_text'. /// public const string TextPropertyDefault = "_text"; /// /// Default value for label property: '_label'. /// public const string LabelPropertyDefault = "_label"; /// /// Default value for label index property: '_labelindex'. 
/// public const string LabelIndexPropertyDefault = "_labelindex"; /// /// Default value for label property prefix: '_label_'; /// public const string LabelPropertyPrefixDefault = "_label_"; /// /// Default value for multi property: '_multi'. /// public const string MultiPropertyDefault = "_multi"; /// /// Default singleton holding the default configuration. /// public static readonly PropertyConfiguration Default = new PropertyConfiguration(); /// /// Initializes a new instance. /// public PropertyConfiguration() { this.FeatureIgnorePrefix = FeatureIgnorePrefixDefault; this.TextProperty = TextPropertyDefault; this.LabelProperty = LabelPropertyDefault; this.MultiProperty = MultiPropertyDefault; this.LabelIndexProperty = LabelIndexPropertyDefault; this.LabelPropertyPrefix = LabelPropertyPrefixDefault; } /// /// JSON properties starting with underscore are ignored. /// public string FeatureIgnorePrefix { get; set; } /// /// JSON property "_text" is marshalled using . /// public string TextProperty { get; set; } /// /// JSON property "_label" is used as label. /// public string LabelProperty { get; set; } /// /// JSON property "_labelIndex" determines the index this label is applied for multi-line examples. /// public string LabelIndexProperty { get; set; } /// /// JSON properties starting with "_label_$name" are used to specify nested properties. Has the same effect as _label: { "$name": ... }. /// public string LabelPropertyPrefix { get; set; } /// /// JSON property "_multi" is used to signal multi-line examples. /// public string MultiProperty { get; set; } /// /// True if is considered a special property and thus should not be skipped. /// /// The JSON property name. /// True if is a special property, false otherwise. 
public bool IsSpecialProperty(string property) { return property.Equals(TextProperty, StringComparison.OrdinalIgnoreCase) || property.Equals(LabelProperty, StringComparison.OrdinalIgnoreCase) || property.Equals(MultiProperty, StringComparison.OrdinalIgnoreCase) || property.Equals(LabelIndexProperty, StringComparison.OrdinalIgnoreCase) || property.StartsWith(LabelPropertyPrefixDefault, StringComparison.OrdinalIgnoreCase); } } } vowpal-wabbit-8.6.1.dfsg1/cs/common/Serializer/Schema.cs000066400000000000000000000017371332666127000230620ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System.Collections.Generic; namespace VW.Serializer { /// /// Describes the serializatoin for a give type. /// public sealed class Schema { /// /// List of features to extract from type. /// public List Features { get; set; } /// /// Expression to access the label. /// public LabelExpression Label { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/common/Serializer/StringProcessing.cs000066400000000000000000000020761332666127000251620ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- namespace VW.Serializer { /// /// Options for string pre-processing before feeding to VW native. 
/// public enum StringProcessing { /// /// Spaces are replaced with underscores. /// Escape, /// /// Strings are split on space, producing individual features. /// Split, /// /// Spaces are replaced with underscores and the property name is used as a prefix. /// EscapeAndIncludeName } } vowpal-wabbit-8.6.1.dfsg1/cs/common/VowpalWabbitDecisionServiceInteractionHeader.cs000066400000000000000000000025711332666127000304570ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- namespace VW { /// /// Decision Service interaction header information. /// public sealed class VowpalWabbitDecisionServiceInteractionHeader { /// /// EventId extracted from Decision Service Interaction JSON data. /// public string EventId { get; set; } /// /// Actions extracted from Decision Service Interaction JSON data. /// public int[] Actions { get; set; } /// /// Probabilities extracted from Decision Service Interaction JSON data. /// public float[] Probabilities { get; set; } /// /// Probability of drop extracted from Decision Service Interaction JSON data. 
/// public float ProbabilityOfDrop { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/common/packages.config000066400000000000000000000002521332666127000221560ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/common/vw_common.csproj000066400000000000000000000214751332666127000224510ustar00rootroot00000000000000 Debug AnyCPU {E621E022-C1F8-433F-905A-AB9A3DE072B7} Library Properties VW VowpalWabbit.Common v4.5.2 512 0 true ..\vw_key.snk true $(SolutionDir)\x64\Debug\ DEBUG;TRACE full x64 prompt MinimumRecommendedRules.ruleset True False True False False True True True True True True True True True False True False True False False False False True False True True True False False True False False True Full %28none%29 0 ..\..\vowpalwabbit\x64\Debug\VowpalWabbit.Common.XML $(SolutionDir)\x64\Release\ TRACE true pdbonly x64 prompt MinimumRecommendedRules.ruleset ..\..\vowpalwabbit\x64\Release\VowpalWabbit.Common.XML true ..\..\vowpalwabbit\x64\DebugLeakCheck\ DEBUG;TRACE full x64 prompt MinimumRecommendedRules.ruleset This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. vowpal-wabbit-8.6.1.dfsg1/cs/cs/000077500000000000000000000000001332666127000163275ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/cs/ActionDependentFeature.cs000066400000000000000000000024721332666127000232430ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- namespace VW { /// /// A tuple of an action dependent feature and the corresponding index. /// /// The action dependent feature type. public sealed class ActionDependentFeature { internal ActionDependentFeature(int index, TActionDependentFeature feature) { this.Index = index; this.Feature = feature; } /// /// The index within the multi-line example. /// public int Index { get; private set; } /// /// The index within the multi-line example. /// public float Probability { get; set; } /// /// The feature object. /// public TActionDependentFeature Feature { get; private set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Extensions.cs000066400000000000000000000046071332666127000210240ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; using System.Text; using System.Threading.Tasks; namespace VW { /// /// LINQ extensions. /// public static class Extensions { /// /// Returns the elements specified by indicies/ /// /// The element type. /// The enumerable source. /// The indicies to be selected. /// The subset of elements. 
public static T[] Subset(this IEnumerable source, int[] indicies) { Contract.Requires(source != null); Contract.Requires(indicies != null); // re-shuffle var result = new T[indicies.Length]; var i = 0; foreach (var item in source) { result[indicies[i]] = item; i++; } return result; } /// /// Returns the index of the first element matching . /// /// The collection type. /// The source enumerable. /// The predicate to match. /// The index of the first element to match or -1 if none matched. public static int IndexOf(this IEnumerable source, Predicate predicate) { Contract.Requires(source != null); Contract.Requires(predicate != null); var i = 0; foreach (var t in source) { if (predicate(t)) { return i; } i++; } return -1; } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/ObjectFactory.cs000066400000000000000000000060211332666127000214130ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; namespace VW { /// /// Helper to conveniently create . /// public static class ObjectFactory { /// /// Disposable object factory. /// /// The disposable context needed to create objects of . /// The type of the objects to be created. public static ObjectFactory Create(TSource context, Func creator) where TSource : class, IDisposable { return new ObjectFactory(context, creator); } } /// /// Disposable object factory. /// /// The disposable context needed to create objects of . /// The type of the objects to be created. public class ObjectFactory : IDisposable where TSource : class, IDisposable { /// /// Factory function to create new instances. 
/// private readonly Func creator; /// /// The source object passed to . /// private TSource source; /// /// True if this instance is already disposed. /// private bool disposed; internal ObjectFactory(TSource source, Func creator) { this.source = source; this.creator = creator; this.disposed = false; } /// /// Creates a new object of type T. /// public TObject Create() { return this.creator(source); } /// /// Cleanup. /// public void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { if (disposing) { if (!disposed) { if (this.source != null) { this.source.Dispose(); this.source = null; } this.disposed = true; } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/ObjectPool.cs000066400000000000000000000177261332666127000207330ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.Diagnostics.Contracts; using System.Threading; namespace VW { /// /// Thread-safe object pool supporting versioned updates. /// /// The disposable context needed to create objects of . /// The type of the objects to be created. public class ObjectPool : IDisposable where TSource : class, IDisposable where TObject : class, IDisposable { /// /// Lock resources /// private readonly ReaderWriterLockSlim rwLockSlim; /// /// Version of the factory function. /// private int version; /// /// Used to create new pooled objects. /// private ObjectFactory factory; /// /// The actual pool. 
/// /// /// To maximize reuse of previously cached items within the pooled objects. /// (e.g. cached action dependent features) /// private Stack> pool; /// /// Initializes a new ObjectPool. /// /// /// An optional factory to create pooled objects on demand. /// will throw if the factory is still null when called. /// public ObjectPool(ObjectFactory factory = null) { this.rwLockSlim = new ReaderWriterLockSlim(); this.pool = new Stack>(); this.factory = factory; } /// /// Updates the object factory in a thread-safe manner. /// /// The new object factory to be used. public void UpdateFactory(ObjectFactory factory) { Stack> oldPool; ObjectFactory oldFactory; this.rwLockSlim.EnterWriteLock(); try { if (this.pool == null) { throw new ObjectDisposedException("ObjectPool already disposed"); } this.version++; oldFactory = this.factory; this.factory = factory; oldPool = this.pool; this.pool = new Stack>(); } finally { this.rwLockSlim.ExitWriteLock(); } // dispose outdated items foreach (var item in oldPool) { item.Value.Dispose(); } // dispose factory if (oldFactory != null) { oldFactory.Dispose(); } } /// /// Returns an instance of TObject from the pool or creates a new instance using the objectFactory /// if the pool is empty. /// /// This method is thread-safe. 
public PooledObject GetOrCreate() { int localVersion; ObjectFactory localFactory; this.rwLockSlim.EnterUpgradeableReadLock(); try { if (this.pool == null) { throw new ObjectDisposedException("ObjectPool already disposed"); } if (this.pool.Count == 0) { // create a consistent copy localVersion = this.version; localFactory = this.factory; } else { this.rwLockSlim.EnterWriteLock(); try { if (this.pool == null) { throw new ObjectDisposedException("ObjectPool already disposed"); } return this.pool.Pop(); } finally { this.rwLockSlim.ExitWriteLock(); } } } finally { this.rwLockSlim.ExitUpgradeableReadLock(); } if (localFactory == null) { throw new InvalidOperationException("Factory must be initialized before calling Get()"); } // invoke the factory outside of the lock return new PooledObject(this, localVersion, localFactory.Create()); } /// /// Returns to the pool of objects, given the version is still the same. /// Otherwise is disposed. /// /// The object to be returned. internal void ReturnObject(PooledObject pooledObject) { Contract.Ensures(pooledObject != null); this.rwLockSlim.EnterUpgradeableReadLock(); try { if (this.version == pooledObject.Version && this.pool != null) { this.rwLockSlim.EnterWriteLock(); try { // double check if (this.version == pooledObject.Version && this.pool != null) { // it's the same version, return to pool this.pool.Push(pooledObject); return; } } finally { this.rwLockSlim.ExitWriteLock(); } } } finally { this.rwLockSlim.ExitUpgradeableReadLock(); } // outdated pooledObject.Value.Dispose(); } /// /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources. 
/// public void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { if (disposing) { this.rwLockSlim.EnterWriteLock(); try { // Dispose pool items if (this.pool != null) { foreach (var item in this.pool) { item.Value.Dispose(); } this.pool = null; } // Dispose factory if (this.factory != null) { this.factory.Dispose(); this.factory = null; } } finally { this.rwLockSlim.ExitWriteLock(); } } } } }vowpal-wabbit-8.6.1.dfsg1/cs/cs/PooledObject.cs000066400000000000000000000042571332666127000212370ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; namespace VW { /// /// A strongly-typed pooled object. /// /// The disposable context needed to create objects of . /// The type of the objects to be created. public sealed class PooledObject : IDisposable where TSource : class, IDisposable where TObject : class, IDisposable { /// /// The parent pool. /// private readonly ObjectPool pool; /// /// Initializes a new instance of the class. /// /// The parent pool. /// The version of the pool at time of creation of this instance. /// The actual pooled object. internal PooledObject(ObjectPool pool, int version, TObject value) { this.pool = pool; this.Value = value; this.Version = version; } /// /// The actual value. /// public TObject Value { get; private set; } /// /// Factory version used to create Value. /// internal int Version { get; private set; } /// /// Return to pool. 
/// public void Dispose() { // don't keep empty objects in pool if (this.Value != null) this.pool.ReturnObject(this); } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Properties/000077500000000000000000000000001332666127000204635ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/cs/Properties/AssemblyInfo.cs000066400000000000000000000022131332666127000234030ustar00rootroot00000000000000//------------------------------------------------------------------------------ // // This code was generated by a tool. // Runtime Version:4.0.30319.42000 // // Changes to this file may cause incorrect behavior and will be lost if // the code is regenerated. // //------------------------------------------------------------------------------ [assembly: System.Reflection.AssemblyTitle("Vowpal Wabbit")] [assembly: System.Reflection.AssemblyDescription("Vowpal Wabbit")] [assembly: System.Reflection.AssemblyCompany("Microsoft Corp")] [assembly: System.Reflection.AssemblyProduct("Vowpal Wabbit")] [assembly: System.Reflection.AssemblyCopyright("Copyright (C) Microsoft Corp 2012-2016, Yahoo! Inc. 2007-2012, and many individua" + "l contributors. 
All rights reserved")] [assembly: System.Runtime.InteropServices.ComVisible(false)] [assembly: System.CLSCompliant(false)] [assembly: System.Runtime.InteropServices.Guid("6a577997-af00-4ca0-8453-fdc8bbdf2a57")] [assembly: System.Reflection.AssemblyVersion("8.4.0.1")] [assembly: System.Reflection.AssemblyFileVersion("8.4.0.1")] vowpal-wabbit-8.6.1.dfsg1/cs/cs/Properties/AssemblyInfoInternal.cs000066400000000000000000000010471332666127000251040ustar00rootroot00000000000000using System.Reflection; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; // make internals available to unit test [assembly: InternalsVisibleTo("cs_unittest,PublicKey=" + "0024000004800000940000000602000000240000525341310004000001000100515aa9bda65291" + "811af92b381378bd271aff3a9e177bac69ff0e85874952fd82c0fbcb53f4e968181d07418481ee" + "2be97522d44c324aa5c683dafaa449fe66ddc65e1d9b3c0600c8820bd2be6401c6888ea88864ef" + "0b6ae5bfbf450aa1f548568d638913d82954195947e394c225cca2cd2f8132d525c2fdc0c57835" + "b87200aa")] vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/000077500000000000000000000000001332666127000204405ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/AnnotationInspector.cs000066400000000000000000000170011332666127000247670ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; using System.Linq.Expressions; using System.Reflection; using VW.Labels; using VW.Serializer.Attributes; using VW.Serializer.Intermediate; namespace VW.Serializer { /// /// Utility class analyzing compile-time annotation. /// public static class TypeInspector { /// /// All properties are used as features. /// public static readonly ITypeInspector All; /// /// Only properties annotated using Feature attribute are considered. /// public static readonly ITypeInspector Default; static TypeInspector() { All = new AnnotationInspectorAll(); Default = new AnnotationInspectorDefault(); } private sealed class AnnotationInspectorDefault : ITypeInspector { public Schema CreateSchema(VowpalWabbitSettings settings, Type type) { return TypeInspector.CreateSchema(type, featurePropertyPredicate: (_, attr) => attr != null, labelPropertyPredicate: (_, attr) => attr != null); } } private sealed class AnnotationInspectorAll : ITypeInspector { public Schema CreateSchema(VowpalWabbitSettings settings, Type type) { return TypeInspector.CreateSchema(type, featurePropertyPredicate: (_, __) => true, labelPropertyPredicate: (_, __) => true); } } private static Schema CreateSchema(Type type, Func featurePropertyPredicate, Func labelPropertyPredicate) { Contract.Requires(type != null); Contract.Requires(featurePropertyPredicate != null); Contract.Requires(labelPropertyPredicate != null); var validExpressions = new Stack>(); // CODE example != null validExpressions.Push(valueExpression => Expression.NotEqual(valueExpression, Expression.Constant(null))); return CreateSchema( null, type, null, null, null, // CODE example valueExpression => valueExpression, validExpressions, featurePropertyPredicate, labelPropertyPredicate); } private static Schema CreateSchema( 
FeatureExpression parent, Type type, string parentNamespace, char? parentFeatureGroup, bool? parentDictify, Func valueExpressionFactory, Stack> valueValidExpressionFactories, Func featurePropertyPredicate, Func labelPropertyPredicate) { var props = type.GetProperties(BindingFlags.Instance | BindingFlags.GetProperty | BindingFlags.Public); var localFeatures = (from p in props let declaredAttr = (FeatureAttribute)p.GetCustomAttributes(typeof(FeatureAttribute), true).FirstOrDefault() where featurePropertyPredicate(p, declaredAttr) let attr = declaredAttr ?? new FeatureAttribute() select new FeatureExpression( featureType: p.PropertyType, name: attr.Name ?? p.Name, // CODE example.Property valueExpressionFactory: valueExpression => Expression.Property(valueExpressionFactory(valueExpression), p), // @Reverse: make sure conditions are specified in the right order valueValidExpressionFactories: valueValidExpressionFactories.Reverse().ToList(), @namespace: attr.Namespace ?? parentNamespace, featureGroup: attr.InternalFeatureGroup ?? parentFeatureGroup, enumerize: attr.Enumerize, variableName: p.Name, order: attr.Order, addAnchor: attr.AddAnchor, stringProcessing: attr.StringProcessing, dictify: attr.InternalDictify ?? parentDictify, parent: parent) ).ToList(); var localLabels = from p in props let declaredAttr = (LabelAttribute)p.GetCustomAttributes(typeof(LabelAttribute), true).FirstOrDefault() where labelPropertyPredicate(p, declaredAttr) || typeof(ILabel).IsAssignableFrom(p.PropertyType) let attr = declaredAttr ?? 
new LabelAttribute() let labelType = p.PropertyType where typeof(ILabel).IsAssignableFrom(labelType) || p.PropertyType == typeof(string) select new LabelExpression { Name = p.Name, LabelType = p.PropertyType, // CODE example.Property ValueExpressionFactory = valueExpression => Expression.Property(valueExpressionFactory(valueExpression), p), // @Reverse: make sure conditions are specified in the right order ValueValidExpressionFactories = valueValidExpressionFactories.Reverse().ToList() }; // Recurse var schemas = localFeatures .Select(f => { // CODE example.Prop1.Prop2 != null valueValidExpressionFactories.Push(valueExpression => Expression.NotEqual(f.ValueExpressionFactory(valueExpression), Expression.Constant(null))); var subSchema = CreateSchema(f, f.FeatureType, f.Namespace, f.FeatureGroup, f.Dictify, f.ValueExpressionFactory, valueValidExpressionFactories, featurePropertyPredicate, labelPropertyPredicate); valueValidExpressionFactories.Pop(); return subSchema; }) .ToList(); return new Schema { Features = localFeatures.Union(schemas.SelectMany(s => s.Features)).ToList(), Label = localLabels.Union(schemas.Select(s => s.Label)).FirstOrDefault(l => l != null) }; } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/Attributes/000077500000000000000000000000001332666127000225665ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/Attributes/CacheableAttribute.cs000066400000000000000000000020471332666127000266330ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using System; namespace VW.Serializer.Attributes { /// /// Annotate an example class that's generated output should be cached. /// [AttributeUsage(AttributeTargets.Class)] public sealed class CacheableAttribute : Attribute { /// /// Specify an equality comparer to be used for the dictionary cache. /// If non is specified, default behavior of the /// public Type EqualityComparer { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/Attributes/FeatureAttribute.cs000066400000000000000000000072311332666127000263770ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using VW.Serializer; namespace VW.Serializer.Attributes { /// /// Annotate properties that should be serialized to Vowpal Wabbit /// [AttributeUsage(AttributeTargets.Property)] public sealed class FeatureAttribute : Attribute { /// /// Initializes a new instance of the class. /// public FeatureAttribute() { this.Enumerize = false; this.AddAnchor = false; this.StringProcessing = StringProcessing.Split; } /// /// The namespace. Corresponds to the string literal after | in the native VW line format.
/// e.g. user in "|user :0.1 :0.2" ///
public string Namespace { get; set; } /// /// If true, features will be converted to string and then hashed. /// In VW line format: Age:15 (Enumerize=false), Age_15 (Enumerize=true) /// /// Defaults to false. public bool Enumerize { get; set; } /// /// If true, an anchoring feature (0:1) will be inserted at front. /// This is required if --interact is used to mark the beginning of the feature namespace, /// as 0-valued features are removed. /// /// Defaults to false. public bool AddAnchor { get; set; } /// /// If true, the string serialization will collect the feature into a dictionary and output a surrogate. /// /// Defaults to null, which inherits from parent. If no parent information available, defaults to false. public bool Dictify { get { return InternalDictify ?? false; } set { this.InternalDictify = value; } } /// /// Cope with potential null values. /// internal bool? InternalDictify { get; set; } /// /// Cope with potential null values. /// internal char? InternalFeatureGroup { get; set; } /// /// The regular VW string interface interprets the first character of the namespace as the feature group. /// public char FeatureGroup { get { return InternalFeatureGroup ?? VowpalWabbitConstants.DefaultNamespace; } set { this.InternalFeatureGroup = value; } } /// /// Allows feature name override. /// /// Defaults to reflected property name. public string Name { get; set; } /// /// Specify the serialization order. /// public int Order { get; set; } /// /// Configures string pre-processing before hashing. All options are compatible with VW string format. 
/// /// Defaults to public StringProcessing StringProcessing { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/Attributes/LabelAttribute.cs000066400000000000000000000013611332666127000260210ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; namespace VW.Serializer.Attributes { /// /// Used to annotate properties designated as labels. /// [AttributeUsage(AttributeTargets.Property)] public sealed class LabelAttribute : Attribute { } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/ConfigInspector.cs000066400000000000000000000204541332666127000240700ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Linq; using System.Linq.Expressions; using System.Reflection; using System.Text.RegularExpressions; namespace VW.Serializer { /// /// Constructs a marshalling schema based on string based configuration using /// this format: Property.Property.Property(Enumerize = true, Namespace = , ...) /// public static class ConfigInspector { /// /// Format: Property.Property.Property(Enumerize = true, Namespace = , ...) 
/// private static readonly Regex LineRegex = new Regex(@" ^\s* (?[^\s.\(]+) # first property (?:\. (?[^\s.\(]+) )* # other properties (\( # first attribute (?\w+) \s* = \s* (?[^,)\s]+) (?:\s*,\s* # other attributes (?\w+) \s* = \s* (?[^,)\s]+) \s* )* \))? \s*$ ", RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); /// /// Creates a for the given . /// /// The type to be inspected. /// An optional config restricting the properties to be used as features. /// An otpional callback to signal warnings during inspection. /// Returns a schema describing the feature marshalling. public static Schema CreateSchema(Type type, string config, Action warnings = null) { if (warnings == null) warnings = msg => { }; var featureExpressionCtor = typeof(FeatureExpression).GetConstructors()[0]; var featureExpressions = new List(); var lines = config.Split(new[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries) .Select(l => l.Trim()) .Where(l => l.Length > 0); foreach (var line in lines) { var match = LineRegex.Match(line); if (!match.Success) { warnings(string.Format("Unable to parse: '{0}'", line)); continue; } var propPath = GetGroupMatch(match.Groups["prop"]) .ToList(); var names = GetGroupMatch(match.Groups["attrname"]) .Select(n => n.ToLowerInvariant()) .ToList(); var values = GetGroupMatch(match.Groups["attrvalue"]) .ToList(); if (names.Count != values.Count) { warnings(string.Format("Number of attribute names ({0}) and values ({1}) need match. '{2}'", names.Count, values.Count, line)); continue; } var currentType = type; bool valid = true; var properties = new List(); foreach (var prop in propPath) { var property = currentType.GetProperty(prop); // ignore, since we can't find if (property == null) { warnings(string.Format("Property '{0}' not found in type: '{1}'. 
Specification: {2}", prop, currentType, line)); valid = false; break; } properties.Add(property); currentType = property.PropertyType; } if (!valid || properties.Count == 0) { warnings(string.Format("No properties found on '{0}' specified by '{1}'", type, line)); continue; } Func valueExpression = e => WalkPropertyPath(e, properties); var valueValidExpression = new List>(); for (int i = 1; i < properties.Count - 1; i++) { valueValidExpression.Add(e => Expression.NotEqual( WalkPropertyPath(e, properties.Take(i)), Expression.Constant(null))); } var arguments = featureExpressionCtor.GetParameters() .Select(p => { var index = names.IndexOf(p.Name.ToLowerInvariant()); if (index == -1) return p.DefaultValue; var value = values[index]; if (p.ParameterType == typeof(string)) return ParseString(value); if (p.ParameterType == typeof(char?)) return ParseChar(value); if (p.ParameterType == typeof(bool)) return Convert.ToBoolean(value); if (p.ParameterType == typeof(int?)) return Convert.ToInt32(value); warnings(string.Format("Unsupported type '{0}' for parameter '{1}'", p.ParameterType, p.Name)); return p.DefaultValue; }) .Skip(4) .ToList(); // name, valueExpressionFactory, valueValidExpressionFactories arguments.InsertRange(0, new object[] { properties.Last().PropertyType, properties.Last().Name, valueExpression, valueValidExpression }); featureExpressions.Add((FeatureExpression)featureExpressionCtor.Invoke(arguments.ToArray())); } // TODO: adf? // p1.p2(Multi) // have a different return type... // run twice. 
separate schemas for shared and ADF // // Options from enum to factories // new VowpalWabbit return new Schema { Features = featureExpressions }; } private static string ParseString(string value) { // supports "Markus", Markus, "Markus, Markus" var match = Regex.Match(value.Trim(), "^\"?([^\"]+)\"?$"); if (!match.Success) throw new ArgumentException("Unable to parse: '" + value + "'"); return match.Groups[1].Value; } private static char ParseChar(string value) { var match = Regex.Match(value.Trim(), "^\'?([^'])'?$"); if (!match.Success) throw new ArgumentException("Unable to parse: '" + value + "'"); return match.Groups[1].Value[0]; } private static Expression WalkPropertyPath(Expression e, IEnumerable properties) { var propAccess = Expression.Property(e, properties.First()); var leftProperties = properties.Skip(1); if (leftProperties.Count() == 0) return propAccess; return WalkPropertyPath(propAccess, leftProperties); } private static IEnumerable GetGroupMatch(Group group) { if (group.Success) { for (int i = 0; i < group.Captures.Count; i++) yield return group.Captures[i].Value; } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/IVowpalWabbitConverter.cs000066400000000000000000000020301332666127000253640ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using VW.Serializer.Intermediate; namespace VW.Serializer { /// /// Types implementing custom serialization to VW should implement this interface. 
/// public interface IVowpalWabbitSerializable { /// /// Marshals this object into native VW /// /// /// /// void Marshal(VowpalWabbitMarshalContext ctx, Namespace ns, Feature feature); } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/IVowpalWabbitMultiExampleSerializerCompiler.cs000066400000000000000000000014131332666127000315540ustar00rootroot00000000000000using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; namespace VW.Serializer { /// /// Interface for multi-example serializer compilers. /// /// The example type. public interface IVowpalWabbitMultiExampleSerializerCompiler { /// /// Returns the number of action dependent examples holds. /// /// The example to inspect. /// Returns the number of action dependent examples holds. int GetNumberOfActionDependentExamples(TExample example); } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/IVowpalWabbitSerializer.cs000066400000000000000000000051531332666127000255370ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using VW.Labels; namespace VW.Serializer { /// /// Abstraction for single vs. multiline examples. /// /// The user-defined type to be serialized. public interface IVowpalWabbitSerializer : IDisposable { /// /// True if Vowpal Wabbit strings are generated, false otherwise. /// bool EnableStringExampleGeneration { get; } /// /// True if this serializer caches examples, false otherwise. /// bool CachesExamples { get; } /// /// Serializes the given into a Vowpal Wabbit string. 
/// /// The example to serialize. /// The optional label to serialize. /// The optional index of the example, the should be attributed to. /// Dictionary used to collect dictifyed features. /// Dictionary used to collect dictifyed features. /// The serialized Vowpal Wabbit string formatted example. string SerializeToString(TExample example, ILabel label = null, int? index = null, Dictionary dictionary = null, Dictionary fastDictionary = null); /// /// Serializes the given into a native Vowpal Wabbit example. /// /// The example to seralize. /// The optional label to serialize. /// The optional index of the example, the should be attributed to. /// A Vowpal Wabbit example ready to be used for prediction and learning. VowpalWabbitExampleCollection Serialize(TExample example, ILabel label = null, int? index = null); } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/IVowpalWabbitSerializerCompiler.cs000066400000000000000000000021241332666127000272250ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- namespace VW.Serializer { /// /// Abstraction for single vs. multiline examples. /// /// The user-defined type to be serialized. public interface IVowpalWabbitSerializerCompiler { /// /// Creates a new serializer for the given type. /// /// The VW instance this serializer is associated with. /// A ready to use serializer. 
IVowpalWabbitSerializer Create(VowpalWabbit vw); } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/Intermediate/000077500000000000000000000000001332666127000230525ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/Intermediate/EnumerizedFeature.cs000066400000000000000000000057531332666127000270360ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; namespace VW.Serializer.Intermediate { /// /// Feature description for enumerized features. Instead of estimating a single parameter/weight /// for a given integer, VW will estimate a parameter/weight for each value (one-hot encoding, dummy variables) /// /// The value type. public sealed class EnumerizedFeature : Feature { private readonly VowpalWabbit vw; private readonly Namespace ns; /// /// Enum hashing function. /// private readonly Func enumHashing; /// /// Initializes a new EnumerizedFeature. /// /// Vowpal Wabbit instance required for hashing. /// The associated namespace. /// The feature name. /// /// If true, enable dictionary extraction. /// The enumHash cache factory. public EnumerizedFeature(VowpalWabbit vw, Namespace ns, string name, bool addAnchor, bool dictify, Func, Func> enumHashing) : base(name, addAnchor, dictify) { if (!typeof(T).IsEnum) { throw new ArgumentException(string.Format("Type {0} must be enum", typeof(T))); } this.vw = vw; this.ns = ns; // initialize the enumHashing function this.enumHashing = enumHashing(this); } /// /// Hashes potentially using a cache. /// /// The value to be hashed. 
/// The hash of + public ulong FeatureHash(T value) { return this.enumHashing(value); } /// /// Hashes directly (no caching). /// /// The value to be hashed. /// The hash of + public ulong FeatureHashInternal(T value) { return this.vw.HashFeature( this.Name + Enum.GetName(typeof(T), value), this.ns.NamespaceHash); } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/Intermediate/Feature.cs000066400000000000000000000041111332666127000247710ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Diagnostics; using System.Diagnostics.Contracts; using System.Linq.Expressions; namespace VW.Serializer.Intermediate { /// /// The base feature description. /// [DebuggerDisplay("Feature({Name}, addAnchor: {AddAnchor}, dictify: {Dictify}")] public class Feature { /// /// Initializes a new Feature. /// /// /// /// public Feature(string name, bool addAnchor = false, bool dictify = false) { this.Name = name; this.AddAnchor = addAnchor; this.Dictify = dictify; } /// /// The origin property name is used as the feature name. /// public string Name { get; private set; } /// /// If true, an anchoring feature (0:1) will be inserted at front. /// This is required if --interact is used to mark the beginning of the feature namespace, /// as 0-valued features are removed. /// /// Defaults to false. public bool AddAnchor { get; private set; } /// /// If true, the string serialization will collect the feature into a dictionary and output a surrogate. /// /// Defaults to null, which inherits from parent. If no parent information available, defaults to false. 
public bool Dictify { get; private set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/Intermediate/Namespace.cs000066400000000000000000000063711332666127000253040ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Globalization; namespace VW.Serializer.Intermediate { /// /// The intermediate namespace representation. /// public sealed class Namespace { /// /// Initializes a new instance. /// /// VopwpalWabbit instance used for hashing. /// The namespace name. /// Defaults to space, if null. public Namespace(VowpalWabbit vw, string name, char? featureGroup) { this.Name = name; this.FeatureGroup = featureGroup ?? VowpalWabbitConstants.DefaultNamespace; if (featureGroup == null && !string.IsNullOrWhiteSpace(name)) { throw new ArgumentException("If Namespace is provided, FeatureGroup must be set too"); } // compute shared namespace hash this.NamespaceHash = name == null ? vw.HashSpace(this.FeatureGroup.ToString()) : vw.HashSpace(this.FeatureGroup + this.Name); if (vw.Settings.EnableStringExampleGeneration) this.NamespaceString = string.Format( CultureInfo.InvariantCulture, " |{0}{1}", this.FeatureGroup, this.Name); } /// /// Initializes a new instance. /// /// VopwpalWabbit instance used for hashing. /// The namespace name. First character is treated as feature group. Defaults to space. 
public Namespace(VowpalWabbit vw, string name = null) { if (string.IsNullOrWhiteSpace(name)) name = VowpalWabbitConstants.DefaultNamespace.ToString(); if (name.Length > 1) this.Name = name.Substring(1); this.FeatureGroup = name[0]; this.NamespaceHash = vw.HashSpace(name); if (vw.Settings.EnableStringExampleGeneration) this.NamespaceString = " |" + name; } /// /// Gets or sets the namespace name. /// public string Name { get; private set; } /// /// Gets or sets the feature group. /// public char FeatureGroup { get; private set; } /// /// The pre-calculated hash. /// public ulong NamespaceHash { get; private set; } /// /// The string representation of the namespace. /// public string NamespaceString { get; private set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/Intermediate/PreHashedFeature.cs000066400000000000000000000027761332666127000265740ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- namespace VW.Serializer.Intermediate { /// /// The intermediate feature representation. /// public sealed class PreHashedFeature : Feature { /// /// Initializes a new instance of the class. /// /// The vowpal wabbit instance. /// The namespace. /// The feature name/ /// True if an anchor needs to be added, false otherwise. /// public PreHashedFeature(VowpalWabbit vw, Namespace ns, string name, bool addAnchor = false, bool dictify = false) : base(name, addAnchor, dictify) { this.FeatureHash = vw.HashFeature(this.Name, ns.NamespaceHash); } /// /// The pre-hashed feature hash. 
/// public ulong FeatureHash { get; private set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/VowpalWabbitConstants.cs000066400000000000000000000014571332666127000252740ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- namespace VW.Serializer { /// /// String constants used by VW. /// public static class VowpalWabbitConstants { /// /// The VW default namespace is denoted by a blank. /// public const char DefaultNamespace = ' '; } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/VowpalWabbitDefaultMarshaller.cs000066400000000000000000000366441332666127000267250ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Globalization; using System.Text.RegularExpressions; using VW.Labels; using VW.Serializer.Intermediate; namespace VW.Serializer { /// /// The default marshaller for most types supported by VW. /// public sealed partial class VowpalWabbitDefaultMarshaller { /// /// Singleton default marshaller as it is stateless. 
/// public static readonly VowpalWabbitDefaultMarshaller Instance = new VowpalWabbitDefaultMarshaller(); /// /// Marshals a boolean value into native VW. /// /// e.g. loggedIn = true yields "loggedIn" in VW native string format. /// e.g. loggedIn = false yields an empty string. /// /// The marshalling context. /// The namespace description. /// The feature description. /// The actual feature value. public void MarshalFeature(VowpalWabbitMarshalContext context, Namespace ns, PreHashedFeature feature, bool value) { Contract.Requires(context != null); Contract.Requires(ns != null); Contract.Requires(feature != null); if (!value) { return; } context.NamespaceBuilder.AddFeature(feature.FeatureHash, 1f); context.AppendStringExample(feature.Dictify, " {0}", feature.Name); } /// /// Marshals an enum value into native VW. /// /// The enum type. /// The marshalling context. /// The namespace description. /// The feature description. /// The actual feature value. /// Gender = Male yields "GenderMale" in VW native string format. public void MarshalEnumFeature(VowpalWabbitMarshalContext context, Namespace ns, EnumerizedFeature feature, T value) { Contract.Requires(context != null); Contract.Requires(ns != null); Contract.Requires(feature != null); context.NamespaceBuilder.AddFeature(feature.FeatureHash(value), 1f); context.AppendStringExample(feature.Dictify, " {0}{1}", feature.Name, value); } /// /// Marshals any type into native VW, by constructing a 1-hot encoding using . /// /// The type to be enumerized. /// The marshalling context. /// The namespace description. /// The feature description. /// The actual feature value. /// is , actual value '25' and is 'Age'. /// The result is equivalent to 'Age25'. 
/// public void MarshalEnumerizeFeature(VowpalWabbitMarshalContext context, Namespace ns, Feature feature, T value) { Contract.Requires(context != null); Contract.Requires(ns != null); Contract.Requires(feature != null); var stringValue = feature.Name + value.ToString(); context.NamespaceBuilder.AddFeature(context.VW.HashFeature(stringValue, ns.NamespaceHash), 1f); context.AppendStringExample(feature.Dictify, " {0}", stringValue); } private static Regex escapeCharacters = new Regex("[ \t|:]", RegexOptions.Compiled); /// /// Marshals the supplied string into VW native space. Spaces are escaped using '_'. /// Only is serialized, Name is ignored. /// /// The marshalling context. /// The namespace description. /// The feature description. /// The actual feature value. /// is "New York". Result is "New_York". public void MarshalFeatureStringEscape(VowpalWabbitMarshalContext context, Namespace ns, Feature feature, string value) { Contract.Requires(context != null); Contract.Requires(ns != null); Contract.Requires(feature != null); if (string.IsNullOrWhiteSpace(value)) return; // safe escape spaces value = escapeCharacters.Replace(value, "_"); var featureHash = context.VW.HashFeature(value, ns.NamespaceHash); context.NamespaceBuilder.AddFeature(featureHash, 1f); context.AppendStringExample(feature.Dictify, " {0}", value); } /// /// Marshals the supplied string into VW native space. Spaces are escaped using '_'. Includes the in the 1-hot encoded feature. /// /// The marshalling context. /// The namespace description. /// The feature description. /// The actual feature value. /// is "New York". Name is "Location". Result is "LocationNew_York". 
public void MarshalFeatureStringEscapeAndIncludeName(VowpalWabbitMarshalContext context, Namespace ns, Feature feature, string value) { Contract.Requires(context != null); Contract.Requires(ns != null); Contract.Requires(feature != null); if (string.IsNullOrWhiteSpace(value)) return; // safe escape spaces value = feature.Name + escapeCharacters.Replace(value, "_"); var featureHash = context.VW.HashFeature(value, ns.NamespaceHash); context.NamespaceBuilder.AddFeature(featureHash, 1f); context.AppendStringExample(feature.Dictify, " {0}", value); } /// /// Marshals the supplied string into VW native space, by splitting the word by white space. /// /// The marshalling context. /// The namespace description. /// The feature description. /// The actual feature value. /// is "New York". Result is "New York", corresponding to 2 featuers in VW native space. public void MarshalFeatureStringSplit(VowpalWabbitMarshalContext context, Namespace ns, Feature feature, string value) { if (string.IsNullOrWhiteSpace(value)) return; var words = value.Split((char[])null, StringSplitOptions.RemoveEmptyEntries); foreach (var s in words) { var featureHash = context.VW.HashFeature(escapeCharacters.Replace(s, "_"), ns.NamespaceHash); context.NamespaceBuilder.AddFeature(featureHash, 1f); } if (context.StringExample == null) { return; } foreach (var s in words) { context.AppendStringExample(feature.Dictify, " {0}", escapeCharacters.Replace(s, "_")); } } /// /// Transfers feature data to native space. /// /// /// /// The marshalling context. /// The namespace description. /// The feature description. /// The actual feature value. 
public void MarshalFeature(VowpalWabbitMarshalContext context, Namespace ns, Feature feature, IEnumerable> value) { Contract.Requires(context != null); Contract.Requires(ns != null); Contract.Requires(feature != null); if (value == null) { return; } foreach (var kvp in value) { context.NamespaceBuilder.AddFeature( context.VW.HashFeature(Convert.ToString(kvp.Key), ns.NamespaceHash), Convert.ToSingle(kvp.Value, CultureInfo.InvariantCulture)); } if (context.StringExample == null) { return; } foreach (var kvp in value) { context.AppendStringExample( feature.Dictify, " {0}:" + (context.VW.Settings.EnableStringFloatCompact ? "{1}" : "{1:E20}"), Convert.ToString(kvp.Key), Convert.ToSingle(kvp.Value, CultureInfo.InvariantCulture)); } } /// /// /// /// The marshalling context. /// The namespace description. /// The feature description. /// The actual feature value. public void MarshalFeature(VowpalWabbitMarshalContext context, Namespace ns, Feature feature, IDictionary value) { Contract.Requires(context != null); Contract.Requires(ns != null); Contract.Requires(feature != null); if (value == null) { return; } foreach (DictionaryEntry item in value) { context.NamespaceBuilder.AddFeature( context.VW.HashFeature(Convert.ToString(item.Key), ns.NamespaceHash), Convert.ToSingle(item.Value, CultureInfo.InvariantCulture)); } if (context.StringExample == null) { return; } foreach (DictionaryEntry item in value) { context.AppendStringExample( feature.Dictify, " {0}:" + (context.VW.Settings.EnableStringFloatCompact ? "{1}" : "{1:E20}"), Convert.ToString(item.Key), Convert.ToSingle(item.Value, CultureInfo.InvariantCulture)); } } /// /// /// /// The marshalling context. /// The namespace description. /// The feature description. /// The actual feature value. 
public void MarshalFeature(VowpalWabbitMarshalContext context, Namespace ns, Feature feature, IEnumerable value) { Contract.Requires(context != null); Contract.Requires(ns != null); Contract.Requires(feature != null); if (value == null) return; foreach (var item in value) context.NamespaceBuilder.AddFeature(context.VW.HashFeature(item.Replace(' ', '_'), ns.NamespaceHash), 1f); if (context.StringExample == null) return; foreach (var item in value) context.AppendStringExample(feature.Dictify, " {0}", item); } /// /// /// /// The marshalling context. /// The namespace description. /// The feature description. /// The actual feature value. public void MarshalFeature(VowpalWabbitMarshalContext context, Namespace ns, Feature feature, IVowpalWabbitSerializable value) { Contract.Requires(context != null); Contract.Requires(ns != null); Contract.Requires(feature != null); if (value == null) return; value.Marshal(context, ns, feature); } /// /// /// /// The marshalling context. /// The namespace description. /// public int MarshalNamespace(VowpalWabbitMarshalContext context, Namespace ns, Action featureVisits) { try { // the namespace is only added on dispose, to be able to check if at least a single feature has been added context.NamespaceBuilder = context.ExampleBuilder.AddNamespace(ns.FeatureGroup); var position = 0; var stringExample = context.StringExample; if (context.StringExample != null) position = stringExample.Append(ns.NamespaceString).Length; featureVisits(); if (context.StringExample != null) { if (position == stringExample.Length) // no features added, remove namespace stringExample.Length = position - ns.NamespaceString.Length; } return (int)context.NamespaceBuilder.FeatureCount; } finally { if (context.NamespaceBuilder != null) { context.NamespaceBuilder.Dispose(); context.NamespaceBuilder = null; } } } /// /// /// /// The marshalling context. 
/// <param name="label">The label to apply; nothing happens when it is null.</param>
public void MarshalLabel(VowpalWabbitMarshalContext context, ILabel label)
{
    if (label != null)
    {
        context.ExampleBuilder.ApplyLabel(label);

        // prefix with label
        if (context.StringExample != null)
        {
            context.StringLabel = label.ToString();
        }
    }
}

/// <summary>
/// Applies a label given as string to the example that is being built.
/// </summary>
/// <param name="context">The marshalling context.</param>
/// <param name="label">The label text; nothing happens when it is null.</param>
public void MarshalLabel(VowpalWabbitMarshalContext context, string label)
{
    if (label != null)
    {
        context.ExampleBuilder.ApplyLabel(new StringLabel(label));
        context.StringLabel = label;
    }
}
}
}
vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/VowpalWabbitDefaultMarshallerExt.tt000066400000000000000000000352401332666127000274170ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections; using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.Contracts; using System.Linq; using System.Globalization; using System.Text; using VW.Serializer.Intermediate; <#@ assembly name="System.Core" #> <#@ import namespace="System.Linq" #> <#@ output extension=".cs" #> namespace VW.Serializer { public partial class VowpalWabbitDefaultMarshaller { <# foreach(var t in new[] { typeof(byte), typeof(sbyte), typeof(Int16), typeof(Int32), typeof(UInt16), typeof(UInt32), typeof(float), typeof(Int64), typeof(UInt64), typeof(double) }) { #> /// /// Transfers feature data to native space. /// /// The marshalling context. /// The namespace description. /// The feature description. /// The feature value. 
public void MarshalFeature(VowpalWabbitMarshalContext context, Namespace ns, PreHashedFeature feature, <#=t#> value)
{
    Contract.Requires(context != null);
    Contract.Requires(ns != null);
    Contract.Requires(feature != null);

<# if(new[]{ typeof(Int64), typeof(UInt64), typeof(double) }.Contains(t)) {#>
#if DEBUG
    if (value > float.MaxValue || value < float.MinValue)
    {
        Trace.TraceWarning("Precision lost for feature value: " + value);
    }
#endif
<# } #>

    // native example: the hash comes pre-computed with the feature description
    context.NamespaceBuilder.AddFeature(feature.FeatureHash, (float)value);

    // string example: " name:value"
    context.AppendStringExample(
        feature.Dictify,
        " {0}:" + (context.VW.Settings.EnableStringFloatCompact ? "{1}" : "{1:E20}"),
        feature.Name,
        value);
}

/// <summary>
/// Transfers feature data to native space. Forwards the complete array to the
/// offset/length overload.
/// </summary>
/// <param name="context">The marshalling context.</param>
/// <param name="ns">The namespace description.</param>
/// <param name="feature">The feature description.</param>
/// <param name="value">The feature value.</param>
public unsafe void MarshalFeature(VowpalWabbitMarshalContext context, Namespace ns, Feature feature, <#=t#>[] value)
{
    if (value != null)
    {
        this.MarshalFeature(context, ns, feature, value, 0, value.Length);
    }
}

/// <summary>
/// Transfers feature data to native space.
/// </summary>
/// <param name="context">The marshalling context.</param>
/// <param name="ns">The namespace description.</param>
/// <param name="feature">The feature description.</param>
/// <param name="value">The feature values.</param>
/// <param name="offset">Start offset for feature values.</param>
/// <param name="length">Length of feature values.</param>
public unsafe void MarshalFeature(VowpalWabbitMarshalContext context, Namespace ns, Feature feature, <#=t#>[] value, int offset, int length)
{
    if (value == null)
        return;

    // The float path passes raw pointers to the namespace builder, so a slice outside of
    // the array must be rejected before anything is added.
    if (offset < 0 || length < 0 || offset > value.Length - length)
        throw new ArgumentOutOfRangeException("offset", "offset and length must describe a range inside value.");

    // i is the running index that is added to the namespace hash
    ulong i = 0;

    // support anchor feature
    if (feature.AddAnchor)
    {
        // only 'length' values (plus the anchor) are added, not the whole array
        context.NamespaceBuilder.PreAllocate(length + 1);
        context.NamespaceBuilder.AddFeature(ns.NamespaceHash, 1);
        i++;
    }
    else
        context.NamespaceBuilder.PreAllocate(length);

<# if (t == typeof(float)) { #>
    fixed (float* begin = value)
    {
        var temp = begin + offset;
        context.NamespaceBuilder.AddFeaturesUnchecked((ulong)(ns.NamespaceHash + i), temp, temp + length);
    }
<# } else { #>
    for (var j = offset; j < offset + length; j++)
    {
        var v = value[j];
<# if(new[]{ typeof(Int64), typeof(UInt64), typeof(double) }.Contains(t)) {#>
#if DEBUG
        if (v > float.MaxValue || v < float.MinValue)
        {
            Trace.TraceWarning("Precision lost for feature value: " + v);
        }
#endif
<# } #>
        context.NamespaceBuilder.AddFeature((ulong)(ns.NamespaceHash + i), (float)v);
        i++;
    }
<# } #>

    if (context.StringExample == null)
        return;

    // The fast dictionary is keyed by the array instance. A cached string is therefore only
    // valid when the complete array is serialized; slices bypass the cache.
    var useFastDictionary = feature.Dictify
        && context.FastDictionary != null
        && offset == 0
        && length == value.Length;

    string featureString;
    if (useFastDictionary && context.FastDictionary.TryGetValue(value, out featureString))
    {
        context.AppendStringExample(feature.Dictify, featureString);
        return;
    }

    var featureBuilder = new StringBuilder();

    // support anchor feature
    i = 0;
    if (feature.AddAnchor)
    {
        featureBuilder.Append(" 0:1");
        i++;
    }

    var format = " {0}:" + (context.VW.Settings.EnableStringFloatCompact ? "{1}" : "{1:E20}");
    for (var j = offset; j < offset + length; j++)
    {
        featureBuilder.AppendFormat(
            CultureInfo.InvariantCulture,
            format,
            i,
            value[j]);
        i++;
    }

    featureString = featureBuilder.ToString();

    if (useFastDictionary)
        context.FastDictionary.Add(value, featureString);

    context.AppendStringExample(feature.Dictify, featureString);
}

/// <summary>
/// Transfers feature data to native space.
/// </summary>
/// <param name="context">The marshalling context.</param>
/// <param name="ns">The namespace description.</param>
/// <param name="feature">The feature description.</param>
/// <param name="value">The feature value.</param>
public void MarshalFeature(VowpalWabbitMarshalContext context, Namespace ns, Feature feature, IEnumerable<<#=t#>> value)
{
    if (value == null)
    {
        return;
    }

    // running index added to the namespace hash; the anchor (if any) takes index 0
    ulong index = 0;

    // support anchor feature
    if (feature.AddAnchor)
    {
        context.NamespaceBuilder.AddFeature(ns.NamespaceHash, 1);
        index++;
    }

    foreach (var v in value)
    {
<# if(new[]{ typeof(Int64), typeof(UInt64), typeof(double) }.Contains(t)) {#>
#if DEBUG
        if (v > float.MaxValue || v < float.MinValue)
        {
            Trace.TraceWarning("Precision lost for feature value: " + v);
        }
#endif
<# } #>
        context.NamespaceBuilder.AddFeature((ulong)(ns.NamespaceHash + index), (float)v);
        index++;
    }

    if (context.StringExample == null)
    {
        return;
    }

    // reuse a previously serialized string for the same sequence instance, if available
    string serialized;
    if (feature.Dictify && context.FastDictionary != null)
    {
        if (context.FastDictionary.TryGetValue(value, out serialized))
        {
            context.AppendStringExample(feature.Dictify, serialized);
            return;
        }
    }

    var builder = new StringBuilder();

    // support anchor feature
    index = 0;
    if (feature.AddAnchor)
    {
        builder.Append(" 0:1");
        index++;
    }

    var format = " {0}:" + (context.VW.Settings.EnableStringFloatCompact ? "{1}" : "{1:E20}");
    foreach (var v in value)
    {
        builder.AppendFormat(CultureInfo.InvariantCulture, format, index, v);
        index++;
    }

    serialized = builder.ToString();

    if (feature.Dictify && context.FastDictionary != null)
    {
        context.FastDictionary.Add(value, serialized);
    }

    context.AppendStringExample(feature.Dictify, serialized);
}
<# } #>
<# foreach(var t in new[] { typeof(byte), typeof(sbyte), typeof(Int16), typeof(Int32), typeof(UInt16) }) { #>
<# foreach(var s in new[] { typeof(byte), typeof(sbyte), typeof(Int16), typeof(Int32), typeof(UInt16), typeof(UInt32), typeof(float), typeof(Int64), typeof(UInt64), typeof(double) }) { #>
/// <summary>
/// Transfers feature data to native space.
/// </summary>
/// <param name="context">The marshalling context.</param>
/// <param name="ns">The namespace description.</param>
/// <param name="feature">The feature description.</param>
/// <param name="value">The feature value.</param>
[ContractVerification(false)]
public void MarshalFeature(VowpalWabbitMarshalContext context, Namespace ns, Feature feature, IDictionary<<#=t#>, <#=s#>> value)
{
    Contract.Requires(context != null);
    Contract.Requires(ns != null);
    Contract.Requires(feature != null);

    if (value == null)
    {
        return;
    }

    // native example: the numeric key is used as offset from the namespace hash
    foreach (var entry in value)
    {
<# if(new[]{ typeof(Int64), typeof(UInt64), typeof(double) }.Contains(s)) {#>
#if DEBUG
        if (entry.Value > float.MaxValue || entry.Value < float.MinValue)
        {
            Trace.TraceWarning("Precision lost for feature value: " + entry.Value);
        }
#endif
<# } #>
        context.NamespaceBuilder.AddFeature(ns.NamespaceHash + (ulong)entry.Key, (float)entry.Value);
    }

    if (context.StringExample == null)
    {
        return;
    }

    // string example: " key:value" per entry
    var format = " {0}:" + (context.VW.Settings.EnableStringFloatCompact ? "{1}" : "{1:E20}");
    foreach (var entry in value)
    {
        // TODO: not sure if negative numbers will work
        context.AppendStringExample(feature.Dictify, format, entry.Key, entry.Value);
    }
}
<# } #>
<# } #>
<# foreach(var s in new[] { typeof(byte), typeof(sbyte), typeof(Int16), typeof(Int32), typeof(UInt16), typeof(UInt32), typeof(float), typeof(Int64), typeof(UInt64), typeof(double) }) { #>
/// <summary>
/// Transfers feature data to native space.
/// </summary>
/// <param name="context">The marshalling context.</param>
/// <param name="ns">The namespace description.</param>
/// <param name="feature">The feature description.</param>
/// <param name="value">The feature value.</param>
[ContractVerification(false)]
public void MarshalFeature(VowpalWabbitMarshalContext context, Namespace ns, Feature feature, IDictionary<string, <#=s#>> value)
{
    Contract.Requires(context != null);
    Contract.Requires(ns != null);
    Contract.Requires(feature != null);

    if (value == null)
        return;

    // native example: the string key is hashed inside the namespace
    foreach (var kvp in value)
    {
<# if(new[]{ typeof(Int64), typeof(UInt64), typeof(double) }.Contains(s)) {#>
#if DEBUG
        if (kvp.Value > float.MaxValue || kvp.Value < float.MinValue)
        {
            Trace.TraceWarning("Precision lost for feature value: " + kvp.Value);
        }
#endif
<# } #>
        context.NamespaceBuilder.AddFeature(
            context.VW.HashFeature(kvp.Key, ns.NamespaceHash),
            (float)kvp.Value);
    }

    if (context.StringExample == null)
        return;

    var format = " {0}:" + (context.VW.Settings.EnableStringFloatCompact ? "{1}" : "{1:E20}");
    foreach (var kvp in value)
    {
        // TODO: not sure if negative numbers will work
        context.AppendStringExample(
            feature.Dictify,
            format,
            kvp.Key,
            kvp.Value);
    }
}

/// <summary>
/// Transfers feature data to native space. Every dictionary entry is marshalled into its own
/// namespace: the key names the namespace (a null key falls back to <paramref name="ns"/>) and
/// the array holds the feature values.
/// </summary>
/// <param name="context">The marshalling context.</param>
/// <param name="ns">The namespace description.</param>
/// <param name="feature">The feature description.</param>
/// <param name="value">The feature value.</param>
/// <returns>The sum of the feature counts reported by the per-key namespace builders.</returns>
[ContractVerification(false)]
public int MarshalNamespace(VowpalWabbitMarshalContext context, Namespace ns, Feature feature, IDictionary<string, <#=s#>[]> value)
{
    Contract.Requires(context != null);
    Contract.Requires(ns != null);
    Contract.Requires(feature != null);

    if (value == null)
        return 0;

    int featureCount = 0;
    foreach (var kvp in value)
    {
        var perKeyNs = kvp.Key == null ? ns : new Namespace(context.VW, kvp.Key);

        try
        {
            // the namespace is only added on dispose, to be able to check if at least a single feature has been added
            context.NamespaceBuilder = context.ExampleBuilder.AddNamespace(perKeyNs.FeatureGroup);

            var position = 0;
            var stringExample = context.StringExample;
            if (stringExample != null)
            {
                position = stringExample.Append(perKeyNs.NamespaceString).Length;
            }

            this.MarshalFeature(context, perKeyNs, feature, kvp.Value);

            if (stringExample != null && position == stringExample.Length)
            {
                // no features added, remove namespace. The text that was appended above is
                // perKeyNs.NamespaceString, so exactly that many characters are removed
                // (using ns.NamespaceString here would cut too much or too little).
                stringExample.Length = position - perKeyNs.NamespaceString.Length;
            }

            featureCount += (int)context.NamespaceBuilder.FeatureCount;
        }
        finally
        {
            if (context.NamespaceBuilder != null)
            {
                context.NamespaceBuilder.Dispose();
                context.NamespaceBuilder = null;
            }
        }
    }

    return featureCount;
}
<# } #>
}
}
vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/VowpalWabbitMarshalContext.cs000066400000000000000000000152321332666127000262500ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Diagnostics; using System.Globalization; using System.Linq; using System.Text; using System.Threading.Tasks; using VW; namespace VW.Serializer { /// /// Context containing state during example marshalling. /// [DebuggerDisplay("{GetHashCode()}: {ToString()}")] public class VowpalWabbitMarshalContext : IDisposable { /// /// If true disposes the example builder. Otherwise it's not owned by this instance. 
///
private bool disposeExampleBuilder = false;

/// <summary>
/// Initializes a new instance that creates its own example builder on top of
/// <paramref name="vwPool"/>; that builder is disposed together with this context.
/// </summary>
/// <param name="vwPool">The VW instance the example will be imported to.</param>
/// <param name="dictionary">Dictionary used for dictify operation.</param>
/// <param name="fastDictionary">Dictionary used for dictify operation.</param>
public VowpalWabbitMarshalContext(IVowpalWabbitExamplePool vwPool, Dictionary<string, string> dictionary = null, Dictionary<object, string> fastDictionary = null)
    : this(vwPool.Native, new VowpalWabbitExampleBuilder(vwPool), dictionary, fastDictionary)
{
    disposeExampleBuilder = true;
}

/// <summary>
/// Initializes a new instance around an existing example builder. The builder is not
/// disposed by this context.
/// </summary>
/// <param name="vw">The VW instance the example will be imported to.</param>
/// <param name="exampleBuilder">A shared example builder.</param>
/// <param name="dictionary">Dictionary used for dictify operation.</param>
/// <param name="fastDictionary">Dictionary used for dictify operation.</param>
/// <exception cref="ArgumentNullException"><paramref name="vw"/> is null.</exception>
public VowpalWabbitMarshalContext(VowpalWabbit vw, VowpalWabbitExampleBuilder exampleBuilder, Dictionary<string, string> dictionary = null, Dictionary<object, string> fastDictionary = null)
{
    // vw.Settings is read below; report the offending argument instead of a bare NullReferenceException
    if (vw == null)
        throw new ArgumentNullException("vw");

    this.VW = vw;
    this.ExampleBuilder = exampleBuilder;

    // the string buffer and both dictionaries are only kept when string examples are requested
    if (vw.Settings.EnableStringExampleGeneration)
    {
        this.StringExample = new StringBuilder();
        this.Dictionary = dictionary;
        this.FastDictionary = fastDictionary;
    }
}

/// <summary>
/// The VW instance the produce example will be imported to.
/// </summary>
public VowpalWabbit VW { get; private set; }

/// <summary>
/// See https://github.com/JohnLangford/vowpal_wabbit/wiki/Input-format for reference
/// </summary>
public StringBuilder StringExample { get; private set; }

/// <summary>
/// The VW string version of the label.
/// </summary>
public string StringLabel { get; set; }

/// <summary>
/// Creates the VW string for this example.
/// </summary>
/// <returns>
/// Null when no string example is generated; otherwise the label and the example text,
/// separated by a single space when both are present.
/// </returns>
public override string ToString()
{
    if (this.StringExample == null)
        return null;

    var sb = new StringBuilder();

    if (this.StringLabel != null)
        sb.Append(this.StringLabel);

    if (this.StringExample.Length > 0)
    {
        if (sb.Length > 0)
            sb.Append(' ');

        sb.Append(this.StringExample);
    }

    return sb.ToString();
}

/// <summary>
/// Used if dictify is true. Maps from serialized feature to surrogate key.
/// </summary>
public Dictionary<string, string> Dictionary { get; private set; }

///
/// Used if dictify is true. 
/// Maps from raw feature value (e.g. int[]) to serialized feature.
///
public Dictionary<object, string> FastDictionary { get; private set; }

/// <summary>
/// Used to build examples.
/// </summary>
public VowpalWabbitExampleBuilder ExampleBuilder { get; private set; }

/// <summary>
/// Used to build a namespace.
/// </summary>
public VowpalWabbitNamespaceBuilder NamespaceBuilder { get; set; }

/// <summary>
/// Formats <paramref name="args"/> based on <paramref name="format"/> to the string example buffer.
/// Does nothing when no string example is generated.
/// </summary>
/// <param name="dictify">If true, performs dictionarization on the serialized string and inserts a surrogate.</param>
/// <param name="format">The string format used to serialize <paramref name="args"/>.</param>
/// <param name="args">The arguments to the string format operation.</param>
public void AppendStringExample(bool dictify, string format, params object[] args)
{
    if (this.StringExample == null)
        return;

    var outputString = string.Format(CultureInfo.InvariantCulture, format, args);

    if (!dictify || this.Dictionary == null)
    {
        this.StringExample.Append(outputString);
        return;
    }

    // dictify: replace the serialized feature by a short surrogate, allocating a new one on first sight
    string surrogate;
    if (!this.Dictionary.TryGetValue(outputString, out surrogate))
    {
        // prefix to avoid number parsing
        surrogate = "d" + this.Dictionary.Count.ToString(CultureInfo.InvariantCulture);
        this.Dictionary.Add(outputString, surrogate);
    }

    this.StringExample.AppendFormat(" {0}", surrogate);
}

///
/// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources.
///
public void Dispose()
{
    this.Dispose(true);
    GC.SuppressFinalize(this);
}

// Releases the example builder, but only when this context created it itself.
private void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    if (this.ExampleBuilder == null || !disposeExampleBuilder)
    {
        return;
    }

    this.ExampleBuilder.Dispose();
    this.ExampleBuilder = null;
}
}
}
vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/VowpalWabbitMultiExampleSerializer.cs000066400000000000000000000115571332666127000277620ustar00rootroot00000000000000using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; using System.Text; using System.Threading.Tasks; using VW.Labels; namespace VW.Serializer { internal sealed class VowpalWabbitMultiExampleSerializer : IVowpalWabbitSerializer { private readonly VowpalWabbit vw; private readonly Func> adfAccessor; private VowpalWabbitSingleExampleSerializer sharedSerializer; private VowpalWabbitSingleExampleSerializer adfSerializer; internal VowpalWabbitMultiExampleSerializer(VowpalWabbit vw, VowpalWabbitSingleExampleSerializer sharedSerializer, VowpalWabbitSingleExampleSerializer adfSerializer, Func> adfAccessor) { // sharedSerializer can be null Contract.Requires(vw != null); Contract.Requires(adfSerializer != null); Contract.Requires(adfAccessor != null); this.vw = vw; this.sharedSerializer = sharedSerializer; this.adfSerializer = adfSerializer; this.adfAccessor = adfAccessor; } public bool EnableStringExampleGeneration { get { return this.vw.Settings.EnableStringExampleGeneration; } } public bool CachesExamples { get { return (this.sharedSerializer != null && this.sharedSerializer.CachesExamples) || this.adfSerializer.CachesExamples; } } public int GetNumberOfActionDependentExamples(TExample example) { var adfs = this.adfAccessor(example); return adfs == null ? 0 : adfs.Count(); } /// /// Serializes the given to VW string format. /// /// The example to serialize. /// The label to serialize. /// The optional index of the example, the should be attributed to. /// Dictionary used for dictify operation. 
/// <param name="fastDictionary">Dictionary used for dictify operation.</param>
/// <returns>The resulting VW string.</returns>
public string SerializeToString(TExample example, ILabel label = null, int? index = null, Dictionary<string, string> dictionary = null, Dictionary<object, string> fastDictionary = null)
{
    var sb = new StringBuilder();

    // the shared part (if any) comes first and always carries the shared label
    if (this.sharedSerializer != null)
        sb.AppendLine(this.sharedSerializer.SerializeToString(example, SharedLabel.Instance, null, dictionary, fastDictionary));

    var adfCollection = this.adfAccessor(example);
    if (adfCollection != null)
    {
        var i = 0;
        foreach (var adfExample in adfCollection)
        {
            // only the action dependent example at position 'index' receives the label
            sb.AppendLine(this.adfSerializer.SerializeToString(adfExample, index != null && i == index ? label : null, null, dictionary, fastDictionary));
            i++;
        }
    }

    return sb.ToString();
}

/// <summary>
/// Serializes the shared part and all action dependent examples into native examples.
/// Examples created so far are disposed if serialization fails.
/// </summary>
/// <param name="example">The example to serialize.</param>
/// <param name="label">The label, applied to the action dependent example at <paramref name="index"/>.</param>
/// <param name="index">The optional index of the action dependent example the label belongs to.</param>
/// <returns>A multi-line example collection built from the shared and action dependent examples.</returns>
public VowpalWabbitExampleCollection Serialize(TExample example, ILabel label = null, int? index = null)
{
    VowpalWabbitExample shared = null;
    var adf = new List<VowpalWabbitExample>();

    try
    {
        if (this.sharedSerializer != null)
            shared = this.sharedSerializer.Serialize(example, SharedLabel.Instance);

        var adfCollection = this.adfAccessor(example);
        if (adfCollection != null)
        {
            var i = 0;
            foreach (var adfExample in adfCollection)
            {
                adf.Add(this.adfSerializer.Serialize(adfExample, index != null && i == index ? label : null));
                i++;
            }
        }

        return new VowpalWabbitMultiLineExampleCollection(this.vw, shared, adf.ToArray());
    }
    catch (Exception)
    {
        // do not leak native examples created before the failure
        if (shared != null)
            shared.Dispose();

        foreach (var adfExample in adf)
            adfExample.Dispose();

        throw;
    }
}

/// <summary>
/// Disposes the shared and the action dependent serializer.
/// </summary>
public void Dispose()
{
    if (this.sharedSerializer != null)
    {
        this.sharedSerializer.Dispose();
        this.sharedSerializer = null;
    }

    if (this.adfSerializer != null)
    {
        this.adfSerializer.Dispose();
        this.adfSerializer = null;
    }
}
}
}
vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/VowpalWabbitMultiExampleSerializerCompiler.cs000066400000000000000000000126641332666127000314550ustar00rootroot00000000000000using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; using System.Linq.Expressions; using System.Text; using System.Threading.Tasks; using VW.Reflection;
namespace VW.Serializer
{
    /// <summary>
    /// Factory class to create compilers for multi-example serializers.
    /// </summary>
    public static class VowpalWabbitMultiExampleSerializerCompiler
    {
        /// <summary>
        /// Creates a serializer compiler for <typeparamref name="TExample"/> based on
        /// <paramref name="settings"/> and <paramref name="schema"/>.
        /// </summary>
        /// <typeparam name="TExample">The example type.</typeparam>
        /// <param name="settings">Settings for inspection.</param>
        /// <param name="schema">The schema used for serializer creation.</param>
        /// <returns>If the schema is valid a compiler is created, otherwise null.</returns>
        /// <exception cref="ArgumentException">The multi property is neither an array nor an IEnumerable&lt;&gt;.</exception>
        public static IVowpalWabbitSerializerCompiler<TExample> TryCreate<TExample>(VowpalWabbitSettings settings, Schema schema)
        {
            // check for _multi
            var multiFeature = schema.Features.FirstOrDefault(fe => fe.Name == settings.PropertyConfiguration.MultiProperty);
            if (multiFeature == null)
                return null;

            // multi example path
            // IEnumerable<> or Array
            var adfType = InspectionHelper.GetEnumerableElementType(multiFeature.FeatureType);
            if (adfType == null)
                throw new ArgumentException(settings.PropertyConfiguration.MultiProperty + " property must be array or IEnumerable<>. Actual type: " + multiFeature.FeatureType);

            // the element type is only known at runtime, so the implementation is closed via reflection
            var compilerType = typeof(VowpalWabbitMultiExampleSerializerCompilerImpl<,>).MakeGenericType(typeof(TExample), adfType);
            return (IVowpalWabbitSerializerCompiler<TExample>)Activator.CreateInstance(compilerType, settings, schema, multiFeature);
        }

        private sealed class VowpalWabbitMultiExampleSerializerCompilerImpl<TExample, TActionDependentFeature> : IVowpalWabbitSerializerCompiler<TExample>, IVowpalWabbitMultiExampleSerializerCompiler<TExample>
        {
            private readonly VowpalWabbitSingleExampleSerializerCompiler<TExample> sharedSerializerCompiler;

            private readonly VowpalWabbitSingleExampleSerializerCompiler<TActionDependentFeature> adfSerializerComputer;

            private readonly Func<TExample, IEnumerable<TActionDependentFeature>> adfAccessor;

            public VowpalWabbitMultiExampleSerializerCompilerImpl(VowpalWabbitSettings settings, Schema schema, FeatureExpression multiFeature)
            {
                Contract.Requires(settings != null);
                Contract.Requires(schema != null);
                Contract.Requires(multiFeature != null);

                // everything except the multi property is serialized as the shared example
                var nonMultiFeatures = schema.Features.Where(fe => fe != multiFeature).ToList();

                this.sharedSerializerCompiler = nonMultiFeatures.Count == 0 ? null :
                    new VowpalWabbitSingleExampleSerializerCompiler<TExample>(
                        new Schema { Features = nonMultiFeatures },
                        settings.CustomFeaturizer,
                        !settings.EnableStringExampleGeneration);

                // fall back to the default inspector when none is configured
                var typeInspector = settings.TypeInspector ?? TypeInspector.Default;

                this.adfSerializerComputer = new VowpalWabbitSingleExampleSerializerCompiler<TActionDependentFeature>(
                    typeInspector.CreateSchema(settings, typeof(TActionDependentFeature)),
                    settings.CustomFeaturizer,
                    !settings.EnableStringExampleGeneration);

                var exampleParameter = Expression.Parameter(typeof(TExample), "example");

                // CODE condition1 && condition2 && condition3 ...
                var condition = multiFeature.ValueValidExpressionFactories
                    .Skip(1)
                    .Aggregate(
                        multiFeature.ValueValidExpressionFactories.First()(exampleParameter),
                        (cond, factory) => Expression.AndAlso(cond, factory(exampleParameter)));

                var multiExpression = multiFeature.ValueExpressionFactory(exampleParameter);

                // CODE example => (IEnumerable<TActionDependentFeature>)(example._multi != null ? example._multi : null)
                var expr = Expression.Lambda<Func<TExample, IEnumerable<TActionDependentFeature>>>(
                    Expression.Condition(
                        condition,
                        multiExpression,
                        Expression.Constant(null, multiExpression.Type),
                        typeof(IEnumerable<TActionDependentFeature>)),
                    exampleParameter);

                this.adfAccessor = (Func<TExample, IEnumerable<TActionDependentFeature>>)expr.CompileToFunc();
            }

            public int GetNumberOfActionDependentExamples(TExample example)
            {
                var adfs = this.adfAccessor(example);
                return adfs == null ? 0 : adfs.Count();
            }

            public IVowpalWabbitSerializer<TExample> Create(VowpalWabbit vw)
            {
                return new VowpalWabbitMultiExampleSerializer<TExample, TActionDependentFeature>(
                    vw,
                    this.sharedSerializerCompiler != null ? this.sharedSerializerCompiler.Create(vw) : null,
                    this.adfSerializerComputer.Create(vw),
                    this.adfAccessor);
            }
        }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/VowpalWabbitSerializerFactory.cs000066400000000000000000000124461332666127000267610ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Linq; using System.Linq.Expressions; using System.Reflection; using System.Reflection.Emit; using VW.Serializer.Attributes; using VW.Serializer.Intermediate; using VW.Labels; using System.IO; using System.Runtime.CompilerServices; using System.Diagnostics.Contracts; using VW.Reflection; using System.Collections.ObjectModel; namespace VW.Serializer { /// /// Factory to ease creation of serializers. /// public static class VowpalWabbitSerializerFactory { /// /// Example and example result type based serializer cache. 
///
// NOTE(review): plain Dictionary in a static field; access is not synchronized.
private static readonly Dictionary<Key, object> SerializerCache = new Dictionary<Key, object>();

/// <summary>
/// Cache key made of the example type and the settings that are compared in Equals.
/// </summary>
private sealed class Key
{
    internal Type Type { get; set; }

    internal ITypeInspector TypeInspector { get; set; }

    internal bool EnableStringExampleGeneration { get; set; }

    internal bool EnableStringFloatCompact { get; set; }

    internal List<Type> CustomFeaturizer { get; set; }

    public override bool Equals(object obj)
    {
        var other = obj as Key;

        return other != null &&
            this.Type == other.Type &&
            this.TypeInspector == other.TypeInspector &&
            this.EnableStringExampleGeneration == other.EnableStringExampleGeneration &&
            this.EnableStringFloatCompact == other.EnableStringFloatCompact &&
            SameFeaturizers(this.CustomFeaturizer, other.CustomFeaturizer);
    }

    public override int GetHashCode()
    {
        // Equals compares the featurizer lists element-wise, so the hash has to be derived
        // from the elements as well (the list's own hash code is reference based).
        var featurizerHash = 1;
        if (this.CustomFeaturizer != null)
        {
            foreach (var featurizer in this.CustomFeaturizer)
            {
                featurizerHash = unchecked((featurizerHash * 31) + (featurizer == null ? 0 : featurizer.GetHashCode()));
            }
        }

        return (this.Type == null ? 0 : this.Type.GetHashCode()) ^
            (this.TypeInspector == null ? 0 : this.TypeInspector.GetHashCode()) ^
            this.EnableStringExampleGeneration.GetHashCode() ^
            this.EnableStringFloatCompact.GetHashCode() ^
            featurizerHash;
    }

    // Two lists match when both are null or both hold the same types in the same order.
    private static bool SameFeaturizers(List<Type> left, List<Type> right)
    {
        if (ReferenceEquals(left, right))
            return true;

        if (left == null || right == null)
            return false;

        return left.SequenceEqual(right);
    }
}

/// <summary>
/// Creates a serializer for the given type and settings.
/// </summary>
/// <typeparam name="TExample">The user type to serialize.</typeparam>
/// <param name="settings">The settings to use; must not be null.</param>
/// <returns>
/// A serializer compiler, or null when the schema is missing or has no features.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="settings"/> is null.</exception>
public static IVowpalWabbitSerializerCompiler<TExample> CreateSerializer<TExample>(VowpalWabbitSettings settings = null)
{
    // Every path below reads from settings (type inspector, property configuration, string
    // generation flag); report the missing argument instead of a NullReferenceException.
    if (settings == null)
        throw new ArgumentNullException("settings");

    Schema schema = null;

    Key cacheKey = null;
    if (settings.Schema != null)
    {
        schema = settings.Schema;
    }
    else
    {
        ITypeInspector typeInspector = settings.TypeInspector;
        if (typeInspector == null)
            typeInspector = TypeInspector.Default;

        // only cache non-string generating serializer
        cacheKey = new Key
        {
            Type = typeof(TExample),
            TypeInspector = typeInspector,
            CustomFeaturizer = settings.CustomFeaturizer,
            EnableStringExampleGeneration = settings.EnableStringExampleGeneration,
            EnableStringFloatCompact = settings.EnableStringFloatCompact
        };

        object serializer;
        if (SerializerCache.TryGetValue(cacheKey, out serializer))
        {
            return (IVowpalWabbitSerializerCompiler<TExample>)serializer;
        }

        // TODO: enhance caching based on feature list & featurizer set
        // if no feature mapping is provided, use [Feature] annotation on provided type.
        schema = typeInspector.CreateSchema(settings, typeof(TExample));

        var multiExampleSerializerCompiler = VowpalWabbitMultiExampleSerializerCompiler.TryCreate<TExample>(settings, schema);
        if (multiExampleSerializerCompiler != null)
            return multiExampleSerializerCompiler;
    }

    // need at least a single feature to do something sensible
    if (schema == null || schema.Features.Count == 0)
    {
        return null;
    }

    var newSerializer = new VowpalWabbitSingleExampleSerializerCompiler<TExample>(
        schema,
        settings.CustomFeaturizer,
        !settings.EnableStringExampleGeneration);

    if (cacheKey != null)
    {
        SerializerCache[cacheKey] = newSerializer;
    }

    return newSerializer;
}
}
}
vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/VowpalWabbitSingleExampleSerializer.cs000066400000000000000000000303201332666127000300760ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Globalization; using System.Linq; using VW.Labels; using VW.Serializer.Attributes; namespace VW.Serializer { /// /// A serializer from a user type (TExample) to a native Vowpal Wabbit example type. /// /// The source example type. 
public sealed class VowpalWabbitSingleExampleSerializer<TExample> : IVowpalWabbitSerializer<TExample>, IVowpalWabbitExamplePool
{
    // A cached native example together with the time of its last use.
    private class CacheEntry
    {
        internal VowpalWabbitExample Example;

        internal DateTime LastRecentUse;

#if DEBUG
        internal bool InUse;
#endif
    }

    private readonly VowpalWabbitSingleExampleSerializerCompiler<TExample> compiler;

    // null when example caching is not active for TExample
    private Dictionary<TExample, CacheEntry> exampleCache;

#if DEBUG
    /// <summary>
    /// Reverse lookup from native example to cache entry to enable proper usage.
    /// </summary>
    /// <remarks>
    /// To avoid any performance impact this is only enabled in debug mode.
    /// </remarks>
    private readonly Dictionary<VowpalWabbitExample, CacheEntry> reverseLookup;
#endif

    private readonly VowpalWabbit vw;

    private readonly Action<VowpalWabbitMarshalContext, TExample, ILabel> serializerFunc;

    internal VowpalWabbitSingleExampleSerializer(VowpalWabbitSingleExampleSerializerCompiler<TExample> compiler, VowpalWabbit vw)
    {
        if (compiler == null)
            throw new ArgumentNullException("compiler");

        // a null vw is a caller error (precondition), not something to promise on exit
        if (vw == null)
            throw new ArgumentNullException("vw");
        Contract.EndContractBlock();

        this.vw = vw;
        this.compiler = compiler;

        var exampleType = typeof(TExample);
        if (!exampleType.IsVisible)
            throw new ArgumentException($"Type '{typeof(TExample)}' must be public and all enclosing types must be public.");

        this.serializerFunc = compiler.Func(vw);

        // caching requires both the [Cacheable] attribute on TExample and the setting
        var cacheableAttribute = (CacheableAttribute) typeof (TExample).GetCustomAttributes(typeof (CacheableAttribute), true).FirstOrDefault();
        if (cacheableAttribute == null)
            return;

        if (this.vw.Settings.EnableExampleCaching)
        {
            if (cacheableAttribute.EqualityComparer == null)
                this.exampleCache = new Dictionary<TExample, CacheEntry>();
            else
            {
                if (!typeof(IEqualityComparer<TExample>).IsAssignableFrom(cacheableAttribute.EqualityComparer))
                {
                    throw new ArgumentException(
                        string.Format(
                            CultureInfo.InvariantCulture,
                            "EqualityComparer ({1}) specified in [Cachable] of {0} must implement IEqualityComparer<{0}>",
                            typeof(TExample),
                            cacheableAttribute.EqualityComparer));
                }

                var comparer = (IEqualityComparer<TExample>)Activator.CreateInstance(cacheableAttribute.EqualityComparer);
                this.exampleCache = new Dictionary<TExample, CacheEntry>(comparer);
            }

#if DEBUG
            this.reverseLookup = new Dictionary<VowpalWabbitExample, CacheEntry>(new ReferenceEqualityComparer<VowpalWabbitExample>());
#endif
        }
    }

    /// <summary>
    /// True if this instance caches examples, false otherwise.
    /// </summary>
    public bool CachesExamples
    {
        get { return this.exampleCache != null; }
    }

    /// <summary>
    /// True if string examples are generated in parallel to native examples.
    /// </summary>
    public bool EnableStringExampleGeneration
    {
        get { return !this.compiler.DisableStringExampleGeneration; }
    }

    /// <summary>
    /// Serializes the given <paramref name="example"/> to VW string format.
    /// </summary>
    /// <param name="example">The example to serialize.</param>
    /// <param name="label">The label to serialize.</param>
    /// <param name="index">The optional index of the example, the <paramref name="label"/> should be attributed to.</param>
    /// <param name="dictionary">Dictionary used for dictify operation.</param>
    /// <param name="fastDictionary">Dictionary used for dictify operation.</param>
    /// <returns>The resulting VW string.</returns>
    public string SerializeToString(TExample example, ILabel label = null, int? index = null, Dictionary<string, string> dictionary = null, Dictionary<object, string> fastDictionary = null)
    {
        Contract.Requires(example != null);

        using (var context = new VowpalWabbitMarshalContext(vw, dictionary, fastDictionary))
        {
            this.serializerFunc(context, example, label);
            return context.ToString();
        }
    }

    /// <summary>
    /// Serialize the example and wrap it into a single-line example collection.
    /// </summary>
    /// <param name="example">The example to serialize.</param>
    /// <param name="label">The label to be serialized.</param>
    /// <param name="index">The optional index of the example, the <paramref name="label"/> should be attributed to.</param>
    /// <returns>The serialized example.</returns>
    VowpalWabbitExampleCollection IVowpalWabbitSerializer<TExample>.Serialize(TExample example, ILabel label, int? index)
    {
        // dispatch
        return new VowpalWabbitSingleLineExampleCollection(vw, Serialize(example, label, index));
    }

    /// <summary>
    /// Serialize the example.
    /// </summary>
    /// <param name="example">The example to serialize.</param>
    /// <param name="label">The label to be serialized.</param>
    /// <param name="index">The optional index of the example, the <paramref name="label"/> should be attributed to.</param>
    /// <returns>The serialized example.</returns>
    /// <remarks>
    /// When caching is active and no label is given, examples are returned from the cache.
    /// </remarks>
    public VowpalWabbitExample Serialize(TExample example, ILabel label = null, int? index = null)
    {
        Contract.Requires(example != null);
        Contract.Requires(index == null);

        // labelled examples are never cached
        if (this.exampleCache == null || label != null)
        {
            using (var context = new VowpalWabbitMarshalContext(vw))
            {
                this.serializerFunc(context, example, label);

                var vwExample = context.ExampleBuilder.CreateExample();

                if (this.EnableStringExampleGeneration)
                    vwExample.VowpalWabbitString = context.ToString();

                return vwExample;
            }
        }

        CacheEntry result;
        if (this.exampleCache.TryGetValue(example, out result))
        {
            result.LastRecentUse = DateTime.UtcNow;

#if DEBUG
            if (result.InUse)
            {
                throw new ArgumentException("Cached example already in use.");
            }
#endif
        }
        else
        {
            VowpalWabbitExample nativeExample = null;

            try
            {
                // the context is created on top of this instance (acting as example pool)
                using (var context = new VowpalWabbitMarshalContext(this))
                {
                    this.serializerFunc(context, example, label);
                    nativeExample = context.ExampleBuilder.CreateExample();
                }

                result = new CacheEntry
                {
                    Example = nativeExample,
                    LastRecentUse = DateTime.UtcNow
                };

                this.exampleCache.Add(example, result);

#if DEBUG
                this.reverseLookup.Add(result.Example, result);
#endif
            }
            catch (Exception)
            {
                if (nativeExample != null)
                    nativeExample.Dispose();

                // rethrow without resetting the stack trace
                throw;
            }
        }

#if DEBUG
        result.InUse = true;
#endif

        // TODO: support Label != null here and update cached example using new label
        return result.Example;
    }

    /// <summary>
    /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources.
    /// </summary>
    public void Dispose()
    {
        this.Dispose(true);
        GC.SuppressFinalize(this);
    }

    // Disposes the inner examples of all cache entries and drops the cache.
    private void Dispose(bool disposing)
    {
        if (disposing)
        {
            if (this.exampleCache != null)
            {
                foreach (var example in this.exampleCache.Values)
                {
                    example.Example.InnerExample.Dispose();
                }

                this.exampleCache = null;
            }
        }
    }

    /// <summary>
    /// The associated native instance.
    /// </summary>
    public VowpalWabbit Native
    {
        get { return this.vw; }
    }

    ///
    /// Gets an already allocated instance from the example pool or creates a new one.
///
///
public VowpalWabbitExample GetOrCreateNativeExample()
{
    return new VowpalWabbitExample(owner: this, example: this.vw.GetOrCreateNativeExample());
}

/// <summary>
/// Accepts an example back into this pool.
/// </summary>
/// <param name="example">The example to be returned.</param>
/// <exception cref="ObjectDisposedException">Thrown if there is no example cache (e.g. after Dispose).</exception>
public void ReturnExampleToPool(VowpalWabbitExample example)
{
    if (this.exampleCache == null)
    {
        throw new ObjectDisposedException("VowpalWabbitSerializer");
    }

#if DEBUG
    CacheEntry cacheEntry;
    if (!this.reverseLookup.TryGetValue(example, out cacheEntry))
    {
        throw new ArgumentException("Example is not found in pool");
    }

    if (!cacheEntry.InUse)
    {
        throw new ArgumentException("Unused example returned");
    }

    cacheEntry.InUse = false;
#endif

    // if we reach the cache boundary, dispose the oldest example
    if (this.exampleCache.Count > this.vw.Settings.MaxExampleCacheSize)
    {
        var enumerator = this.exampleCache.GetEnumerator();

        // this.settings.MaxExampleCacheSize is >= 1
        enumerator.MoveNext();

        // linear scan for the entry with the smallest LastRecentUse
        var min = enumerator.Current;
        while (enumerator.MoveNext())
        {
            if (min.Value.LastRecentUse > enumerator.Current.Value.LastRecentUse)
            {
                min = enumerator.Current;
            }
        }

#if DEBUG
        this.reverseLookup.Remove(min.Value.Example);
#endif

        this.exampleCache.Remove(min.Key);
        min.Value.Example.InnerExample.Dispose();
    }
}

/// <summary>
/// Equality comparer that treats two objects as equal only if they are the same instance.
/// </summary>
private class ReferenceEqualityComparer : IEqualityComparer
{
    public bool Equals(T x, T y)
    {
        return object.ReferenceEquals(x, y);
    }

    public int GetHashCode(T obj)
    {
        // Use the identity based hash so it agrees with ReferenceEquals even if T
        // overrides GetHashCode() (e.g. computed from mutable fields).
        return System.Runtime.CompilerServices.RuntimeHelpers.GetHashCode(obj);
    }
}
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/cs/Serializer/VowpalWabbitSingleExampleSerializerCompiler.cs000066400000000000000000000655141332666127000316060ustar00rootroot00000000000000using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.Contracts;
using System.Linq;
using System.Linq.Expressions;
using System.Reflection;
using System.Reflection.Emit;
using System.Text;
using System.Threading.Tasks;
using VW.Labels;
using VW.Reflection;
using VW.Serializer.Intermediate;

namespace VW.Serializer
{
    ///
    /// Compiles a serializer for
/// the given example user type.
///
/// The example user type.
/// A serializer for the given user example type.
public sealed class VowpalWabbitSingleExampleSerializerCompiler : IVowpalWabbitSerializerCompiler
{
    /// <summary>
    /// Internal structure collecting all items required to marshal a single feature.
    /// </summary>
    [DebuggerDisplay("FeatureExpressionInternal(Source={Source}, MarshalMethod={MarshalMethod})")]
    internal sealed class FeatureExpressionInternal
    {
        /// <summary>
        /// The supplied feature expression.
        /// </summary>
        internal FeatureExpression Source;

        /// <summary>
        /// The resolved marshalling method.
        /// </summary>
        internal MarshalMethod MarshalMethod;
    }

    /// <summary>
    /// Describes the actual marshalling method and the feature type (e.g. Feature or PreHashedFeature).
    /// </summary>
    internal sealed class MarshalMethod
    {
        /// <summary>
        /// The actual marshalling method.
        /// </summary>
        internal MethodInfo Method;

        /// <summary>
        /// The feature type (e.g. Feature or PreHashedFeature).
        /// </summary>
        internal Type MetaFeatureType;

        /// <summary>
        /// True if the method can marshal a full namespace.
        /// </summary>
        internal bool IsNamespace;
    }

    /// <summary>
    /// All discovered features.
    /// </summary>
    private FeatureExpressionInternal[] allFeatures;

    /// <summary>
    /// The schema this serializer is compiled from.
    /// </summary>
    private readonly Schema schema;

    /// <summary>
    /// Ordered list of featurizer types. Marshalling methods are resolved in order of this list.
    /// VowpalWabbitDefaultMarshaller is added last as default.
    /// </summary>
    private readonly List marshallerTypes;

    /// <summary>
    /// The main body of the serializer holding preemptive calculations
    /// (e.g. creation of marshaller, namespace and feature objects).
    /// </summary>
    private readonly List body;

    /// <summary>
    /// The body executed for each example.
    /// </summary>
    private readonly List perExampleBody;

    /// <summary>
    /// Local variables.
    /// </summary>
    private readonly List variables;

    /// <summary>
    /// Local variables holding namespaces.
    /// </summary>
    private readonly List namespaceVariables;

    /// <summary>
    /// The VowpalWabbit parameter of the main lambda.
    /// </summary>
    private ParameterExpression vwParameter;

    /// <summary>
    /// The VowpalWabbitMarshalContext parameter of the per example lambda.
    /// </summary>
    private ParameterExpression contextParameter;

    ///
    /// The TExample parameter of the per example lambda.
///
private ParameterExpression exampleParameter;

/// <summary>
/// The ILabel parameter of the per example lambda.
/// </summary>
private ParameterExpression labelParameter;

/// <summary>
/// The list of featurizers.
/// </summary>
private readonly List marshallers;

/// <summary>
/// The list of meta features such as Feature.
/// </summary>
private readonly List metaFeatures;

/// <summary>
/// If true, VowpalWabbit string generation is disabled.
/// </summary>
private readonly bool disableStringExampleGeneration;

/// <summary>
/// Builds the serializer expression tree for the given schema and compiles it.
/// </summary>
/// <param name="schema">The schema to serialize. Must be non-null and contain at least one feature.</param>
/// <param name="featurizerTypes">Optional marshaller types that are probed before the default marshaller.</param>
/// <param name="disableStringExampleGeneration">If true, VowpalWabbit string generation is disabled.</param>
/// <exception cref="ArgumentException">Thrown if schema is null or has no features.</exception>
internal VowpalWabbitSingleExampleSerializerCompiler(Schema schema, IReadOnlyList featurizerTypes, bool disableStringExampleGeneration)
{
    if (schema == null || schema.Features.Count == 0)
        throw new ArgumentException("schema");
    Contract.EndContractBlock();

    this.schema = schema;
    this.disableStringExampleGeneration = disableStringExampleGeneration;

    this.allFeatures = schema.Features.Select(f => new FeatureExpressionInternal { Source = f }).ToArray();

    // collect the types used for marshalling
    this.marshallerTypes = featurizerTypes == null ? new List() : new List(featurizerTypes);

    // extract types from overrides defined on particular features
    var overrideFeaturizerTypes = schema.Features
        .Select(f => f.OverrideSerializeMethod)
        .Where(o => o != null)
        .Select(o => o.DeclaringType);
    this.marshallerTypes.AddRange(overrideFeaturizerTypes);

    // add as last
    this.marshallerTypes.Add(typeof(VowpalWabbitDefaultMarshaller));

    this.body = new List();
    this.perExampleBody = new List();
    this.variables = new List();
    this.namespaceVariables = new List();
    this.marshallers = new List();
    this.metaFeatures = new List();

    // The steps below append to body/perExampleBody and depend on each other:
    // parameters must exist before label/feature code is generated, and the
    // lambdas are created last from the collected expressions.
    this.CreateMarshallers();
    this.ResolveFeatureMarshallingMethods();
    this.CreateParameters();
    this.CreateLabel();
    this.CreateNamespacesAndFeatures();
    this.CreateLambdas();

    this.Func = (Func>)this.SourceExpression.CompileToFunc();
}

/// <summary>
/// If true, VowpalWabbit string generation is disabled.
/// </summary>
internal bool DisableStringExampleGeneration
{
    get { return this.disableStringExampleGeneration; }
}

///
/// Creates a bound serializer.
///
/// The vw instance to bind to.
///
IVowpalWabbitSerializer IVowpalWabbitSerializerCompiler.Create(VowpalWabbit vw)
{
    return this.Create(vw);
}

/// <summary>
/// Creates a serializer for TExample bound to the given native instance.
/// </summary>
/// <param name="vw">The VW native instance examples will be associated with.</param>
/// <returns>A serializer for TExample.</returns>
public VowpalWabbitSingleExampleSerializer Create(VowpalWabbit vw)
{
    return new VowpalWabbitSingleExampleSerializer(this, vw);
}

/// <summary>
/// Emits the per example code that marshals the label: the explicitly passed label wins,
/// otherwise the label declared by the schema is used if all its validity conditions hold.
/// </summary>
private void CreateLabel()
{
    var nullLabel = Expression.Constant(null, typeof(ILabel));

    // CODE if (label != null) MarshalLabel(context, label)
    this.perExampleBody.Add(Expression.IfThen(
        Expression.NotEqual(this.labelParameter, nullLabel),
        this.CreateMarshallerCall("MarshalLabel", this.contextParameter, this.labelParameter)));

    var label = this.schema.Label;
    if (label == null)
        return;

    // CODE label == null && condition1 && condition2 && condition3 ...
    // Built incrementally so a missing or empty list of validity conditions is
    // tolerated (same guard as used for features); evaluation order is unchanged.
    Expression condition = Expression.Equal(this.labelParameter, nullLabel);
    var validFactories = label.ValueValidExpressionFactories;
    if (validFactories != null)
    {
        foreach (var factory in validFactories)
        {
            condition = Expression.AndAlso(condition, factory(this.exampleParameter));
        }
    }

    // CODE if (label == null && example.Label != null && ...) MarshalLabel(context, example.Label)
    this.perExampleBody.Add(
        Expression.IfThen(
            condition,
            this.CreateMarshallerCall("MarshalLabel", this.contextParameter, label.ValueExpressionFactory(this.exampleParameter))));
}

/// <summary>
/// Define variables and instantiate marshaller types.
/// </summary>
private void CreateMarshallers()
{
    foreach (var marshallerType in this.marshallerTypes)
    {
        var marshaller = Expression.Parameter(marshallerType, "marshaller_" + marshallerType.Name);

        this.marshallers.Add(marshaller);
        this.variables.Add(marshaller);

        // CODE new FeaturizerType(disableStringExampleGeneration)
        var newExpr = CreateNew(marshallerType, Expression.Constant(disableStringExampleGeneration));
        if (newExpr == null)
        {
            // no matching constructor found, fall back to the default constructor
            // CODE new MarshallerType()
            newExpr = Expression.New(marshallerType);
        }

        // marshaller = new ...
        this.body.Add(Expression.Assign(marshaller, newExpr));
    }
}

/// <summary>
/// Resolves the marshalling method for a single feature.
/// </summary>
/// <param name="feature">The feature to resolve the method for.</param>
/// <returns>The resolved method, or null if no marshaller type provides a matching one.</returns>
/// <exception cref="ArgumentException">Thrown for an unsupported StringProcessing value.</exception>
private MarshalMethod ResolveFeatureMarshalMethod(FeatureExpression feature)
{
    // an explicit override short-circuits the lookup
    if (feature.OverrideSerializeMethod != null)
    {
        return new MarshalMethod
        {
            Method = feature.OverrideSerializeMethod,
            MetaFeatureType = feature.OverrideSerializeMethod.GetParameters().Select(p => p.ParameterType).First(t => typeof(Feature).IsAssignableFrom(t))
        };
    }

    string methodName;
    Type[] metaFeatureTypeCandidates;

    if (feature.FeatureType == typeof(string))
    {
        switch(feature.StringProcessing)
        {
            case StringProcessing.Escape:
                methodName = "MarshalFeatureStringEscape";
                break;
            case StringProcessing.EscapeAndIncludeName:
                methodName = "MarshalFeatureStringEscapeAndIncludeName";
                break;
            case StringProcessing.Split:
                methodName = "MarshalFeatureStringSplit";
                break;
            default:
                throw new ArgumentException("feature.StringProcessing is not supported: " + feature.StringProcessing);
        }
        metaFeatureTypeCandidates = new [] { typeof(Feature) };
    }
    else if (feature.FeatureType.IsEnum)
    {
        methodName = "MarshalEnumFeature";
        metaFeatureTypeCandidates = new [] { typeof(EnumerizedFeature<>).MakeGenericType(feature.FeatureType) };
    }
    else if (feature.Enumerize)
    {
        methodName = "MarshalEnumerizeFeature";
        metaFeatureTypeCandidates = new [] { typeof(Feature) };
    }
    else
    {
        // probe for PreHashedFeature marshal method, then fall back to Feature
        methodName = "MarshalFeature";
        metaFeatureTypeCandidates = new [] { typeof(PreHashedFeature), typeof(Feature) };
    }

    // remove Nullable<> from feature type
    var featureType = feature.FeatureType;
    if(featureType.IsGenericType &&
       featureType.GetGenericTypeDefinition() == typeof(Nullable<>))
    {
        featureType = featureType.GetGenericArguments()[0];
    }

    // a method marshalling the full namespace takes precedence over the per feature method
    var method = ResolveFeatureMarshalMethod("MarshalNamespace", metaFeatureTypeCandidates, featureType, isNamespace: true);
    if (method == null)
        method = ResolveFeatureMarshalMethod(methodName, metaFeatureTypeCandidates, featureType, isNamespace: false);

    return method;
}

/// <summary>
/// Searches all marshaller types, in order, for a method with the given name accepting
/// (VowpalWabbitMarshalContext, Namespace, meta feature type, feature type).
/// </summary>
/// <param name="methodName">The name of the marshalling method.</param>
/// <param name="metaFeatureTypeCandidates">Meta feature types to probe, in order of preference.</param>
/// <param name="featureType">The type of the feature value.</param>
/// <param name="isNamespace">Stored in the result to flag full namespace marshalling.</param>
/// <returns>The first match, or null if none is found.</returns>
private MarshalMethod ResolveFeatureMarshalMethod(string methodName, Type[] metaFeatureTypeCandidates, Type featureType, bool isNamespace)
{
    foreach (var metaFeatureType in metaFeatureTypeCandidates)
    {
        // find visitor.methodName(VowpalWabbitMarshalContext context, Namespace ns, metaFeatureType feature, featureType value)
        var method = this.marshallerTypes
            .Select(visitor => ReflectionHelper.FindMethod(
                visitor,
                methodName,
                typeof(VowpalWabbitMarshalContext),
                typeof(Namespace),
                metaFeatureType,
                featureType))
            .FirstOrDefault(m => m != null);

        if (method != null)
            return new MarshalMethod
            {
                Method = method,
                MetaFeatureType = metaFeatureType,
                IsNamespace = isNamespace
            };
    }

    return null;
}

/// <summary>
/// Checks whether any ancestor of the candidate is already part of the valid features.
/// </summary>
/// <param name="candidate">The feature whose parent chain is walked.</param>
/// <param name="validFeature">The features resolved so far.</param>
/// <returns>True if a direct or indirect parent is contained in validFeature.</returns>
private bool ContainsAncestor(FeatureExpressionInternal candidate, List validFeature)
{
    if (candidate.Source.Parent == null)
        return false;

    if (validFeature.Any(valid => object.ReferenceEquals(valid.Source, candidate.Source.Parent)))
        return true;

    // walk up: the parent must be one of the discovered features (First throws otherwise)
    var parent = this.allFeatures.First(f => object.ReferenceEquals(f.Source, candidate.Source.Parent));

    return ContainsAncestor(parent, validFeature);
}

///
/// Resolve methods for each feature based on feature type and configuration.
///
private void ResolveFeatureMarshallingMethods()
{
    var resolved = new List(this.allFeatures.Length);

    foreach (var candidate in this.allFeatures)
    {
        // only look at features none of whose ancestors has been resolved already
        if (!ContainsAncestor(candidate, resolved))
        {
            candidate.MarshalMethod = this.ResolveFeatureMarshalMethod(candidate.Source);

            // features without a marshalling method are dropped
            if (candidate.MarshalMethod != null)
            {
                resolved.Add(candidate);
            }
        }
    }

    this.allFeatures = resolved.ToArray();
}

/// <summary>
/// Defines the input parameters of the generated lambdas.
/// </summary>
private void CreateParameters()
{
    this.vwParameter = Expression.Parameter(typeof(VowpalWabbit), "vw");
    this.contextParameter = Expression.Parameter(typeof(VowpalWabbitMarshalContext), "context");
    this.exampleParameter = Expression.Parameter(typeof(TExample), "example");
    this.labelParameter = Expression.Parameter(typeof(ILabel), "label");
}

///
/// Instantiate the meta information object such as
/// for a given feature.
///
/// The feature.
/// The namespace.
/// The "new" expression for the meta information object.
private Expression CreateFeature(FeatureExpressionInternal featureInternal, Expression @namespace) { FeatureExpression feature = featureInternal.Source; var metaFeatureType = featureInternal.MarshalMethod.MetaFeatureType; if (metaFeatureType.IsGenericType && metaFeatureType.GetGenericTypeDefinition() == typeof(EnumerizedFeature<>)) { // preemptively calculate all hashes for each enum value var featureParameter = Expression.Parameter(metaFeatureType); var valueParameter = Expression.Parameter(feature.FeatureType); var body = new List(); var hashVariables = new List(); foreach (var value in Enum.GetValues(feature.FeatureType)) { var hashVar = Expression.Variable(typeof(UInt64)); hashVariables.Add(hashVar); // CODE hashVar = feature.FeatureHashInternal(value); body.Add(Expression.Assign(hashVar, Expression.Call(featureParameter, metaFeatureType.GetMethod("FeatureHashInternal"), Expression.Constant(value)))); } var cases = Enum.GetValues(feature.FeatureType) .Cast() .Zip(hashVariables, (value, hash) => Expression.SwitchCase( hash, Expression.Constant(value, feature.FeatureType))) .ToArray(); // expand the switch(value) { case enum1: return hash1; .... } var hashSwitch = Expression.Switch(valueParameter, Expression.Block(Expression.Throw(Expression.New(typeof(NotSupportedException))), Expression.Constant((UInt64)0, typeof(UInt64))), cases); // CODE return value => switch(value) { .... 
} body.Add(Expression.Lambda(hashSwitch, valueParameter)); return CreateNew( metaFeatureType, this.vwParameter, @namespace, Expression.Constant(feature.Name, typeof(string)), Expression.Constant(feature.AddAnchor), Expression.Constant(feature.Dictify), Expression.Lambda(Expression.Block(hashVariables, body), featureParameter)); } else if (metaFeatureType == typeof(PreHashedFeature)) { // CODE new PreHashedFeature(vw, namespace, "Name", "AddAnchor"); return CreateNew( typeof(PreHashedFeature), this.vwParameter, @namespace, Expression.Constant(feature.Name, typeof(string)), Expression.Constant(feature.AddAnchor), Expression.Constant(feature.Dictify)); } else // CODE new Feature("Name", ...) return CreateNew( metaFeatureType, Expression.Constant(feature.Name, typeof(string)), Expression.Constant(feature.AddAnchor), Expression.Constant(feature.Dictify)); } /// /// Helper to create the "new" expression using a matching constructor. /// /// The type of the new object. /// The actual parameters for the constructor. /// The "new" expression bound to . 
private static Expression CreateNew(Type type, params Expression[] constructorParameters) { var ctor = type.GetConstructor(constructorParameters.Select(e => e.Type).ToArray()); if (ctor == null) return null; return Expression.New(ctor, constructorParameters); } private void CreateNamespacesAndFeatures() { var featuresByNamespace = this.allFeatures .GroupBy( f => new { f.Source.Namespace, f.Source.FeatureGroup }, f => f); foreach (var ns in featuresByNamespace) { // each feature can have 2 additional parameters (namespace + feature) // Visit(VowpalWabbit, CustomNamespace, CustomFeature) // create default namespace object var namespaceVariable = Expression.Variable(typeof(Namespace), "ns_" + ns.Key.FeatureGroup + ns.Key.Namespace); this.variables.Add(namespaceVariable); // CODE ns = new Namespace(vw, name, featureGroup); this.body.Add(Expression.Assign(namespaceVariable, CreateNew( typeof(Namespace), this.vwParameter, Expression.Constant(ns.Key.Namespace, typeof(string)), ns.Key.FeatureGroup == null ? (Expression)Expression.Constant(null, typeof(char?)) : Expression.New((ConstructorInfo)ReflectionHelper.GetInfo((char v) => new char?(v)), Expression.Constant((char)ns.Key.FeatureGroup))))); var fullNamespaceCalls = new List(); var featureVisits = new List(ns.Count()); foreach (var feature in ns.OrderBy(f => f.Source.Order)) { var newFeature = feature.Source.FeatureExpressionFactory != null ? feature.Source.FeatureExpressionFactory(this.vwParameter, namespaceVariable) : this.CreateFeature(feature, namespaceVariable); var featureVariable = Expression.Variable(newFeature.Type, "feature_" + feature.Source.Name); this.variables.Add(featureVariable); // CODE var feature = new ... 
this.body.Add(Expression.Assign(featureVariable, newFeature)); // TODO: optimize var marshaller = this.marshallers.First(f => f.Type == feature.MarshalMethod.Method.ReflectedType); var valueVariable = feature.Source.ValueExpressionFactory(this.exampleParameter); Expression featureVisit; if (feature.Source.IsNullable) { // if (value != null) featurizer.MarshalXXX(vw, context, ns, feature, (FeatureType)value); featureVisit = Expression.IfThen( Expression.NotEqual(valueVariable, Expression.Constant(null)), Expression.Call( marshaller, feature.MarshalMethod.Method, this.contextParameter, namespaceVariable, featureVariable, Expression.Convert(valueVariable, feature.Source.FeatureType))); } else { // featurizer.MarshalXXX(vw, context, ns, feature, value); featureVisit = Expression.Call( marshaller, feature.MarshalMethod.Method, this.contextParameter, namespaceVariable, featureVariable, valueVariable); } if (feature.Source.ValueValidExpressionFactories != null && feature.Source.ValueValidExpressionFactories.Count > 0) { // CODE condition1 && condition2 && condition3 ... var condition = feature.Source.ValueValidExpressionFactories .Skip(1) .Aggregate( feature.Source.ValueValidExpressionFactories.First()(this.exampleParameter), (cond, factory) => Expression.AndAlso(cond, factory(this.exampleParameter))); featureVisit = Expression.IfThen(condition, featureVisit); } if (feature.MarshalMethod.IsNamespace) this.perExampleBody.Add(featureVisit); else featureVisits.Add(featureVisit); } if (featureVisits.Count > 0) { var featureVisitLambda = Expression.Lambda(Expression.Block(featureVisits)); // CODE: featurizer.MarshalNamespace(context, namespace, { ... }) this.perExampleBody.Add(this.CreateMarshallerCall("MarshalNamespace", this.contextParameter, namespaceVariable, featureVisitLambda)); } } } /// /// Create the invocation expression of a marshalling method. /// /// The marshalling method to invoke. /// The parameters for this method. 
private MethodCallExpression CreateMarshallerCall(string methodName, params Expression[] parameters)
{
    var parameterTypes = parameters.Select(p => p.Type).ToArray();

    // marshallers are probed in order, the first one offering a matching overload wins
    foreach (var marshaller in this.marshallers)
    {
        var method = marshaller.Type.GetMethod(methodName, parameterTypes);
        if (method != null)
        {
            return Expression.Call(marshaller, method, parameters);
        }
    }

    // report the method that was actually requested (this helper is also used for MarshalLabel)
    throw new ArgumentException(string.Format(
        "Unable to find {0}({1}) on any featurizer",
        methodName,
        string.Join(", ", parameterTypes.Select(t => t.Name))));
}

/// <summary>
/// Creates the main lambda and the per example lambda.
/// </summary>
private void CreateLambdas()
{
    // CODE (context, example, label) => { ... }
    this.body.Add(
        Expression.Lambda(
            typeof(Action),
            Expression.Block(this.perExampleBody),
            this.contextParameter, this.exampleParameter, this.labelParameter));

    // CODE return (vw) => { ... return (context, example, label) => { ... } }
    this.SourceExpression = Expression.Lambda>>(
        Expression.Block(this.variables, this.body),
        this.vwParameter);
}

/// <summary>
/// The source expression tree the serializer is built from.
/// </summary>
public Expression>> SourceExpression
{
    get;
    private set;
}

/// <summary>
/// The closure used for serialization.
/// </summary>
public Func> Func
{
    get;
    private set;
}
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/cs/VowpalWabbit.cs000066400000000000000000000524341332666127000212670ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------

using System;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using VW.Labels;
using VW.Serializer;

namespace VW
{
    ///
    /// VW wrapper supporting data ingest using declarative serializer infrastructure.
    ///
    /// The user type to be serialized.
public class VowpalWabbit : IDisposable { /// /// Native vw instance. /// private VowpalWabbit vw; /// /// The example serializer. /// private IVowpalWabbitSerializer serializer; /// /// The example serializer compilation. Useful when debugging. /// private IVowpalWabbitSerializerCompiler compiledSerializer; /// /// The serializer used for learning. It's only set if the serializer is non-caching. /// By having a second field there is one less check that has to be done in the hot path. /// private readonly IVowpalWabbitSerializer learnSerializer; private readonly VowpalWabbitSingleExampleSerializer singleLineSerializer; /// /// Initializes a new instance of the class. /// /// Command line arguments passed to native instance. public VowpalWabbit(String args) : this(new VowpalWabbit(args)) { } /// /// Initializes a new instance of the class. /// /// Arguments passed to native instance. public VowpalWabbit(VowpalWabbitSettings settings) : this(new VowpalWabbit(settings)) { } /// /// Initializes a new instance of the class. /// /// The native instance to wrap. /// This instance takes ownership of instance and disposes it. public VowpalWabbit(VowpalWabbit vw) : this(vw, VowpalWabbitSerializerFactory.CreateSerializer(vw.Settings)) { } /// /// Initializes a new instance of the class. /// /// The native instance to wrap. /// The per-compiled serializer. /// This instance takes ownership of instance and disposes it. public VowpalWabbit(VowpalWabbit vw, IVowpalWabbitSerializerCompiler compiledSerializer) { if (vw == null) throw new ArgumentNullException(nameof(vw)); if (compiledSerializer == null) throw new ArgumentNullException(nameof(compiledSerializer)); Contract.Ensures(this.serializer != null); Contract.EndContractBlock(); this.vw = vw; this.compiledSerializer = compiledSerializer; this.serializer = this.compiledSerializer.Create(vw); // have a 2nd member to throw NullReferenceException in release instead of silently producing wrong results. 
this.learnSerializer = this.serializer.CachesExamples ? null : this.serializer; // have a 3rd member to avoid cast everytime... this.singleLineSerializer = this.serializer as VowpalWabbitSingleExampleSerializer; } /// /// The wrapped VW instance. /// public VowpalWabbit Native { get { return this.vw; } } /// /// The serializer used to marshal examples. /// public IVowpalWabbitSerializerCompiler Serializer { get { return this.compiledSerializer; } } /// /// Learns from the given example. /// /// The example to learn. /// The label for this . /// The optional index of the example, the should be attributed to. public void Learn(TExample example, ILabel label = null, int? index = null) { Contract.Requires(example != null); Contract.Requires(label != null); #if DEBUG if (this.serializer.CachesExamples) { throw new NotSupportedException("Cached examples cannot be used for learning"); } #endif // in release this throws NullReferenceException instead of producing silently wrong results using (var ex = this.learnSerializer.Serialize(example, label, index)) { ex.Learn(); } } /// /// Learn from the given example and returns the current prediction for it. /// /// The prediction type. /// The example to learn. /// The label for this . /// The prediction factory to be used. See . /// The prediction for the given . public TPrediction Learn(TExample example, ILabel label, IVowpalWabbitPredictionFactory predictionFactory) { Contract.Requires(example != null); Contract.Requires(label != null); Contract.Requires(predictionFactory != null); #if DEBUG // only in debug, since it's a hot path if (this.serializer.CachesExamples) { throw new NotSupportedException("Cached examples cannot be used for learning"); } #endif using (var ex = this.learnSerializer.Serialize(example, label)) { return ex.Learn(predictionFactory); } } /// /// Predicts for the given example. /// /// The example to predict for. /// This label can be used to weight the example. 
public void Predict(TExample example, ILabel label = null) { Contract.Requires(example != null); using (var ex = this.serializer.Serialize(example, label)) { ex.Predict(); } } /// /// Predicts for the given example. /// /// The prediction type. /// The example to predict for. /// The prediction factory to be used. See . /// This label can be used to weight the example. public TPrediction Predict(TExample example, IVowpalWabbitPredictionFactory predictionFactory, ILabel label = null) { Contract.Requires(example != null); Contract.Requires(predictionFactory != null); using (var ex = this.serializer.Serialize(example, label)) { return ex.Predict(predictionFactory); } } /// /// Learn from the given example and return the current prediction for it. /// /// The action dependent features. /// The index of the example to learn within . /// The label for the example to learn. public void Learn(IReadOnlyCollection actionDependentFeatures, int index, ILabel label) { Contract.Requires(actionDependentFeatures != null); Contract.Requires(this.singleLineSerializer != null, string.Format( "{0} maps to a multiline example. Use VowpalWabbit.Learn<{0}>({0} example,...) instead.", typeof(TExample))); VowpalWabbitMultiLine.Learn( this.vw, null, this.singleLineSerializer, null, actionDependentFeatures, index, label); } /// /// Learn from the given example and return the current prediction for it. /// /// The action dependent features. /// The index of the example to learn within . /// The label for the example to learn. /// The ranked prediction for the given examples. public ActionDependentFeature[] LearnAndPredict(IReadOnlyCollection actionDependentFeatures, int index, ILabel label) { Contract.Requires(actionDependentFeatures != null); Contract.Requires(index >= 0); Contract.Requires(label != null); Contract.Requires(this.singleLineSerializer != null, string.Format( "{0} maps to a multiline example. Use VowpalWabbit.Learn<{0}>({0} example,...) 
instead.", typeof(TExample))); return VowpalWabbitMultiLine.LearnAndPredict( this.vw, null, this.singleLineSerializer, null, actionDependentFeatures, index, label); } /// /// Predict for the given example and return the current prediction for it. /// /// The action dependent features. /// The index of the example to evaluate within . /// The label for the example to evaluate. /// The ranked prediction for the given examples. public ActionDependentFeature[] Predict(IReadOnlyCollection actionDependentFeatures, int? index = null, ILabel label = null) { Contract.Requires(actionDependentFeatures != null); Contract.Requires(this.singleLineSerializer != null, string.Format( "{0} maps to a multiline example. Use VowpalWabbit.Learn<{0}>({0} example,...) instead.", typeof(TExample))); return VowpalWabbitMultiLine.Predict( this.vw, null, this.singleLineSerializer, null, actionDependentFeatures, index, label); } /// /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources. /// public void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { if (disposing) { if (this.serializer != null) { this.serializer.Dispose(); this.serializer = null; } if (this.vw != null) { this.vw.Dispose(); this.vw = null; } } } } /// /// VW wrapper for multiline ingest. /// /// The user type of the shared feature. /// The user type for each action dependent feature. public class VowpalWabbit : IDisposable { /// /// The native wrapper. /// private VowpalWabbit vw; /// /// The shared example serializer. /// private VowpalWabbitSingleExampleSerializer serializer; /// /// The action dependent feature serializer. /// private VowpalWabbitSingleExampleSerializer actionDependentFeatureSerializer; /// /// The action dependent feature serializer valid for learning. If example caching is enabled, this is null. 
///
private readonly VowpalWabbitSingleExampleSerializer actionDependentFeatureLearnSerializer;

/// <summary>
/// Initializes a new instance of the multi-line wrapper.
/// </summary>
/// <param name="args">Command line arguments passed to native instance.</param>
public VowpalWabbit(String args)
    : this(new VowpalWabbit(args))
{
}

/// <summary>
/// Initializes a new instance of the multi-line wrapper.
/// </summary>
/// <param name="settings">Arguments passed to native instance.</param>
public VowpalWabbit(VowpalWabbitSettings settings)
    : this(new VowpalWabbit(settings))
{
}

/// <summary>
/// Initializes a new instance of the multi-line wrapper.
/// </summary>
/// <param name="vw">The native instance to wrap.</param>
/// <remarks>This instance takes ownership of the vw instance and disposes it.</remarks>
/// <exception cref="ArgumentNullException">Thrown if vw is null.</exception>
/// <exception cref="ArgumentException">Thrown if either user type does not map to a single line serializer.</exception>
public VowpalWabbit(VowpalWabbit vw)
{
    if (vw == null)
    {
        throw new ArgumentNullException("vw");
    }
    Contract.EndContractBlock();

    this.vw = vw;

    // serializer for the shared example; the "as" cast yields null for any other serializer kind
    this.serializer = VowpalWabbitSerializerFactory.CreateSerializer(vw.Settings).Create(vw) as VowpalWabbitSingleExampleSerializer;
    if (this.serializer == null)
        throw new ArgumentException(string.Format(
            "{0} maps to a multiline example. Use VowpalWabbit<{0}> instead.",
            typeof(TExample)));

    // the action dependent features use a copy of the settings with their own schema
    var adfSettings = (VowpalWabbitSettings)vw.Settings.Clone();
    adfSettings.Schema = vw.Settings.ActionDependentSchema;

    this.actionDependentFeatureSerializer = VowpalWabbitSerializerFactory.CreateSerializer(adfSettings).Create(vw) as VowpalWabbitSingleExampleSerializer;
    if (this.actionDependentFeatureSerializer == null)
        throw new ArgumentException(string.Format(
            "{0} maps to a multiline example. Use VowpalWabbit<{0}> instead.",
            typeof(TActionDependentFeature)));

    // have a 2nd member to throw NullReferenceException in release instead of silently producing wrong results.
    this.actionDependentFeatureLearnSerializer = this.actionDependentFeatureSerializer.CachesExamples ? null : this.actionDependentFeatureSerializer;
}

/// <summary>
/// The wrapped VW instance.
/// </summary>
public VowpalWabbit Native
{
    get { return this.vw; }
}

///
/// Internal example serializer.
///
internal IVowpalWabbitSerializer ExampleSerializer
{
    get { return this.serializer; }
}

/// <summary>
/// Internal action dependent feature serializer.
/// </summary>
internal IVowpalWabbitSerializer ActionDependentFeatureSerializer
{
    get { return this.actionDependentFeatureSerializer; }
}

/// <summary>
/// Learn from the given example.
/// </summary>
/// <param name="example">The shared example.</param>
/// <param name="actionDependentFeatures">The action dependent features.</param>
/// <param name="index">The index of the example to learn within actionDependentFeatures.</param>
/// <param name="label">The label for the example to learn.</param>
public void Learn(TExample example, IReadOnlyCollection actionDependentFeatures, int index, ILabel label)
{
    Contract.Requires(example != null);
    Contract.Requires(actionDependentFeatures != null);

    // the learn serializer is null when example caching is enabled
    VowpalWabbitMultiLine.Learn(
        this.vw,
        this.serializer,
        this.actionDependentFeatureLearnSerializer,
        example,
        actionDependentFeatures,
        index,
        label);
}

/// <summary>
/// Learn from the given example and return the current prediction for it.
/// </summary>
/// <param name="example">The shared example.</param>
/// <param name="actionDependentFeatures">The action dependent features.</param>
/// <param name="index">The index of the example to learn within actionDependentFeatures.</param>
/// <param name="label">The label for the example to learn.</param>
/// <returns>The ranked prediction for the given examples.</returns>
public ActionDependentFeature[] LearnAndPredict(TExample example, IReadOnlyCollection actionDependentFeatures, int index, ILabel label)
{
    Contract.Requires(example != null);
    Contract.Requires(actionDependentFeatures != null);
    Contract.Requires(index >= 0);
    Contract.Requires(label != null);

    return VowpalWabbitMultiLine.LearnAndPredict(
        this.vw,
        this.serializer,
        this.actionDependentFeatureLearnSerializer,
        example,
        actionDependentFeatures,
        index,
        label);
}

/// <summary>
/// Predict for the given example and return the current prediction for it.
/// </summary>
/// <param name="example">The shared example.</param>
/// <param name="actionDependentFeatures">The action dependent features.</param>
/// <param name="index">The index of the example to evaluate within actionDependentFeatures.</param>
/// <param name="label">The label for the example to evaluate.</param>
/// <returns>The ranked prediction for the given examples.</returns>
public ActionDependentFeature[] Predict(TExample example, IReadOnlyCollection actionDependentFeatures, int? index = null, ILabel label = null)
{
    Contract.Requires(example != null);
    Contract.Requires(actionDependentFeatures != null);

    return VowpalWabbitMultiLine.Predict(
        this.vw,
        this.serializer,
        this.actionDependentFeatureSerializer,
        example,
        actionDependentFeatures,
        index,
        label);
}

/// <summary>
/// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources.
/// </summary>
public void Dispose()
{
    this.Dispose(true);
    GC.SuppressFinalize(this);
}

/// <summary>
/// Releases both serializers and then the native instance.
/// </summary>
/// <param name="disposing">Nothing is released when false.</param>
private void Dispose(bool disposing)
{
    if (disposing)
    {
        // Dispose the serializers first: they were created against this.vw and may hold
        // examples belonging to it, so the native instance has to go last.
        if (this.serializer != null)
        {
            this.serializer.Dispose();
            this.serializer = null;
        }

        if (this.actionDependentFeatureSerializer != null)
        {
            this.actionDependentFeatureSerializer.Dispose();
            this.actionDependentFeatureSerializer = null;
        }

        if (this.vw != null)
        {
            this.vw.Dispose();
            this.vw = null;
        }
    }
}
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/cs/VowpalWabbitDynamic.cs000066400000000000000000000157571332666127000226030ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------

using System;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.Linq;
using System.Linq.Expressions;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;
using VW.Labels;
using VW.Serializer;

namespace VW
{
    ///
    /// Vowpal Wabbit wrapper for anonymous classes. Type used for serialization doesn't need to be known at compile time,
    /// but it's checked at runtime.
    ///
    /// For each call to Learn or Predict there is additional overhead as the serializer for the example type is looked up in a dictionary, compared to the statically typed wrapper.
public class VowpalWabbitDynamic : IDisposable { private Dictionary serializers; private Dictionary serializeMethods; private VowpalWabbit vw; /// /// Initializes a new instance of the class. /// /// Command line arguments passed to native instance. public VowpalWabbitDynamic(string arguments) : this(new VowpalWabbitSettings(arguments)) { } /// /// Initializes a new instance of the class. /// /// Arguments passed to native instance. public VowpalWabbitDynamic(VowpalWabbitSettings settings) { this.vw = new VowpalWabbit(settings); this.serializers = new Dictionary(); this.serializeMethods = new Dictionary(); } private VowpalWabbitExampleCollection SerializeTyped(T example, ILabel label, int? index) { IDisposable serializer; if (!this.serializers.TryGetValue(typeof(T), out serializer)) { var serializerCompiler = VowpalWabbitSerializerFactory.CreateSerializer(this.vw.Settings); if (serializerCompiler == null) throw new ArgumentException("No feature discovered for type: " + typeof(T)); serializer = serializerCompiler.Create(this.vw); this.serializers.Add(typeof(T), serializer); } return ((IVowpalWabbitSerializer)serializer).Serialize(example, label, index); } private VowpalWabbitExampleCollection Serialize(object example, ILabel label = null, int? index = null) { var type = example.GetType(); MethodInfo method; if (!this.serializeMethods.TryGetValue(type, out method)) { method = typeof(VowpalWabbitDynamic) .GetMethod("SerializeTyped", BindingFlags.Instance | BindingFlags.NonPublic) .MakeGenericMethod(type); this.serializeMethods.Add(type, method); } return (VowpalWabbitExampleCollection)method.Invoke(this, new[] { example, label, index }); } /// /// Learns from the given example. /// /// The example to learn. /// The optional label for this . /// The optional index of the example, the should be attributed to. public void Learn(object example, ILabel label = null, int? 
index = null) { Contract.Requires(example != null); using (var ex = this.Serialize(example, label, index)) { ex.Learn(); } } /// /// Learns from the given example and returns the current prediction. /// /// The example to learn. /// The prediction factory used to extract the prediction. Use . /// The optional label for this . /// The optional index of the example, the should be attributed to. public TPrediction Learn(object example, IVowpalWabbitPredictionFactory predictionFactory, ILabel label = null, int? index = null) { Contract.Requires(example != null); Contract.Requires(predictionFactory != null); using (var ex = this.Serialize(example, label, index)) { return ex.Learn(predictionFactory); } } /// /// Predict for the given example and return the current prediction for it. /// /// /// /// /// The optional index of the example to evaluate within /// The optional label for the example to evaluate. /// public TPrediction Predict(object example, IVowpalWabbitPredictionFactory predictionFactory, ILabel label = null, int? index = null) { Contract.Requires(example != null); Contract.Requires(predictionFactory != null); using (var ex = this.Serialize(example, label, index)) { return ex.Predict(predictionFactory); } } /// /// The wrapped VW instance. /// public VowpalWabbit Native { get { return this.vw; } } /// /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources. 
/// public void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { if (disposing) { if (this.serializers != null) { foreach (var serializer in this.serializers) serializer.Value.Dispose(); this.serializers = null; } if (this.vw != null) { this.vw.Dispose(); this.vw = null; } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/VowpalWabbitExampleCollection.cs000066400000000000000000000137721332666127000246210ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; using System.Text; using System.Threading.Tasks; using VW.Labels; namespace VW { /// /// Base class for JSON deserialization result. /// public abstract class VowpalWabbitExampleCollection : IDisposable { /// /// The native VW instance. /// private readonly VowpalWabbit vw; /// /// Initializes a new instance of the class. /// /// The VW native instance. protected VowpalWabbitExampleCollection(VowpalWabbit vw) { Contract.Requires(vw != null); this.vw = vw; } /// /// Learns this example on the VW instance used for marshalling or the optionally passed on . /// /// The optional VW instance used for learning. Defaults to the one used for marshalling. public void Learn(VowpalWabbit vw = null) { this.LearnInternal(vw ?? this.vw); } /// /// Predicts for this example. /// /// Use this VW instance for prediction instead of the one the example was created from. public void Predict(VowpalWabbit vw = null) { this.PredictInternal(vw ?? 
this.vw); } /// /// Learn from this example and returns the current prediction for it. /// /// The prediction type. /// The prediction factory to be used. See . /// The prediction for the this example. /// Use this VW instance for learning instead of the one the example was created from. public TPrediction Learn(IVowpalWabbitPredictionFactory predictionFactory, VowpalWabbit vw = null) { return this.LearnInternal(predictionFactory, vw ?? this.vw); } /// /// Predicts for this example and returns the current prediction for it. /// /// The prediction type. /// The prediction factory to be used. See . /// The prediction for the this example. /// Use this VW instance for prediction instead of the one the example was created from. public TPrediction Predict(IVowpalWabbitPredictionFactory predictionFactory, VowpalWabbit vw = null) { return this.PredictInternal(predictionFactory, vw ?? this.vw); } /// /// Learns from this example. /// /// Use this VW instance for learning instead of the one the example was created from. protected abstract void LearnInternal(VowpalWabbit vw); /// /// Predicts for this example. /// /// Use this VW instance for prediction instead of the one the example was created from. protected abstract void PredictInternal(VowpalWabbit vw); /// /// Learn from this example and returns the current prediction for it. /// /// The prediction type. /// The prediction factory to be used. See . /// The prediction for the this example. /// Use this VW instance for learning instead of the one the example was created from. protected abstract TPrediction LearnInternal(IVowpalWabbitPredictionFactory predictionFactory, VowpalWabbit vw); /// /// Predicts for this example and returns the current prediction for it. /// /// The prediction type. /// The prediction factory to be used. See . /// The prediction for the this example. /// Use this VW instance for prediction instead of the one the example was created from. 
protected abstract TPrediction PredictInternal(IVowpalWabbitPredictionFactory predictionFactory, VowpalWabbit vw); /// /// The optional string version of the example. /// public abstract string VowpalWabbitString { get; } /// /// The number of feature this example holds. /// public abstract ulong NumberOfFeatures { get; } /// /// All labels this example holds. /// public abstract IEnumerable Labels { get; } /// /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources. /// public abstract void Dispose(); } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/VowpalWabbitMultiLine.cs000066400000000000000000000530041332666127000231040ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; using System.Text; using VW.Labels; using VW.Serializer; namespace VW { /// /// Helper class to properly feed multi-line examples into vw. /// public static class VowpalWabbitMultiLine { /// /// Serializes the specifed example to VW native string format. /// /// The user example type. /// The user action dependent feature type. /// The VW instance. /// The shared example. /// The action dependent features. /// The optional index of the label example. /// The optional label. /// Used to extract features into dictionary. /// Used to extract features into dictionary. This should use a faster comparison mehtod (e.g. reference equals). /// The string serialized example. 
public static string SerializeToString<TExample, TActionDependentFeature>(
    VowpalWabbit<TExample, TActionDependentFeature> vw,
    TExample example,
    IReadOnlyCollection<TActionDependentFeature> actionDependentFeatures,
    int? index = null,
    ILabel label = null,
    Dictionary<string, string> dictionary = null,
    Dictionary<object, string> fastDictionary = null)
{
    // NOTE(review): generic type arguments were stripped from this copy of the file and are
    // restored here from usage. The Dictionary key/value types are an assumption - confirm
    // them against IVowpalWabbitSerializer.SerializeToString.
#if DEBUG
    if (!vw.Native.Settings.EnableStringExampleGeneration)
    {
        throw new ArgumentException("vw.Settings.EnableStringExampleGeneration must be enabled");
    }
#endif

    // Delegate to the general overload using the serializers exposed by the typed wrapper.
    // They are passed in explicitly, so the overload below will not dispose them.
    return SerializeToString<TExample, TActionDependentFeature>(
        vw.Native,
        example,
        actionDependentFeatures,
        index,
        label,
        vw.ExampleSerializer,
        vw.ActionDependentFeatureSerializer,
        dictionary,
        fastDictionary);
}

/// <summary>
/// Serializes the specified example to VW native string format.
/// </summary>
/// <typeparam name="TExample">The user example type.</typeparam>
/// <typeparam name="TActionDependentFeature">The user action dependent feature type.</typeparam>
/// <param name="vw">The VW instance.</param>
/// <param name="example">The shared example.</param>
/// <param name="actionDependentFeatures">The action dependent features.</param>
/// <param name="index">The optional index of the label example.</param>
/// <param name="label">The optional label.</param>
/// <param name="serializer">The example serializer. If null, a temporary one is created for this call and disposed before returning.</param>
/// <param name="actionDependentFeatureSerializer">The action dependent feature serializer. If null, a temporary one is created for this call and disposed before returning.</param>
/// <param name="dictionary">Dictionary used for dictify operation.</param>
/// <param name="fastDictionary">Dictionary used for dictify operation.</param>
/// <returns>The string serialized example.</returns>
/// <exception cref="ArgumentNullException"><paramref name="vw"/> or <paramref name="actionDependentFeatures"/> is null.</exception>
/// <exception cref="ArgumentException">A supplied serializer was not compiled with EnableStringExampleGeneration = true.</exception>
public static string SerializeToString<TExample, TActionDependentFeature>(
    VowpalWabbit vw,
    TExample example,
    IReadOnlyCollection<TActionDependentFeature> actionDependentFeatures,
    int? index = null,
    ILabel label = null,
    IVowpalWabbitSerializer<TExample> serializer = null,
    IVowpalWabbitSerializer<TActionDependentFeature> actionDependentFeatureSerializer = null,
    Dictionary<string, string> dictionary = null,
    Dictionary<object, string> fastDictionary = null)
{
    if (vw == null)
        throw new ArgumentNullException("vw");

    // Fail early instead of with a NullReferenceException after the shared example was serialized.
    if (actionDependentFeatures == null)
        throw new ArgumentNullException("actionDependentFeatures");

    // Serializers created by this call; caller supplied serializers are never tracked here.
    IDisposable ownedSerializer = null;
    IDisposable ownedActionDependentFeatureSerializer = null;

    try
    {
        if (serializer == null)
        {
            serializer = VowpalWabbitSerializerFactory.CreateSerializer<TExample>(new VowpalWabbitSettings { EnableStringExampleGeneration = true }).Create(vw);
            ownedSerializer = serializer as IDisposable;
        }
        else if (!serializer.EnableStringExampleGeneration)
        {
            throw new ArgumentException("Serializer must be compiled using EnableStringExampleGeneration = true");
        }

        if (actionDependentFeatureSerializer == null)
        {
            actionDependentFeatureSerializer = VowpalWabbitSerializerFactory.CreateSerializer<TActionDependentFeature>(new VowpalWabbitSettings { EnableStringExampleGeneration = true }).Create(vw);
            ownedActionDependentFeatureSerializer = actionDependentFeatureSerializer as IDisposable;
        }
        else if (!actionDependentFeatureSerializer.EnableStringExampleGeneration)
        {
            throw new ArgumentException("Action dependent serializer must be compiled using EnableStringExampleGeneration = true");
        }

        var stringExample = new StringBuilder();

        var sharedExample = serializer.SerializeToString(example, SharedLabel.Instance, null, dictionary, fastDictionary);

        // check if we have shared features
        if (!string.IsNullOrWhiteSpace(sharedExample))
        {
            stringExample.AppendLine(sharedExample);
        }

        var i = 0;
        foreach (var actionDependentFeature in actionDependentFeatures)
        {
            // Only the action dependent feature at position "index" carries the label.
            var adfExample = actionDependentFeatureSerializer.SerializeToString(
                actionDependentFeature,
                index != null && i == index ? label : null,
                null,
                dictionary,
                fastDictionary);

            if (!string.IsNullOrWhiteSpace(adfExample))
            {
                stringExample.AppendLine(adfExample);
            }

            i++;
        }

        return stringExample.ToString();
    }
    finally
    {
        // Release only what this call created, also when validation or serialization throws.
        if (ownedActionDependentFeatureSerializer != null)
            ownedActionDependentFeatureSerializer.Dispose();

        if (ownedSerializer != null)
            ownedSerializer.Dispose();
    }
}

/// <summary>
/// A named delegate for the action to be taken once all the examples are marshalled.
/// </summary>
/// <typeparam name="TActionDependentFeature">The action dependent feature user type.</typeparam>
/// <param name="validExamples">Marshalled valid examples.</param>
/// <param name="validActionDependentFeatures">List of valid marshalled examples.</param>
/// <param name="emptyActionDependentFeatures">List of empty non-marshalled examples.</param>
public delegate void LearnOrPredictAction<TActionDependentFeature>(
    IReadOnlyList<VowpalWabbitExample> validExamples,
    IReadOnlyList<ActionDependentFeature<TActionDependentFeature>> validActionDependentFeatures,
    IReadOnlyList<ActionDependentFeature<TActionDependentFeature>> emptyActionDependentFeatures);

/// <summary>
/// Marshals the shared example and every action dependent feature, separates new-line
/// (empty) examples from valid ones and hands the valid set to <paramref name="predictOrLearn"/>.
/// All marshalled examples are disposed before the method returns.
/// </summary>
/// <typeparam name="TExample">User example type.</typeparam>
/// <typeparam name="TActionDependentFeature">Action dependent feature type.</typeparam>
/// <param name="vw">The VowpalWabbit instance.</param>
/// <param name="serializer">The example serializer; when null no shared example is produced.</param>
/// <param name="actionDependentFeatureSerializer">The action dependent feature serializer.</param>
/// <param name="example">The example.</param>
/// <param name="actionDependentFeatures">The action dependent features.</param>
/// <param name="predictOrLearn">An action executed once the set of valid examples is determined. It is not invoked when no action dependent feature produced a valid example.</param>
/// <param name="index">The optional index of the action dependent feature this label belongs to.</param>
/// <param name="label">The optional label to be used for learning or evaluation.</param>
public static void Execute<TExample, TActionDependentFeature>(
    VowpalWabbit vw,
    VowpalWabbitSingleExampleSerializer<TExample> serializer,
    VowpalWabbitSingleExampleSerializer<TActionDependentFeature> actionDependentFeatureSerializer,
    TExample example,
    IReadOnlyCollection<TActionDependentFeature> actionDependentFeatures,
    LearnOrPredictAction<TActionDependentFeature> predictOrLearn,
    int? index = null,
    ILabel label = null)
{
    Contract.Requires(vw != null);
    Contract.Requires(actionDependentFeatureSerializer != null);
    Contract.Requires(example != null);
    Contract.Requires(actionDependentFeatures != null);

    // NOTE(review): generic type arguments were stripped from this copy of the file and are
    // restored here from usage - confirm against the upstream source.
    var capacity = actionDependentFeatures.Count + 1;

    // Everything that was marshalled (and therefore has to be disposed).
    var marshalled = new List<VowpalWabbitExample>(capacity);

    // The subset that is actually fed to VW, plus the matching/non-matching user objects.
    var nonEmptyExamples = new List<VowpalWabbitExample>(capacity);
    var nonEmptyFeatures = new List<ActionDependentFeature<TActionDependentFeature>>(capacity);
    var emptyFeatures = new List<ActionDependentFeature<TActionDependentFeature>>(capacity);

    VowpalWabbitExample terminator = null;

    try
    {
        if (serializer != null)
        {
            var shared = serializer.Serialize(example, SharedLabel.Instance);

            // a shared example is optional
            if (shared != null)
            {
                marshalled.Add(shared);

                if (!shared.IsNewLine)
                {
                    nonEmptyExamples.Add(shared);
                }
            }
        }

        var position = 0;
        foreach (var feature in actionDependentFeatures)
        {
            // only the feature at the requested position receives the label
            var labelForFeature = index != null && position == index ? label : null;

            var marshalledFeature = actionDependentFeatureSerializer.Serialize(feature, labelForFeature);
            Contract.Assert(marshalledFeature != null);

            marshalled.Add(marshalledFeature);

            if (marshalledFeature.IsNewLine)
            {
                emptyFeatures.Add(new ActionDependentFeature<TActionDependentFeature>(position, feature));
            }
            else
            {
                nonEmptyExamples.Add(marshalledFeature);
                nonEmptyFeatures.Add(new ActionDependentFeature<TActionDependentFeature>(position, feature));
            }

            position++;
        }

        if (nonEmptyFeatures.Count == 0)
        {
            return;
        }

        // an empty example signals the end of the multi-line block
        terminator = vw.GetOrCreateNativeExample();
        terminator.MakeEmpty(vw);

        predictOrLearn(nonEmptyExamples, nonEmptyFeatures, emptyFeatures);
    }
    finally
    {
        if (terminator != null)
        {
            terminator.Dispose();
        }

        // Note: the examples must not be disposed before the final example,
        // as the learning algorithm (such as cbf) keeps a reference to them.
        foreach (var item in marshalled)
        {
            item.Dispose();
        }
    }
}

/// <summary>
/// Learns from an example with action dependent features; the label is attached to the
/// action dependent feature at <paramref name="index"/>.
/// </summary>
public static void Learn<TExample, TActionDependentFeature>(
    VowpalWabbit vw,
    VowpalWabbitSingleExampleSerializer<TExample> serializer,
    VowpalWabbitSingleExampleSerializer<TActionDependentFeature> actionDependentFeatureSerializer,
    TExample example,
    IReadOnlyCollection<TActionDependentFeature> actionDependentFeatures,
    int index,
    ILabel label)
{
    Contract.Requires(vw != null);
    Contract.Requires(actionDependentFeatureSerializer != null);
    Contract.Requires(example != null);
    Contract.Requires(actionDependentFeatures != null);
    Contract.Requires(index >= 0);
    Contract.Requires(label != null);

    // the lists of valid/empty user objects are not needed for learning only
    LearnOrPredictAction<TActionDependentFeature> learn =
        (validExamples, validFeatures, emptyFeatures) => { vw.Learn(validExamples.ToList()); };

    Execute(
        vw,
        serializer,
        actionDependentFeatureSerializer,
        example,
        actionDependentFeatures,
        learn,
        index,
        label);
}

/// <summary>
/// Simplify learning of examples with action dependent features.
/// </summary>
/// <typeparam name="TExample">The type of the user example.</typeparam>
/// <typeparam name="TActionDependentFeature">The type of the user action dependent features.</typeparam>
/// <param name="vw">The vw instance.</param>
/// <param name="serializer">The serializer for <typeparamref name="TExample"/>.</param>
/// <param name="actionDependentFeatureSerializer">The serializer for <typeparamref name="TActionDependentFeature"/>.</param>
/// <param name="example">The user example.</param>
/// <param name="actionDependentFeatures">The action dependent features.</param>
/// <param name="index">The index of action dependent feature to label.</param>
/// <param name="label">The label for the selected action dependent feature.</param>
/// <returns>A ranked subset of predicted actions.</returns>
public static ActionDependentFeature<TActionDependentFeature>[] LearnAndPredict<TExample, TActionDependentFeature>(
    VowpalWabbit vw,
    VowpalWabbitSingleExampleSerializer<TExample> serializer,
    VowpalWabbitSingleExampleSerializer<TActionDependentFeature> actionDependentFeatureSerializer,
    TExample example,
    IReadOnlyCollection<TActionDependentFeature> actionDependentFeatures,
    int index,
    ILabel label)
{
    Contract.Requires(vw != null);
    Contract.Requires(actionDependentFeatureSerializer != null);
    Contract.Requires(example != null);
    Contract.Requires(actionDependentFeatures != null);
    Contract.Requires(index >= 0);
    Contract.Requires(label != null);

    // NOTE(review): generic type arguments were stripped from this copy of the file and are
    // restored here from usage - confirm against the upstream source.
    ActionDependentFeature<TActionDependentFeature>[] ranked = null;

    // Stays unset when Execute finds no valid action dependent feature and skips the callback.
    LearnOrPredictAction<TActionDependentFeature> learnThenRank =
        (validExamples, validFeatures, emptyFeatures) =>
        {
            vw.Learn(validExamples.ToList());

            ranked = VowpalWabbitMultiLine.GetPrediction(vw, validExamples, validFeatures, emptyFeatures);
        };

    Execute(
        vw,
        serializer,
        actionDependentFeatureSerializer,
        example,
        actionDependentFeatures,
        learnThenRank,
        index,
        label);

    if (ranked != null)
    {
        return ranked;
    }

    // default to the input list, in input order
    return actionDependentFeatures
        .Select((feature, position) => new ActionDependentFeature<TActionDependentFeature>(position, feature))
        .ToArray();
}

/// <summary>
/// Simplify prediction of examples with action dependent features.
/// </summary>
/// <typeparam name="TExample">The type of the user example.</typeparam>
/// <typeparam name="TActionDependentFeature">The type of the user action dependent features.</typeparam>
/// <param name="vw">The vw instance.</param>
/// <param name="serializer">The serializer for <typeparamref name="TExample"/>.</param>
/// <param name="actionDependentFeatureSerializer">The serializer for <typeparamref name="TActionDependentFeature"/>.</param>
/// <param name="example">The user example.</param>
/// <param name="actionDependentFeatures">The action dependent features.</param>
/// <param name="index">The optional index of action dependent feature to label.</param>
/// <param name="label">The optional label for the selected action dependent feature.</param>
/// <returns>A ranked subset of predicted actions.</returns>
public static ActionDependentFeature<TActionDependentFeature>[] Predict<TExample, TActionDependentFeature>(
    VowpalWabbit vw,
    VowpalWabbitSingleExampleSerializer<TExample> serializer,
    VowpalWabbitSingleExampleSerializer<TActionDependentFeature> actionDependentFeatureSerializer,
    TExample example,
    IReadOnlyCollection<TActionDependentFeature> actionDependentFeatures,
    int? index = null,
    ILabel label = null)
{
    Contract.Requires(vw != null);
    Contract.Requires(actionDependentFeatureSerializer != null);
    Contract.Requires(example != null);
    Contract.Requires(actionDependentFeatures != null);

    ActionDependentFeature<TActionDependentFeature>[] ranked = null;

    // Stays unset when Execute finds no valid action dependent feature and skips the callback.
    LearnOrPredictAction<TActionDependentFeature> predictThenRank =
        (validExamples, validFeatures, emptyFeatures) =>
        {
            vw.Predict(validExamples.ToList());

            ranked = VowpalWabbitMultiLine.GetPrediction(vw, validExamples, validFeatures, emptyFeatures);
        };

    Execute(
        vw,
        serializer,
        actionDependentFeatureSerializer,
        example,
        actionDependentFeatures,
        predictThenRank,
        index,
        label);

    if (ranked != null)
    {
        return ranked;
    }

    // default to the input list, in input order
    return actionDependentFeatures
        .Select((feature, position) => new ActionDependentFeature<TActionDependentFeature>(position, feature))
        .ToArray();
}

/// <summary>
/// Extracts the prediction, orders the action dependent feature objects accordingly and appends the
/// action dependent feature objects that did produce empty examples at the end.
/// </summary>
/// <typeparam name="TActionDependentFeature">The action dependent feature type.</typeparam>
/// <param name="vw">The Vowpal Wabbit instance.</param>
/// <param name="examples">The list of examples.</param>
/// <param name="validActionDependentFeatures">The list of non-empty action dependent feature objects.</param>
/// <param name="emptyActionDependentFeatures">The list of empty action dependent feature objects.</param>
/// <returns>Returns the ranked list of action dependent features.</returns>
/// <exception cref="InvalidOperationException">The number of predictions differs from the number of valid action dependent features.</exception>
/// <exception cref="NotSupportedException">The prediction is neither an ActionScore[] nor an int[] (including a null prediction).</exception>
public static ActionDependentFeature<TActionDependentFeature>[] GetPrediction<TActionDependentFeature>(
    VowpalWabbit vw,
    IReadOnlyList<VowpalWabbitExample> examples,
    IReadOnlyList<ActionDependentFeature<TActionDependentFeature>> validActionDependentFeatures,
    IReadOnlyList<ActionDependentFeature<TActionDependentFeature>> emptyActionDependentFeatures)
{
    // NOTE(review): generic type arguments were stripped from this copy of the file and are
    // restored here from usage - confirm against the upstream source.

    // The prediction result is stored in the first example,
    // so an actual VowpalWabbitExample is needed to read it.
    var firstExample = examples.FirstOrDefault();
    if (firstExample == null)
    {
        return null;
    }

    var values = firstExample.GetPrediction(vw, VowpalWabbitPredictionType.Dynamic);

    var actionScores = values as ActionScore[];
    var multilabel = values as int[];

    if (actionScores == null && multilabel == null)
    {
        // values may be null here; report that instead of failing with a NullReferenceException
        throw new NotSupportedException("Unsupported return type: " + (values == null ? "null" : values.GetType().ToString()));
    }

    var numberOfPredictions = actionScores != null ? actionScores.Length : multilabel.Length;
    if (numberOfPredictions != validActionDependentFeatures.Count)
        throw new InvalidOperationException("Number of predictions returned unequal number of examples fed");

    var result = new ActionDependentFeature<TActionDependentFeature>[validActionDependentFeatures.Count + emptyActionDependentFeatures.Count];
    int i = 0;

    if (actionScores != null)
    {
        // each entry names the position within the valid list together with its score
        foreach (var actionScore in actionScores)
        {
            result[i] = validActionDependentFeatures[(int)actionScore.Action];
            result[i].Probability = actionScore.Score;
            i++;
        }
    }
    else
    {
        foreach (var action in multilabel)
            result[i++] = validActionDependentFeatures[action];

        // The top ranked action gets probability 1. With an empty prediction there is no
        // top action and result[0] would be out of range or not yet assigned.
        if (multilabel.Length > 0)
            result[0].Probability = 1f;
    }

    // append invalid ones at the end
    foreach (var f in emptyActionDependentFeatures)
        result[i++] = f;

    return result;
}
}
}
vowpal-wabbit-8.6.1.dfsg1/cs/cs/VowpalWabbitMultiLineExampleCollection.cs000066400000000000000000000164021332666127000264350ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including
Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; using VW.Labels; namespace VW { /// /// Result for multiline examples. /// public sealed class VowpalWabbitMultiLineExampleCollection : VowpalWabbitExampleCollection { private readonly ulong numberOfFeatures; /// /// Initializes a new instance of the class. /// public VowpalWabbitMultiLineExampleCollection(VowpalWabbit vw, VowpalWabbitExample shared, VowpalWabbitExample[] examples) : base(vw) { Contract.Requires(examples != null); this.SharedExample = shared; this.Examples = examples; if (shared != null) numberOfFeatures += shared.NumberOfFeatures; foreach (var e in examples) if (e != null) numberOfFeatures += e.NumberOfFeatures; } /// /// Single example or the shared /// public VowpalWabbitExample SharedExample { get; private set; } /// /// The multi-line examples /// public VowpalWabbitExample[] Examples { get; private set; } /// /// The number of feature this example holds. /// public override ulong NumberOfFeatures { get { return this.numberOfFeatures; } } /// /// Calls learn or predict for the set of examples. Does required filtering of potential new line examples. 
///
/// <typeparam name="TPrediction">The prediction type; only used when <paramref name="predictionFactory"/> is supplied.</typeparam>
/// <param name="vw">The VW instance used to create the end-of-block example and to read the prediction.</param>
/// <param name="predictOrLearn">Invoked with the non-empty examples (shared example first, if present).</param>
/// <param name="predictionFactory">Optional factory used to extract the prediction from the first non-empty example.</param>
/// <returns>
/// The prediction if <paramref name="predictionFactory"/> is supplied, otherwise the default value.
/// The default value is also returned, without invoking <paramref name="predictOrLearn"/>, when there is no non-empty example.
/// </returns>
private TPrediction Execute<TPrediction>(VowpalWabbit vw, Action<List<VowpalWabbitExample>> predictOrLearn, IVowpalWabbitPredictionFactory<TPrediction> predictionFactory = null)
{
    Contract.Requires(predictOrLearn != null);

    // NOTE(review): generic type arguments were stripped from this copy of the file and are
    // restored here from usage - confirm against the upstream source.

    // firstExample will contain prediction result
    VowpalWabbitExample firstExample = null;
    VowpalWabbitExample empty = null;

    try
    {
        var ecCol = new List<VowpalWabbitExample>();

        if (this.SharedExample != null && !this.SharedExample.IsNewLine)
        {
            firstExample = this.SharedExample;
            ecCol.Add(firstExample);
        }

        foreach (var ex in this.Examples)
        {
            // The constructor and Dispose accept null entries, so skip them here as well;
            // new-line examples must not be fed to VW either.
            if (ex == null || ex.IsNewLine)
                continue;

            ecCol.Add(ex);

            if (firstExample == null)
                firstExample = ex;
        }

        // Nothing to feed: there is no example that could hold a prediction, so avoid
        // dereferencing a null firstExample below.
        if (firstExample == null)
            return default(TPrediction);

        // signal end-of-block
        empty = vw.GetOrCreateNativeExample();
        empty.MakeEmpty(vw);

        predictOrLearn(ecCol);

        return predictionFactory != null ? firstExample.GetPrediction(vw, predictionFactory) : default(TPrediction);
    }
    finally
    {
        if (empty != null)
            empty.Dispose();
    }
}

/// <summary>
/// Learns from these examples.
/// </summary>
protected override void LearnInternal(VowpalWabbit vw)
{
    // No prediction is read, but Execute still needs a type argument and "void" is not
    // allowed; the concrete type is irrelevant because the result is discarded.
    this.Execute<int>(vw, ex => vw.Learn(ex));
}

/// <summary>
/// Learn from these examples and returns the current prediction for it.
/// </summary>
/// <typeparam name="TPrediction">The prediction type.</typeparam>
/// <param name="predictionFactory">The prediction factory to be used.</param>
/// <param name="vw">The VW instance that should be used for learning.</param>
/// <returns>The prediction for this example.</returns>
protected override TPrediction LearnInternal<TPrediction>(IVowpalWabbitPredictionFactory<TPrediction> predictionFactory, VowpalWabbit vw)
{
    return this.Execute(vw, ex => vw.Learn(ex), predictionFactory);
}

/// <summary>
/// Predicts for these examples.
/// </summary>
protected override void PredictInternal(VowpalWabbit vw)
{
    // See LearnInternal: the type argument is a placeholder, the result is discarded.
    this.Execute<int>(vw, ex => vw.Predict(ex));
}

/// <summary>
/// Predicts for these examples and returns the current prediction for it.
/// </summary>
/// <typeparam name="TPrediction">The prediction type.</typeparam>
/// <param name="predictionFactory">The prediction factory to be used.</param>
/// <param name="vw">The native VW instance.</param>
/// <returns>The prediction for this example.</returns>
protected override TPrediction PredictInternal<TPrediction>(IVowpalWabbitPredictionFactory<TPrediction> predictionFactory, VowpalWabbit vw)
{
    // NOTE(review): generic type arguments were stripped from this copy of the file and are
    // restored here from usage - confirm against the upstream source.
    return this.Execute(vw, ex => vw.Predict(ex), predictionFactory);
}

/// <summary>
/// The optional string version of the example: the shared example followed by each
/// multi-line example, one per line, with empty entries left out.
/// </summary>
public override string VowpalWabbitString
{
    get
    {
        var str = new List<string>();

        if (this.SharedExample != null)
            str.Add(this.SharedExample.VowpalWabbitString);

        // The constructor and Dispose accept null entries in Examples; skip them here too.
        str.AddRange(this.Examples.Where(e => e != null).Select(e => e.VowpalWabbitString));

        // filter empty example
        return string.Join("\n", str.Where(s => !string.IsNullOrWhiteSpace(s)));
    }
}

/// <summary>
/// All labels this example holds, one per entry of <see cref="Examples"/>.
/// A null entry yields a null label so positions stay aligned with <see cref="Examples"/>.
/// </summary>
public override IEnumerable<ILabel> Labels
{
    get
    {
        return this.Examples.Select(e => e != null ? e.Label : null);
    }
}

/// <summary>
/// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources.
/// </summary>
public override void Dispose()
{
    this.Dispose(true);
    GC.SuppressFinalize(this);
}

/// <summary>
/// Disposes the shared example and every non-null multi-line example, then clears the references.
/// </summary>
/// <param name="disposing">True when called from <see cref="Dispose()"/>; nothing is released otherwise.</param>
private void Dispose(bool disposing)
{
    if (disposing)
    {
        if (this.SharedExample != null)
        {
            this.SharedExample.Dispose();
            this.SharedExample = null;
        }

        if (this.Examples != null)
        {
            foreach (var ex in this.Examples)
                if (ex != null)
                    ex.Dispose();

            this.Examples = null;
        }
    }
}
}
}
vowpal-wabbit-8.6.1.dfsg1/cs/cs/VowpalWabbitMultiworldTesting.cs000066400000000000000000000122311332666127000246770ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.IO;
using System.Linq;
using VW.Labels;
using VW.Serializer.Attributes;

namespace VW
{
/// <summary>
/// A wrapper for --multiworld_testing mode.
/// public sealed class VowpalWabbitMultiworldTesting : IDisposable { private VowpalWabbit vw; /// /// /// /// Optional model to see multiworld testing public VowpalWabbitMultiworldTesting(Stream vwModel = null) { var settings = vwModel == null ? new VowpalWabbitSettings("--multiworld_test f") : new VowpalWabbitSettings { ModelStream = vwModel }; this.vw = new VowpalWabbit(settings); } /// /// Evaluates and x constants policies w.r.t. to . /// /// The learned action. /// The number constant policies to be evaluated. /// The label. /// public PoliciesPerformance Evaluate(uint learnedAction, int numActions, ContextualBanditLabel label) { return new PoliciesPerformance( this.vw.Learn( new LearnedVsConstantPolicy(learnedAction, numActions), label, VowpalWabbitPredictionType.Scalars)); } /// /// The assocated VW instance. /// public VowpalWabbit Native { get { return this.vw.Native; } } /// /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources. /// public void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { if (disposing) { if (this.vw != null) { this.vw.Dispose(); this.vw = null; } } } /// /// Contains evaluation result for constant policies and the currently learned policy. /// public sealed class PoliciesPerformance { private float[] data; /// /// Initializes a new instance. /// /// The performance data. Learned policy is at position 0. public PoliciesPerformance(float[] data) { this.data = data; } /// /// The performance of the learned policy. /// public float LearnedPolicy { get { return this.data[0]; } } /// /// The number of constant policies evaluated. /// public int NumConstantPolicies { get { return this.data.Length - 1; } } /// /// The performance of each constant policy. /// public IEnumerable ConstantPolicies { get { return this.data.Skip(1); } } } /// /// Must be public for the serializer to work with it. 
/// [EditorBrowsableAttribute(EditorBrowsableState.Never)] public sealed class LearnedVsConstantPolicy { private uint learnedAction; private int numConstantActions; internal LearnedVsConstantPolicy(uint learnedAction, int numConstantActions) { this.learnedAction = learnedAction; this.numConstantActions = numConstantActions; } /// /// The constant policies actions. /// [Feature(FeatureGroup = 'f')] public IEnumerable Actions { get { yield return learnedAction; for (uint i = 0; i < this.numConstantActions; i++) { yield return i + 1; } } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/VowpalWabbitSerializationException.cs000066400000000000000000000026321332666127000256770ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using VW.Serializer.Intermediate; namespace VW { /// /// Exception thrown while serialization. /// public class VowpalWabbitSerializationException : Exception { /// /// Constructs new exception /// public VowpalWabbitSerializationException(string message, Exception innerException, Namespace ns, Feature feature) : base($"{message}. Namespace: {ns.Name}. Feature: {feature.Name}", innerException) { this.Namespace = ns; this.Feature = feature; } /// /// The related namespace for this exception. /// public Namespace Namespace { get; private set; } /// /// The related feature for this feature. 
/// public Feature Feature { get; private set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/VowpalWabbitSingleLineExampleCollection.cs000066400000000000000000000107661332666127000265730ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; using System.Text; using System.Threading.Tasks; using VW.Labels; namespace VW { /// /// Result for a single example. /// public sealed class VowpalWabbitSingleLineExampleCollection : VowpalWabbitExampleCollection { /// /// Initializes a new instance of the class. /// public VowpalWabbitSingleLineExampleCollection(VowpalWabbit vw, VowpalWabbitExample example) : base(vw) { Contract.Requires(example != null); this.Example = example; } /// /// Single example or the shared /// public VowpalWabbitExample Example { get; private set; } /// /// Learns from this example. /// protected override void LearnInternal(VowpalWabbit vw) { vw.Learn(this.Example); } /// /// Learn from this example and returns the current prediction for it. /// /// The prediction type. /// The prediction factory to be used. See . /// The VW native instance. /// The prediction for the this example. protected override TPrediction LearnInternal(IVowpalWabbitPredictionFactory predictionFactory, VowpalWabbit vw) { return vw.Learn(this.Example, predictionFactory); } /// /// Predicts for this example. /// protected override void PredictInternal(VowpalWabbit vw) { vw.Predict(this.Example); } /// /// Predicts for this example and returns the current prediction for it. 
/// /// The prediction type. /// The prediction factory to be used. See . /// The VW instance that should be used for prediction. /// The prediction for the this example. protected override TPrediction PredictInternal(IVowpalWabbitPredictionFactory predictionFactory, VowpalWabbit vw) { return vw.Predict(this.Example, predictionFactory); } /// /// The optional string version of the example. /// public override string VowpalWabbitString { get { return this.Example.VowpalWabbitString; } } /// /// The number of feature this example holds. /// public override ulong NumberOfFeatures { get { return this.Example.NumberOfFeatures; } } /// /// All labels this example holds. /// public override IEnumerable Labels { get { yield return this.Example.Label; } } /// /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources. /// public override void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { if (disposing) { if (this.Example != null) { this.Example.Dispose(); this.Example = null; } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/VowpalWabbitSweep.cs000066400000000000000000000223061332666127000222660ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; using System.Text; using System.Threading.Tasks; using VW.Labels; using VW.Serializer; namespace VW { /// /// Sweeping wrapper for multiline examples. 
Designed to re-use allocated examples /// across multiple Vowpal Wabbit instances. So far plain parallelization yielded /// faster training times at least on a 20 core machine. /// /// User example type. /// Action dependent feature type. public class VowpalWabbitSweep : IDisposable { private const int NumberOfVWInstancesSharingExamples = 1; private VowpalWabbit[] vws; private List settings; private VowpalWabbitSingleExampleSerializer[] serializers; private VowpalWabbitSingleExampleSerializer[] actionDependentFeatureSerializers; /// /// Initializes a new instance. /// /// The list of settings to be used. public VowpalWabbitSweep(List settings) { if (settings == null || settings.Count == 0) throw new ArgumentException("settings"); Contract.EndContractBlock(); // TODO: check that the sweeps are not across incompatible options. this.settings = settings; this.vws = settings.Select(setting => new VowpalWabbit(setting)).ToArray(); var diffs = this.vws.Skip(1).Select(vw => vw.AreFeaturesCompatible(this.vws[0])).Where(e => e != null).ToList(); if (diffs.Count > 0) throw new ArgumentException("Feature settings are not compatible for sweeping: " + string.Join(",", diffs)); this.serializers = this.vws.Select(vw => (VowpalWabbitSingleExampleSerializer)VowpalWabbitSerializerFactory.CreateSerializer(vw.Settings).Create(vw)) .ToArray(); this.actionDependentFeatureSerializers = this.vws.Select(vw => (VowpalWabbitSingleExampleSerializer)VowpalWabbitSerializerFactory.CreateSerializer(vw.Settings).Create(vw)) .ToArray(); } /// /// The internally allocated VW instances. /// public VowpalWabbit[] VowpalWabbits { get { return this.vws; } } /// /// Creates a partioner used for parallel scenarios. /// /// An ordered partitioner. public OrderablePartitioner> CreatePartitioner() { return Partitioner.Create(0, this.vws.Length, Math.Min(this.vws.Length, NumberOfVWInstancesSharingExamples)); } /// /// Learn from the given example and return the current prediction for it. 
/// /// The shared example. /// Instance number to start from. /// Instance number to end at. /// The action dependent features. /// The index of the example to learn within . /// The label for the example to learn. public void Learn(int fromInclusive, int toExclusive, TExample example, IReadOnlyCollection actionDependentFeatures, int index, ILabel label) { if (fromInclusive != toExclusive - 1) throw new ArgumentException("fromInclusive"); VowpalWabbitMultiLine.Execute(this.vws[fromInclusive], this.serializers[fromInclusive], this.actionDependentFeatureSerializers[fromInclusive], example, actionDependentFeatures, (examples, _, __) => { for (int i = fromInclusive; i < toExclusive; i++) { foreach (var ex in examples) { this.vws[i].Learn(ex); } } }, index, label); } /// /// Prediction sweep. /// /// /// /// /// /// /// /// public TActionDependentFeature[][] Predict(int fromInclusive, int toExclusive, TExample example, IReadOnlyCollection actionDependentFeatures, int index, ILabel label) { if (fromInclusive != toExclusive - 1) throw new ArgumentException("fromInclusive"); var result = new TActionDependentFeature[toExclusive - fromInclusive][]; VowpalWabbitMultiLine.Execute(this.vws[fromInclusive], this.serializers[fromInclusive], this.actionDependentFeatureSerializers[fromInclusive], example, actionDependentFeatures, (examples, validActionDependentFeatures, emptyActionDependentFeatures) => { for (int i = fromInclusive; i < toExclusive; i++) { // feed all examples for this block foreach (var ex in examples) { this.vws[i].Predict(ex); } result[i - fromInclusive] = VowpalWabbitMultiLine.GetPrediction(this.vws[i], examples, validActionDependentFeatures, emptyActionDependentFeatures) .Select(p => p.Feature).ToArray(); } }, index, label); return result; } /// /// Save all models with the given prfix. 
/// /// /// public List SaveModels(string modelPrefix)         {             return this.vws.Select((vw, i) =>             {                 var modelName = modelPrefix + "-" + i;                 vw.SaveModel(modelName);                 return modelName;             })             .ToList();         }  /// /// Reload all models. /// public void Reload()         {             foreach (var vw in this.vws)             {                 vw.Reload();             }         }  /// /// Executes the given action on each VW instance. /// /// The action to execute. public void Execute(Action, VowpalWabbitSingleExampleSerializer, int> action)         {             Parallel.For(                 0, this.vws.Length,                 new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount / 2 },                 i => action(this.vws[i], this.serializers[i], this.actionDependentFeatureSerializers[i], i));         }  /// /// Dispose resources. /// public void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { if (disposing) { if (this.vws != null) { foreach (var vw in this.vws) { vw.Dispose(); } this.vws = null; } if (this.serializers != null) { foreach (var s in this.serializers) { s.Dispose(); } this.serializers = null; } if (this.actionDependentFeatureSerializers != null) { foreach (var s in this.actionDependentFeatureSerializers) { s.Dispose(); } this.actionDependentFeatureSerializers = null; } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/VowpalWabbitThreadedPrediction.cs000066400000000000000000000143301332666127000247420ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using VW.Serializer; namespace VW { /// /// Enables multi-threaded prediction by utilizing a pool of instances. /// public sealed class VowpalWabbitThreadedPrediction : VowpalWabbitThreadedPredictionBase { /// /// Initializes a new instance of . /// /// Decision Service client library needs default constructor. public VowpalWabbitThreadedPrediction() { } /// /// Initializes a new instance of . /// /// The model used by each pool instance. public VowpalWabbitThreadedPrediction(VowpalWabbitModel model) : base(model) { } /// /// Initializes a new instance of . /// /// The settings used by each pool instance. public VowpalWabbitThreadedPrediction(VowpalWabbitSettings settings) : base(settings) { } /// /// Returns the same instance as no wrapping is required. /// /// The wrapped vw instance. protected override VowpalWabbit InternalCreate(VowpalWabbit vw) { return vw; } } /// /// Enables multi-threaded prediction by utilizing a pool of instances. /// /// The type use for providing data to VW using the serializer infrastructure. public sealed class VowpalWabbitThreadedPrediction : VowpalWabbitThreadedPredictionBase> { /// /// Initializes a new instance of . /// /// Decision Service client library needs default constructor. public VowpalWabbitThreadedPrediction() { } /// /// Initializes a new instance of . /// /// The model used by each pool instance. /// Optional pre-compiled serializer. public VowpalWabbitThreadedPrediction(VowpalWabbitModel model, IVowpalWabbitSerializerCompiler compiledSerializer = null) : base(model) { this.CompiledSerializer = compiledSerializer; } /// /// Initializes a new instance of . /// /// The settings used by each pool instance. public VowpalWabbitThreadedPrediction(VowpalWabbitSettings settings) : base(settings) { } /// /// The Serializer used to marshal examples. 
/// public IVowpalWabbitSerializerCompiler CompiledSerializer { get; private set; } /// /// Creates a new instance of . /// /// The wrapped vw instance. protected override VowpalWabbit InternalCreate(VowpalWabbit vw) { if (this.CompiledSerializer == null) this.CompiledSerializer = VowpalWabbitSerializerFactory.CreateSerializer(vw.Settings); return new VowpalWabbit(vw, this.CompiledSerializer); } } /// /// Enables multi-threaded prediction by utilizing a pool of instances. /// /// The type use for providing data to VW using the serializer infrastructure. /// The type use for providing action dependent data to VW using the serializer infrastructure. public sealed class VowpalWabbitThreadedPrediction : VowpalWabbitThreadedPredictionBase> { /// /// Initializes a new instance of . /// /// Decision Service client library needs default constructor. public VowpalWabbitThreadedPrediction() { } /// /// Initializes a new instance of . /// /// The model used by each pool instance. public VowpalWabbitThreadedPrediction(VowpalWabbitModel model) : base(model) { } /// /// Initializes a new instance of . /// /// The settings used by each pool instance. public VowpalWabbitThreadedPrediction(VowpalWabbitSettings settings) : base(settings) { } /// /// Creates a new instance of . /// /// The wrapped vw instance. protected override VowpalWabbit InternalCreate(VowpalWabbit vw) { return new VowpalWabbit(vw); } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/VowpalWabbitThreadedPredictionBase.cs000066400000000000000000000105771332666127000255460ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using System; using System.Diagnostics; using System.Diagnostics.Contracts; namespace VW { /// /// Enables multi-threaded prediction by utilizing a pool of instances. /// /// The VowpalWabbit wrapper type used. public abstract class VowpalWabbitThreadedPredictionBase : IDisposable where TVowpalWabbit : class, IDisposable { /// /// The pool of potentially wrapped VW instances. /// private ObjectPool vwPool; private VowpalWabbitSettings settings; /// /// Initializes a new instance of the class. /// /// The initial model to use. protected VowpalWabbitThreadedPredictionBase(VowpalWabbitModel model = null) : this(new VowpalWabbitSettings() { Model = model }) { } /// /// Initializes a new instance of the class. /// /// The initial settings to use. protected VowpalWabbitThreadedPredictionBase(VowpalWabbitSettings settings) { this.settings = settings; this.vwPool = new ObjectPool( ObjectFactory.Create( settings.Model, m => { if (m == null) return default(TVowpalWabbit); return CreateVowpalWabbitChild(m); })); } private TVowpalWabbit CreateVowpalWabbitChild(VowpalWabbitModel model) { var newSettings = (VowpalWabbitSettings)this.settings.Clone(); newSettings.Model = model; var vw = new VowpalWabbit(newSettings); return this.InternalCreate(vw); } /// /// Implementors create new VW wrapper instances. /// /// The native VW instance. /// The new VW wrapper instance. protected abstract TVowpalWabbit InternalCreate(VowpalWabbit vw); /// /// Updates the model used for prediction in a thread-safe manner. /// /// The new model to be used. public void UpdateModel(VowpalWabbitModel model) { this.vwPool.UpdateFactory(ObjectFactory.Create( model, this.CreateVowpalWabbitChild)); } /// /// Gets or creates a new VW wrapper instance. /// /// A ready to use VW wrapper instance. /// can be null if no model was supplied yet. 
public PooledObject GetOrCreate() { Contract.Ensures(Contract.Result>() != null); return this.vwPool.GetOrCreate(); } /// /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources. /// public void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { if (disposing) { if (this.vwPool != null) { this.vwPool.Dispose(); this.vwPool = null; } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs/cs.csproj000066400000000000000000000325641332666127000201700ustar00rootroot00000000000000 Debug AnyCPU {E4E962AE-7056-4EB0-A8C5-8DC824A4B068} Library Properties VW VowpalWabbit v4.5.2 512 true ..\..\vowpalwabbit\ 0 true $(SolutionDir)\x64\Debug\ TRACE;DEBUG full x64 prompt MinimumRecommendedRules.ruleset ..\..\vowpalwabbit\x64\Debug\VowpalWabbit.XML True False True False False True True True True True True True True True False True False True False False False False True False True True True False False True False False True Full %28none%29 0 true $(SolutionDir)\x64\Release\ TRACE true pdbonly x64 prompt MinimumRecommendedRules.ruleset ..\..\vowpalwabbit\x64\Release\VowpalWabbit.XML true true ..\vw_key.snk true $(SolutionDir)\x64\DebugLeakCheck\ TRACE;DEBUG true $(SolutionDir)\x64\DebugLeakCheck\VowpalWabbit.XML full x64 prompt MinimumRecommendedRules.ruleset True True VowpalWabbitDefaultMarshallerExt.tt {85e55ae0-3784-4968-9271-c81af560e1c1} vw_clr {e621e022-c1f8-433f-905a-ab9a3de072b7} vw_common Designer TextTemplatingFileGenerator VowpalWabbitDefaultMarshallerExt.cs This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
vowpal-wabbit-8.6.1.dfsg1/cs/cs/cs.nuspec000066400000000000000000000040231332666127000201520ustar00rootroot00000000000000 Vowpal Wabbit VowpalWabbit vw vowpal wabbit langford ml machine learning John Langford et al $version$ https://github.com/JohnLangford/vowpal_wabbit/wiki/C%23-Binding https://github.com/JohnLangford/vowpal_wabbit/blob/master/LICENSE false Official Vowpal Wabbit library including C# interface Copyright (C) Microsoft Corp 2012-2016, Yahoo! Inc. 2007-2012, and many individual contributors. All rights reserved. vowpal-wabbit-8.6.1.dfsg1/cs/cs/packages.config000066400000000000000000000005761332666127000213040ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/cs/web.config.transform000066400000000000000000000005421332666127000223060ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/cs_console/000077500000000000000000000000001332666127000200515ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/cs_console/App.config000066400000000000000000000010341332666127000217560ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/cs_console/Program.cs000066400000000000000000000103561332666127000220140ustar00rootroot00000000000000using Newtonsoft.Json; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Reflection; using System.Text; using System.Threading.Tasks; using VW; namespace cs_vw { public class Program { enum FileMode { JsonArray, JsonNewLine } private static FileMode DetectedFileMode(string file) { // auto-detect if it's line separated or array based using (var reader = new StreamReader(file)) { var buffer = new char[32]; int readChars; while ((readChars = reader.Read(buffer, 0, buffer.Length)) > 0) { for (int i = 0; i < readChars; i++) { var c = buffer[i]; switch (c) { case ' ': case '\t': case '\n': case '\r': continue; case '[': return FileMode.JsonArray; case '{': return FileMode.JsonNewLine; default: throw new ArgumentException("Invalid character: " + c); } } } } throw new 
ArgumentException("Empty file"); } public static void Main(string[] args) { // first argument needs to end with .json if (args.Length == 0) { Console.Error.WriteLine( "Usage: {0} ...", Path.GetFileName(Assembly.GetExecutingAssembly().Location)); return; } try { var json = args[0]; var vwArguments = string.Join(" ", args.Skip(1)); var fileMode = DetectedFileMode(json); using (var vw = new VowpalWabbitJson(vwArguments)) { switch (fileMode) { case FileMode.JsonArray: using (var reader = new JsonTextReader(new StreamReader(json))) { if (!reader.Read()) return; if (reader.TokenType != JsonToken.StartArray) return; while (reader.Read()) { switch (reader.TokenType) { case JsonToken.StartObject: vw.Learn(reader); break; case JsonToken.EndObject: // skip break; case JsonToken.EndArray: // end reading return; } } } break; case FileMode.JsonNewLine: using (var reader = new StreamReader(json)) { string line; while ((line = reader.ReadLine()) != null) { if (string.IsNullOrWhiteSpace(line)) continue; vw.Learn(line); } } break; } } } catch (Exception e) { Console.Error.WriteLine("Exception: {0}.\n{1}", e.Message, e.StackTrace); } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs_console/Properties/000077500000000000000000000000001332666127000222055ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/cs_console/Properties/AssemblyInfo.cs000066400000000000000000000022331332666127000251270ustar00rootroot00000000000000//------------------------------------------------------------------------------ // // This code was generated by a tool. // Runtime Version:4.0.30319.42000 // // Changes to this file may cause incorrect behavior and will be lost if // the code is regenerated. 
// //------------------------------------------------------------------------------ [assembly: System.Reflection.AssemblyTitle("Vowpal Wabbit Console")] [assembly: System.Reflection.AssemblyDescription("Vowpal Wabbit Console")] [assembly: System.Reflection.AssemblyCompany("Microsoft Corp")] [assembly: System.Reflection.AssemblyProduct("Vowpal Wabbit")] [assembly: System.Reflection.AssemblyCopyright("Copyright (C) Microsoft Corp 2012-2016, Yahoo! Inc. 2007-2012, and many individua" + "l contributors. All rights reserved")] [assembly: System.Runtime.InteropServices.ComVisible(false)] [assembly: System.CLSCompliant(false)] [assembly: System.Runtime.InteropServices.Guid("c7c26e42-6d03-4fe5-943c-add2440f1e37")] [assembly: System.Reflection.AssemblyVersion("8.4.0.1")] [assembly: System.Reflection.AssemblyFileVersion("8.4.0.1")] vowpal-wabbit-8.6.1.dfsg1/cs/cs_console/cs_console.csproj000066400000000000000000000132221332666127000234220ustar00rootroot00000000000000 Debug AnyCPU {01A85382-C3E9-480A-86BF-FAFE4AD107A7} Exe Properties VowpalWabbit.Console vw_json v4.5.2 512 $(SolutionDir)\ true ..\vw_key.snk true $(SolutionDir)\x64\Debug\ DEBUG;TRACE full x64 prompt MinimumRecommendedRules.ruleset true $(SolutionDir)\x64\Release\ TRACE true pdbonly x64 prompt MinimumRecommendedRules.ruleset true $(SolutionDir)\packages\Newtonsoft.Json.9.0.1\lib\net45\Newtonsoft.Json.dll True Designer Designer {85e55ae0-3784-4968-9271-c81af560e1c1} vw_clr {e621e022-c1f8-433f-905a-ab9a3de072b7} vw_common {e4e962ae-7056-4eb0-a8c5-8dc824a4b068} cs {9e27fa94-ab34-4736-8427-fb7a2ba90d52} cs_json This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
vowpal-wabbit-8.6.1.dfsg1/cs/cs_console/packages.config000066400000000000000000000003651332666127000230220ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/cs_console/setenv.bat000066400000000000000000000003311332666127000220420ustar00rootroot00000000000000@ECHO OFF SET Path=%Path%;%programfiles%\VowpalWabbit @echo Welcome to Vowpal Wabbit Enviroment @echo. @echo vw.exe ... regular Vowpal Wabbit machine learning @echo vw_json.exe ... JSON ingesting Vowpal Wabbit vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/000077500000000000000000000000001332666127000173605ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/Properties/000077500000000000000000000000001332666127000215145ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/Properties/AssemblyInfo.cs000066400000000000000000000022251332666127000244370ustar00rootroot00000000000000//------------------------------------------------------------------------------ // // This code was generated by a tool. // Runtime Version:4.0.30319.42000 // // Changes to this file may cause incorrect behavior and will be lost if // the code is regenerated. // //------------------------------------------------------------------------------ [assembly: System.Reflection.AssemblyTitle("Vowpal Wabbit JSON")] [assembly: System.Reflection.AssemblyDescription("Vowpal Wabbit JSON")] [assembly: System.Reflection.AssemblyCompany("Microsoft Corp")] [assembly: System.Reflection.AssemblyProduct("Vowpal Wabbit")] [assembly: System.Reflection.AssemblyCopyright("Copyright (C) Microsoft Corp 2012-2016, Yahoo! Inc. 2007-2012, and many individua" + "l contributors. 
All rights reserved")] [assembly: System.Runtime.InteropServices.ComVisible(false)] [assembly: System.CLSCompliant(false)] [assembly: System.Runtime.InteropServices.Guid("8a34db14-bac2-474b-8102-be25ca5f2c55")] [assembly: System.Reflection.AssemblyVersion("8.4.0.1")] [assembly: System.Reflection.AssemblyFileVersion("8.4.0.1")] vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/Serializer/000077500000000000000000000000001332666127000214715ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/Serializer/IVowpalWabbitJsonConverter.cs000066400000000000000000000017611332666127000272610ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System.Collections.Generic; namespace VW.Serializer { /// /// Optimization interface for JsonConverter holding one or more independently parseable JSON fragments. /// /// /// This avoids string copying. /// public interface IVowpalWabbitJsonConverter { /// /// List of independently parseable JSON fragments. /// IEnumerable JsonFragments(object value); } } vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/Serializer/JsonRawStringConverter.cs000066400000000000000000000041161332666127000264640ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using Newtonsoft.Json; using System; using System.Collections.Generic; namespace VW.Serializer { /// /// Custom JSON converter returning the underlying raw json (avoiding object allocation). /// public class JsonRawStringConverter : JsonConverter, IVowpalWabbitJsonConverter { /// /// Supports string only. /// public override bool CanConvert(Type objectType) { return objectType == typeof(string); } /// /// Not implemented. /// public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer) { throw new NotImplementedException(); } /// /// Outputs the string contents as JSON. /// public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer) { var valueString = value as string; if (valueString != null) { writer.WriteRawValue(valueString); return; } serializer.Serialize(writer, value); } /// /// List of independently parseable JSON fragments. /// public IEnumerable JsonFragments(object value) { var valueString = value as string; if (valueString != null) { yield return valueString; yield break; } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/Serializer/JsonRawStringListConverter.cs000066400000000000000000000044311332666127000273200ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using Newtonsoft.Json; using System; using System.Collections.Generic; namespace VW.Serializer { /// /// Custom JSON converter returning the underlying raw json (avoiding object allocation). 
/// public class JsonRawStringListConverter : JsonConverter, IVowpalWabbitJsonConverter { /// /// Supports string only. /// public override bool CanConvert(Type objectType) { return objectType == typeof(List); } /// /// Not implemented. /// public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer) { throw new NotImplementedException(); } /// /// Outputs the string contents as JSON. /// public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer) { var valueStringEnumerable = value as List; if (valueStringEnumerable != null) { writer.WriteStartArray(); foreach (var str in valueStringEnumerable) writer.WriteRawValue(str); writer.WriteEndArray(); return; } serializer.Serialize(writer, value); } /// /// List of independently parseable JSON fragments. /// public IEnumerable JsonFragments(object value) { var valueStringList = value as List; if (valueStringList == null) throw new ArgumentException($"Unsupported type: {value}"); return valueStringList; } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/Serializer/JsonTypeInspector.cs000066400000000000000000000430531332666127000254670ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using Newtonsoft.Json; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Linq.Expressions; using System.Reflection; using System.Text; using VW.Labels; namespace VW.Serializer { /// /// Utility class analyzing compile-time annotation. /// public static class JsonTypeInspector { /// /// The singleton native instance. 
/// public static readonly ITypeInspector Default = new JsonTypeInspectorImpl(); private sealed class JsonTypeInspectorImpl : ITypeInspector { public Schema CreateSchema(VowpalWabbitSettings settings, Type type) { return JsonTypeInspector.CreateSchema(type, settings.PropertyConfiguration); } } private static readonly Type[] SupportedTypes; private static readonly Type[] DictTypes; static JsonTypeInspector() { var numericElementTypes = new[] { typeof(byte), typeof(sbyte), typeof(Int16), typeof(Int32), typeof(UInt16), typeof(UInt32), typeof(float), typeof(Int64), typeof(UInt64), typeof(double) }; var enumerableType = typeof(IEnumerable).GetGenericTypeDefinition(); var dictType = typeof(IDictionary).GetGenericTypeDefinition(); SupportedTypes = new[] { typeof(char), typeof(bool), typeof(string), typeof(double), typeof(float), typeof(byte), typeof(decimal), typeof(UInt16), typeof(UInt32), typeof(UInt64), typeof(Int16), typeof(Int32), typeof(Int64), } .Union(numericElementTypes.Select(valueType => enumerableType.MakeGenericType(valueType))) .Union(numericElementTypes.Select(valueType => valueType.MakeArrayType())) .ToArray(); DictTypes = numericElementTypes .SelectMany(valueType => new[] { dictType.MakeGenericType(typeof(string), valueType), dictType.MakeGenericType(typeof(string), valueType.MakeArrayType()), }) .ToArray(); } private static bool IsTypeSupported(Type type) { return SupportedTypes.Any(t => t.IsAssignableFrom(type)); } private static bool IsDictType(Type type) { return DictTypes.Any(t => t.IsAssignableFrom(type)); } /// /// Extract the JSON.NET from the type. Defaults to . /// /// is not supported. 
private static MemberSerialization GetMemberSerialiation(Type type) { var jsonObjectAttr = (JsonObjectAttribute)type.GetCustomAttributes(typeof(JsonObjectAttribute), true).FirstOrDefault(); if (jsonObjectAttr == null) return MemberSerialization.OptOut; if (jsonObjectAttr.MemberSerialization == MemberSerialization.Fields) throw new ArgumentException("MemberSerialization.Fields is set on type " + type + " and is not supported"); return jsonObjectAttr.MemberSerialization; } private static Func CreateValueExpressionFactory(PropertyInfo namespacePropertyInfo, PropertyInfo featurePropertyInfo) { Func baseExpression = v => namespacePropertyInfo == null ? v : // CODE example Expression.Property(v, namespacePropertyInfo); // CODE example.NamespaceProperty var attr = featurePropertyInfo.GetCustomAttribute(typeof(JsonConverterAttribute), true) as JsonConverterAttribute; if (attr == null) // CODE example.FeatureProperty or example.NamespaceProperty.FeatureProperty return example => Expression.Property(baseExpression(example), featurePropertyInfo); // validate var converterCtor = attr.ConverterParameters == null ? attr.ConverterType.GetConstructor(Type.EmptyTypes) : attr.ConverterType.GetConstructor(attr.ConverterParameters.Select(o => o.GetType()).ToArray()); if (converterCtor == null) throw new ArgumentException($"Unable to find constructor for converter '{attr.ConverterType}' for '{featurePropertyInfo.Name}'"); var jsonConverter = converterCtor.Invoke(attr.ConverterParameters) as JsonConverter; if (jsonConverter == null) throw new ArgumentException($"JsonConverter '{attr.ConverterType}' for '{featurePropertyInfo.Name}' is not of type JsonConverter"); if (!jsonConverter.CanConvert(featurePropertyInfo.PropertyType)) throw new ArgumentException($"JsonConverter '{attr.ConverterType}' for '{featurePropertyInfo.Name}' does not support property type '{featurePropertyInfo.PropertyType}'"); // CODE: new JsonConverter*(arg1, arg2,...) 
var converterExpression = attr.ConverterParameters == null ? Expression.New(converterCtor) : Expression.New(converterCtor, attr.ConverterParameters.Select(o => Expression.Constant(o))); // leverage optimized path var serializableCtor = jsonConverter is IVowpalWabbitJsonConverter ? typeof(VowpalWabbitJsonOptimizedSerializable).GetConstructor(new[] { typeof(object), typeof(IVowpalWabbitJsonConverter) }) : typeof(VowpalWabbitJsonSerializable).GetConstructor(new[] { typeof(object), typeof(JsonConverter) }); // CODE new VowpalWabbitJsonConverter(object, new JsonConverter(...)) return example => Expression.New( serializableCtor, Expression.Property(baseExpression(example), featurePropertyInfo), converterExpression); } /// /// Extracts VW features from given type based on JSON.NET annotation. Basic structure: /// /// { /// _label: { ... }, // SimpleLabel or ContextualBanditLabel /// ns1: { // Complex types denote namespaces. Property name becomes namespace name. /// feature1: 5, // Primitive types denote features /// ... /// }, /// ns2 : { ... }, // another namespace /// feature2: true // Top-level primitive property becomes feature in default namespace. 
/// }
///
/// <param name="type">The example type whose public properties are inspected.</param>
/// <param name="propertyConfiguration">Names/prefixes of the special (label, text, multi, ignored) properties.</param>
/// <returns>
/// A schema holding the first matching label property (or null) and the union of namespace features,
/// default-namespace features and top-level dictionaries.
/// </returns>
internal static Schema CreateSchema(Type type, PropertyConfiguration propertyConfiguration)
{
    var exampleMemberSerialization = GetMemberSerialiation(type);

    // find all feature properties under namespace properties
    var namespaceFeatures =
        from ns in type.GetProperties()
        // removing any JsonIgnore properties
        where !ns.GetCustomAttributes(typeof(JsonIgnoreAttribute), true).Any()
        let nsAttr = (JsonPropertyAttribute)ns.GetCustomAttributes(typeof(JsonPropertyAttribute), true).FirstOrDefault()
        let nsIsMarkedWithJsonConverter = ns.GetCustomAttribute(typeof(JsonConverterAttribute), true) is JsonConverterAttribute
        where
            !IsDictType(ns.PropertyType) &&
            !IsTypeSupported(ns.PropertyType) &&
            !nsIsMarkedWithJsonConverter &&
            // model OptIn/OptOut (of the example type, which declares the namespace property)
            (exampleMemberSerialization == MemberSerialization.OptOut || (exampleMemberSerialization == MemberSerialization.OptIn && nsAttr != null))
        let namespaceRawValue = nsAttr != null && nsAttr.PropertyName != null ? nsAttr.PropertyName : ns.Name
        // filter all aux properties
        where !namespaceRawValue.StartsWith(propertyConfiguration.FeatureIgnorePrefix, StringComparison.Ordinal)
        // first character selects the feature group, the remainder is the namespace name
        let featureGroup = namespaceRawValue[0]
        let namespaceValue = namespaceRawValue.Length > 1 ? namespaceRawValue.Substring(1) : null
        let namespaceMemberSerialization = GetMemberSerialiation(ns.PropertyType)
        from p in ns.PropertyType.GetProperties()
        // removing any JsonIgnore properties
        where !p.GetCustomAttributes(typeof(JsonIgnoreAttribute), true).Any()
        let attr = (JsonPropertyAttribute)p.GetCustomAttributes(typeof(JsonPropertyAttribute), true).FirstOrDefault()
        let isMarkedWithJsonConverter = p.GetCustomAttribute(typeof(JsonConverterAttribute), true) is JsonConverterAttribute
        where
            (IsTypeSupported(p.PropertyType) || isMarkedWithJsonConverter) &&
            // model OptIn/OptOut. The nested properties belong to the namespace type, so the mode
            // declared on THAT type decides (previously the example type's mode was used by mistake,
            // leaving namespaceMemberSerialization unused).
            (namespaceMemberSerialization == MemberSerialization.OptOut || (namespaceMemberSerialization == MemberSerialization.OptIn && attr != null))
        let name = attr != null && attr.PropertyName != null ? attr.PropertyName : p.Name
        let isTextProperty = name == propertyConfiguration.TextProperty
        // filter all aux properties
        where isTextProperty || !name.StartsWith(propertyConfiguration.FeatureIgnorePrefix, StringComparison.Ordinal)
        select new FeatureExpression(
            featureType: isMarkedWithJsonConverter ? typeof(VowpalWabbitJsonSerializable) : p.PropertyType,
            name: name,
            // CODE example.NamespaceProperty.FeatureProperty
            valueExpressionFactory: CreateValueExpressionFactory(ns, p),
            // Note: default to string escaping
            stringProcessing: isTextProperty ? StringProcessing.Split : StringProcessing.EscapeAndIncludeName,
            // CODE example != null
            // CODE example.NamespaceProperty != null
            valueValidExpressionFactories: new List<Func<Expression, Expression>>{
                valueExpression => Expression.NotEqual(valueExpression, Expression.Constant(null)),
                valueExpression => Expression.NotEqual(Expression.Property(valueExpression, ns), Expression.Constant(null))
            },
            @namespace: namespaceValue,
            featureGroup: featureGroup);

    // find all top-level feature properties for the default namespace
    var defaultNamespaceFeatures =
        from p in type.GetProperties()
        // removing any JsonIgnore properties
        where !p.GetCustomAttributes(typeof(JsonIgnoreAttribute), true).Any()
        let attr = (JsonPropertyAttribute)p.GetCustomAttributes(typeof(JsonPropertyAttribute), true).FirstOrDefault()
        where
            // model OptIn/OptOut
            (exampleMemberSerialization == MemberSerialization.OptOut || (exampleMemberSerialization == MemberSerialization.OptIn && attr != null))
        let name = attr != null && attr.PropertyName != null ? attr.PropertyName : p.Name
        // filter all aux properties, except for special props
        where propertyConfiguration.IsSpecialProperty(name) ||
            !name.StartsWith(propertyConfiguration.FeatureIgnorePrefix, StringComparison.Ordinal)
        // filtering labels for now
        where name != propertyConfiguration.LabelProperty
        let isMarkedWithJsonConverter = p.GetCustomAttribute(typeof(JsonConverterAttribute), true) is JsonConverterAttribute
        where IsTypeSupported(p.PropertyType) ||
            // _multi can be any list type that JSON.NET supports
            name == propertyConfiguration.MultiProperty ||
            isMarkedWithJsonConverter ||
            // labels must be ILabel or string
            // Note: from the JSON side they actually can be anything that serializes to the same properties as ILabel implementors
            (name == propertyConfiguration.LabelProperty &&
                (typeof(ILabel).IsAssignableFrom(p.PropertyType) || p.PropertyType == typeof(string)))
        select new FeatureExpression(
            featureType: isMarkedWithJsonConverter ? typeof(VowpalWabbitJsonSerializable) : p.PropertyType,
            name: name,
            // CODE example.FeatureProperty
            valueExpressionFactory: CreateValueExpressionFactory(null, p),
            // Note: default to string escaping
            stringProcessing: name == propertyConfiguration.TextProperty ? StringProcessing.Split : StringProcessing.EscapeAndIncludeName,
            // CODE example != null
            valueValidExpressionFactories: new List<Func<Expression, Expression>>{
                valueExpression => Expression.NotEqual(valueExpression, Expression.Constant(null))
            },
            // array properties carry feature group + namespace in their name, everything else goes to the default namespace
            @namespace: p.PropertyType.IsArray && name.Length > 1 ? name.Substring(1) : null,
            featureGroup: p.PropertyType.IsArray && name.Length > 0 ? name[0] : VowpalWabbitConstants.DefaultNamespace);

    // find all top-level dictionaries
    var topLevelDictionaries =
        from p in type.GetProperties()
        // removing any JsonIgnore properties
        where !p.GetCustomAttributes(typeof(JsonIgnoreAttribute), true).Any()
        let attr = (JsonPropertyAttribute)p.GetCustomAttributes(typeof(JsonPropertyAttribute), true).FirstOrDefault()
        where
            // model OptIn/OptOut
            (exampleMemberSerialization == MemberSerialization.OptOut || (exampleMemberSerialization == MemberSerialization.OptIn && attr != null))
        where IsDictType(p.PropertyType)
        let name = attr != null && attr.PropertyName != null ? attr.PropertyName : p.Name
        let namespaceRawValue = attr != null && attr.PropertyName != null ? attr.PropertyName : p.Name
        // filter all aux properties
        where !namespaceRawValue.StartsWith(propertyConfiguration.FeatureIgnorePrefix, StringComparison.Ordinal)
        let featureGroup = namespaceRawValue[0]
        let namespaceValue = namespaceRawValue.Length > 1 ? namespaceRawValue.Substring(1) : null
        select new FeatureExpression(
            featureType: p.PropertyType,
            name: name,
            // CODE example.FeatureProperty
            valueExpressionFactory: CreateValueExpressionFactory(null, p),
            // CODE example != null
            valueValidExpressionFactories: new List<Func<Expression, Expression>> {
                valueExpression => Expression.NotEqual(valueExpression, Expression.Constant(null))
            },
            @namespace: namespaceValue,
            featureGroup: featureGroup);

    // find label
    var labelProperties =
        from p in type.GetProperties()
        // removing any JsonIgnore properties
        where !p.GetCustomAttributes(typeof(JsonIgnoreAttribute), true).Any()
        let attr = (JsonPropertyAttribute)p.GetCustomAttributes(typeof(JsonPropertyAttribute), true).FirstOrDefault()
        where
            // model OptIn/OptOut
            (exampleMemberSerialization == MemberSerialization.OptOut || (exampleMemberSerialization == MemberSerialization.OptIn && attr != null))
        let name = attr != null && attr.PropertyName != null ? attr.PropertyName : p.Name
        // filtering labels for now
        where name == propertyConfiguration.LabelProperty
        where
            // labels must be ILabel or string
            // Note: from the JSON side they actually can be anything that serializes to the same properties as ILabel implementors
            (name == propertyConfiguration.LabelProperty &&
                (typeof(ILabel).IsAssignableFrom(p.PropertyType) || p.PropertyType == typeof(string)))
        select new LabelExpression
        {
            LabelType = p.PropertyType,
            Name = name,
            // CODE example.Label
            ValueExpressionFactory = valueExpression => Expression.Property(valueExpression, p),
            // CODE example != null
            ValueValidExpressionFactories = new List<Func<Expression, Expression>>{
                valueExpression => Expression.NotEqual(valueExpression, Expression.Constant(null))
            }
        };

    // TODO: _label_ and _labelIndex is not supported

    return new Schema
    {
        Label = labelProperties.FirstOrDefault(),
        Features = namespaceFeatures
            .Union(defaultNamespaceFeatures)
            .Union(topLevelDictionaries).ToList()
    };
}
} // end of class
} // end of namespace
vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/Serializer/PrefixedJsonReader.cs000066400000000000000000000037041332666127000255470ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------

using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Linq;

namespace VW.Serializer
{
    /// <summary>
    /// A Json Reader allowing to prefix data from a wrapped JsonReader.
    /// </summary>
    internal class PrefixedJsonReader : JsonReader
    {
        // the wrapped reader; consulted once the injected prefix is exhausted
        private JsonReader reader;

        // tokens (type + value) still to be emitted before delegating to the wrapped reader
        private Queue<Tuple<JsonToken, object>> prefix;

        /// <summary>
        /// Initializes a new instance of <see cref="PrefixedJsonReader"/>.
        /// </summary>
        /// <param name="reader">The reader to be wrapped.</param>
        /// <param name="prefix">The JsonTokens to be injected at the beginning of the stream.</param>
internal PrefixedJsonReader(JsonReader reader, params Tuple<JsonToken, object>[] prefix)
{
    this.reader = reader;
    this.prefix = new Queue<Tuple<JsonToken, object>>(prefix);
}

/// <summary>
/// Advances to the next token: first the injected prefix tokens in order, then the
/// tokens of the wrapped reader.
/// </summary>
/// <returns>True if another token is available, false otherwise.</returns>
public override bool Read()
{
    if (this.prefix.Count == 0)
    {
        // prefix exhausted: mirror the wrapped reader
        var hasToken = this.reader.Read();
        if (hasToken)
        {
            this.SetToken(this.reader.TokenType, this.reader.Value);
        }

        return hasToken;
    }

    var injected = this.prefix.Dequeue();
    this.SetToken(injected.Item1, injected.Item2);
    return true;
}
} // end of class PrefixedJsonReader
} // end of namespace
vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/Serializer/VowpalWabbitJsonBuilder.cs000066400000000000000000000766771332666127000266110ustar00rootroot00000000000000// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------

using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.Linq;
using VW.Labels;
using VW.Serializer.Intermediate;

namespace VW.Serializer
{
    /// <summary>
    /// Build VW examples from JSON following https://github.com/JohnLangford/vowpal_wabbit/wiki/JSON
    /// </summary>
    public sealed class VowpalWabbitJsonBuilder : IDisposable
    {
        ///
        /// Mapping from properties to types for labels.
///
private static readonly Dictionary<string, Type> labelPropertyMapping;

private readonly VowpalWabbit vw;

private readonly VowpalWabbitDefaultMarshaller defaultMarshaller;

private readonly JsonSerializer jsonSerializer;

// required for reference resolution
private readonly VowpalWabbitJsonSerializer serializer;

private readonly VowpalWabbitJsonReferenceResolver referenceResolver;

// string representation of each non-default namespace, only filled when string example generation is enabled
private readonly List<string> namespaceStrings;

// state of the parse currently in progress (see the Parse overloads)
private JsonReader reader;

private bool foundMulti;

private JObject labelObject;

private ILabel label;

private int featureCount;

private VowpalWabbitJsonParseState extensionState;

private List<VowpalWabbitJsonExtension> extensions;

/// <summary>
/// Builds the lookup from lower-cased JSON property names of the known label types
/// (SimpleLabel, ContextualBanditLabel) to the label type declaring them.
/// </summary>
static VowpalWabbitJsonBuilder()
{
    // find mapping from property names to types
    var mapping = new Dictionary<string, Type>();

    foreach (var labelType in new[] { typeof(SimpleLabel), typeof(ContextualBanditLabel) })
    {
        foreach (var property in labelType.GetProperties())
        {
            var jsonProperty = (JsonPropertyAttribute)property.GetCustomAttributes(typeof(JsonPropertyAttribute), true).FirstOrDefault();

            // only properties explicitly annotated for JSON take part
            if (jsonProperty == null)
            {
                continue;
            }

            var key = (jsonProperty.PropertyName ?? property.Name).ToLowerInvariant();

            // Add (like ToDictionary) rejects duplicate keys with an ArgumentException
            mapping.Add(key, labelType);
        }
    }

    labelPropertyMapping = mapping;
}

/// <summary>
/// Initializes a new instance of <see cref="VowpalWabbitJsonBuilder"/> without a serializer,
/// i.e. without "$id"/"$ref" reference resolution.
/// </summary>
public VowpalWabbitJsonBuilder(IVowpalWabbitExamplePool vwPool, VowpalWabbitDefaultMarshaller defaultMarshaller, JsonSerializer jsonSerializer, int multiIndex = -1)
    : this(null, vwPool, defaultMarshaller, jsonSerializer, multiIndex)
{
}

///
/// Initializes a new instance of .
///
/// <param name="serializer">
/// Optional serializer supplying the reference resolver used for "$id"/"$ref" handling.
/// May be null (the overload without a serializer passes null).
/// </param>
/// <param name="vwPool">Provides the native VW instance the examples are built for.</param>
/// <param name="defaultMarshaller">Marshaller used for features, namespaces and labels.</param>
/// <param name="jsonSerializer">JSON.NET serializer used to deserialize label objects.</param>
/// <param name="multiIndex">Index forwarded to parse extensions through the parse state; defaults to -1.</param>
public VowpalWabbitJsonBuilder(VowpalWabbitJsonSerializer serializer, IVowpalWabbitExamplePool vwPool, VowpalWabbitDefaultMarshaller defaultMarshaller, JsonSerializer jsonSerializer, int multiIndex = -1)
{
    // Note: no contract on 'serializer' - null is a supported value (see the null check below and
    // the sibling constructor). The pool contract must test the parameter, not the 'vw' field,
    // which is still unassigned at this point.
    Contract.Requires(vwPool != null);
    Contract.Requires(defaultMarshaller != null);
    Contract.Requires(jsonSerializer != null);

    this.extensionState = new VowpalWabbitJsonParseState
    {
        JsonBuilder = this,
        VW = vwPool.Native,
        MultiIndex = multiIndex
    };

    this.namespaceStrings = new List<string>();
    this.foundMulti = false;

    // reference resolution is only available when a serializer was supplied
    if (serializer != null)
        this.referenceResolver = serializer.ReferenceResolver;

    this.serializer = serializer;
    this.vw = vwPool.Native;
    this.defaultMarshaller = defaultMarshaller;
    this.jsonSerializer = jsonSerializer;

    this.DefaultNamespaceContext = new VowpalWabbitMarshalContext(this.vw);
}

// useful for tracking down bugs
// private string DefaultNamespaceContextStackTrace;

/// <summary>
/// The marshalling context for the default namespace. It is disposed and reset to null
/// by <see cref="CreateExample"/> and by <see cref="Dispose()"/>.
/// </summary>
public VowpalWabbitMarshalContext DefaultNamespaceContext { get; private set; }

/// <summary>
/// The index the label was assigned to for multi line examples.
/// </summary>
public int LabelIndex { get; private set; }

/// <summary>
/// The label that was deserialized.
/// </summary>
public ILabel Label { get; private set; }

/// <summary>
/// Creates the managed example representation.
/// </summary>
/// <returns>Returns the managed example, or null if no features were parsed.</returns>
public VowpalWabbitExample CreateExample()
{
    try
    {
        // nothing was marshalled: there is no example to create
        if (this.featureCount == 0)
        {
            return null;
        }

        var example = this.DefaultNamespaceContext.ExampleBuilder.CreateExample();

        if (!this.vw.Settings.EnableStringExampleGeneration)
        {
            return example;
        }

        // the default namespace goes first, followed by all collected namespaces
        var defaultNamespaceString = this.DefaultNamespaceContext.ToString();
        if (defaultNamespaceString.Length > 0)
        {
            this.namespaceStrings.Insert(0, defaultNamespaceString);
        }

        example.VowpalWabbitString = string.Join(" ", this.namespaceStrings);

        return example;
    }
    finally
    {
        // useful for tracking down bugs
        // this.DefaultNamespaceContextStackTrace = "Create Example" + Environment.StackTrace;

        // the context is single-use: release it regardless of the outcome
        this.DefaultNamespaceContext.Dispose();
        this.DefaultNamespaceContext = null;
    }
}

// re-entering from extension
internal void Parse(List<VowpalWabbitJsonParseContext> path, VowpalWabbitMarshalContext namespaceContext, Namespace ns)
{
    // marshal first, read featureCount afterwards: ParseProperties may itself add to featureCount
    var marshalled = this.defaultMarshaller.MarshalNamespace(namespaceContext, ns, () => this.ParseProperties(path));
    this.featureCount = marshalled + this.featureCount;
}

/// <summary>
/// Parse VW JSON into the supplied marshalling context, using <paramref name="ns"/> as the namespace
/// of the top-level object. While parsing, <see cref="DefaultNamespaceContext"/> is temporarily
/// redirected to a context sharing the example builder of <paramref name="context"/>.
/// </summary>
/// <param name="reader">The JSON reader; may be positioned before or at the start object token.</param>
/// <param name="context">The context receiving the features.</param>
/// <param name="ns">The namespace for the top-level properties.</param>
/// <param name="extensions">Optional handlers for special properties.</param>
public void Parse(JsonReader reader, VowpalWabbitMarshalContext context, Namespace ns, List<VowpalWabbitJsonExtension> extensions = null)
{
    this.namespaceStrings.Clear();
    this.reader = reader;
    this.extensions = extensions;

    // handle the case when the reader is already positioned at JsonToken.StartObject
    if (reader.TokenType == JsonToken.None && !reader.Read())
    {
        return;
    }

    // don't barf on null values.
    if (reader.TokenType == JsonToken.Null)
    {
        return;
    }

    if (reader.TokenType != JsonToken.StartObject)
    {
        throw new VowpalWabbitJsonException(this.reader,
            $"Expected start object. Found '{reader.TokenType}' and value '{reader.Value}' for namespace {ns.Name}");
    }

    // re-direct default namespace to the one passed
    var previousDefaultContext = this.DefaultNamespaceContext;
    try
    {
        using (this.DefaultNamespaceContext = new VowpalWabbitMarshalContext(this.vw, context.ExampleBuilder))
        {
            VowpalWabbitJsonParseContext current = null;
            try
            {
                // setup current namespace
                current = new VowpalWabbitJsonParseContext
                {
                    Namespace = ns,
                    Context = new VowpalWabbitMarshalContext(this.vw, context.ExampleBuilder),
                    JsonProperty = ns.Name
                };

                var path = new List<VowpalWabbitJsonParseContext> { current };
                this.defaultMarshaller.MarshalNamespace(current.Context, ns, () => this.ParseProperties(path));

                // append string features if we found some
                if (this.vw.Settings.EnableStringExampleGeneration)
                {
                    var joinedNamespaces = string.Join(" ", this.namespaceStrings);
                    context.StringExample
                        .Append(current.Context.StringExample)
                        .Append(joinedNamespaces);
                }
            }
            finally
            {
                var toRelease = current == null ? null : current.Context;
                if (toRelease != null)
                {
                    toRelease.Dispose();
                    current.Context = null;
                }
            }
        }
    }
    finally
    {
        this.DefaultNamespaceContext = previousDefaultContext;
    }
}

/// <summary>
/// Parses the example.
/// </summary>
/// <param name="reader">The example to parse.</param>
/// <param name="label">
/// Optional label, taking precedence over "_label" property found in <paramref name="reader"/>.
/// If null, <paramref name="reader"/> will be inspected and the "_label" property used as label.
/// </param>
/// <param name="extensions">Action to be executed when special properties are discovered.</param>
/// <returns>The VowpalWabbit native example.</returns>
public void Parse(JsonReader reader, ILabel label = null, List<VowpalWabbitJsonExtension> extensions = null)
{
    // reset per-example state
    this.featureCount = 0;
    this.labelObject = null;
    this.foundMulti = false;

    // avoid parameter passing for the sake of non-reentrantness
    this.reader = reader;
    this.label = label;
    this.extensions = extensions;

    if (label != null)
    {
        this.defaultMarshaller.MarshalLabel(this.DefaultNamespaceContext, label);
    }

    // handle the case when the reader is already positioned at JsonToken.StartObject
    if (reader.TokenType == JsonToken.None && !reader.Read())
    {
        return;
    }

    if (reader.TokenType != JsonToken.StartObject)
    {
        throw new VowpalWabbitJsonException(this.reader,
            string.Format("Expected start object. Found '{0}' and value '{1}'", reader.TokenType, reader.Value));
    }

    var rootNamespace = new Namespace(this.vw);
    var rootContext = new VowpalWabbitJsonParseContext
    {
        Namespace = rootNamespace,
        Context = this.DefaultNamespaceContext,
        JsonProperty = string.Empty
    };
    var path = new List<VowpalWabbitJsonParseContext> { rootContext };

    this.extensionState.Reader = reader;
    this.extensionState.Path = path;

    // TODO: duplicate namespace recursion to enable async
    // featureCount might be modified inside ParseProperties, hence marshal first and read it afterwards
    var marshalled = this.defaultMarshaller.MarshalNamespace(this.DefaultNamespaceContext, rootNamespace, () => this.ParseProperties(path));
    this.featureCount = marshalled + this.featureCount;

    // label assembled from individual prefixed label properties
    if (this.labelObject == null)
    {
        return;
    }

    var propertyName = ((JProperty)this.labelObject.First).Name;

    Type labelType;
    if (!labelPropertyMapping.TryGetValue(propertyName.ToLowerInvariant(), out labelType))
    {
        throw new VowpalWabbitJsonException(this.reader, "The first property ('" + propertyName + "') must match to a property of a VowpalWabbit label type.");
    }

    var labelObj = (ILabel)this.labelObject.ToObject(labelType);

    if (this.foundMulti)
    {
        // multi-line example: expose the label instead of attaching it to this example
        this.Label = labelObj;
    }
    else
    {
        this.defaultMarshaller.MarshalLabel(this.DefaultNamespaceContext, labelObj);
    }
}

/// <summary>
/// Handles a property that is either special (label, text, label index, prefixed label property, multi)
/// or starts with the feature-ignore prefix. Anything not consumed here or by an extension is skipped.
/// </summary>
private void ParseSpecialProperty(VowpalWabbitJsonParseContext context, string propertyName)
{
    var config = this.vw.Settings.PropertyConfiguration;
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;

    // special fields
    if (propertyName.Equals(config.LabelProperty, ignoreCase))
    {
        // passed in label has precedence
        if (label != null)
        {
            reader.Skip();
        }
        else
        {
            this.ParseLabel();
        }

        return;
    }

    if (propertyName.Equals(config.TextProperty, ignoreCase))
    {
        // parse text segment feature
        this.defaultMarshaller.MarshalFeatureStringSplit(
            context.Context,
            context.Namespace,
            new Feature(propertyName),
            reader.ReadAsString());

        return;
    }

    if (propertyName.Equals(config.LabelIndexProperty, ignoreCase))
    {
        if (!this.reader.Read())
        {
            throw new VowpalWabbitJsonException(this.reader, "Unexpected end");
        }

        // skip
        if (this.reader.TokenType != JsonToken.Null)
        {
            this.LabelIndex = (int)(long)this.reader.Value;
        }

        return;
    }

    if (propertyName.StartsWith(config.LabelPropertyPrefix, ignoreCase))
    {
        if (!this.reader.Read())
        {
            throw new VowpalWabbitJsonException(this.reader, "Unexpected end");
        }

        // skip
        if (this.reader.TokenType == JsonToken.Null)
        {
            return;
        }

        // collect label properties; the label itself is materialized at the end of Parse
        if (this.labelObject == null)
        {
            this.labelObject = new JObject();
        }

        var targetPropertyName = propertyName.Substring(config.LabelPropertyPrefix.Length);
        this.labelObject.Add(targetPropertyName, new JValue(this.reader.Value));

        return;
    }

    if (propertyName.Equals(config.MultiProperty, StringComparison.Ordinal))
    {
        this.foundMulti = true;
    }

    // forward to handler
    if (this.extensions != null)
    {
        foreach (var extension in this.extensions)
        {
            if (extension(this.extensionState, propertyName))
            {
                return;
            }
        }
    }

    // if not handled, skip it
    reader.Skip();
}

/// <summary>
/// Parses the value of the label property: either an object matching one of the known label types,
/// a primitive handed to VW as a string label, or null (ignored).
/// </summary>
private void ParseLabel()
{
    // peak the first property name
    if (!this.reader.Read())
    {
        throw new VowpalWabbitJsonException(this.reader, "Unexpected end");
    }

    switch (reader.TokenType)
    {
        case JsonToken.StartObject:
            {
                // parse complex object
                var atFirstProperty = reader.Read() && reader.TokenType == JsonToken.PropertyName;
                if (!atFirstProperty)
                {
                    throw new VowpalWabbitJsonException(this.reader, "Expected at least a single property to determine the label object");
                }

                var firstPropertyName = (string)reader.Value;

                // the two tokens consumed above are replayed in front of the remaining stream
                var replayReader = new PrefixedJsonReader(this.reader,
                    Tuple.Create(JsonToken.StartObject, (object)null),
                    Tuple.Create(JsonToken.PropertyName, (object)firstPropertyName));

                Type labelType;
                if (!labelPropertyMapping.TryGetValue(firstPropertyName.ToLowerInvariant(), out labelType))
                {
                    throw new VowpalWabbitJsonException(this.reader, "The first property ('" + firstPropertyName + "') must match to a property of a VowpalWabbit label type.");
                }

                var parsedLabel = (ILabel)jsonSerializer.Deserialize(replayReader, labelType);

                this.defaultMarshaller.MarshalLabel(this.DefaultNamespaceContext, parsedLabel);
                break;
            }

        case JsonToken.Integer:
        case JsonToken.Float:
        case JsonToken.String:
            {
                // pass label directly to VW
                var stringLabel = new StringLabel(reader.Value.ToString());
                this.defaultMarshaller.MarshalLabel(this.DefaultNamespaceContext, stringLabel);
                break;
            }

        case JsonToken.Null:
            // ignore
            break;

        default:
            throw new VowpalWabbitJsonException(this.reader, "Expected label object");
    }
}

///
/// Expects that actual feature value.
///
/// <param name="path">Stack of parse contexts; the last entry is the namespace currently being parsed.</param>
/// <param name="featureName">Name of the property whose value the reader is positioned at.</param>
private void ParseFeature(List<VowpalWabbitJsonParseContext> path, string featureName)
{
    switch (featureName)
    {
        case "$id":
            {
                if (this.referenceResolver == null)
                    return;

                var id = (string)reader.Value;

                if (!reader.Read() ||
                    reader.TokenType != JsonToken.PropertyName ||
                    (string)reader.Value != "$values")
                    throw new VowpalWabbitJsonException(this.reader, "Expecting '$values' property");

                // read $values
                if (!reader.Read())
                    throw new VowpalWabbitJsonException(this.reader, "Unexpected end");

                // the owning namespace is looked up 2 levels up, so there must be an enclosing property
                if (path.Count < 2)
                    throw new VowpalWabbitJsonException(this.reader, "'$id' must be nested inside a property");

                // create re-useable marshalling call
                var marshalAction = this.ParseFeatureReUsable();

                // keep action for re-use
                this.referenceResolver.AddReference(id, marshalAction);

                // "$values": null (or a comment) carries nothing to marshal
                if (marshalAction == null)
                    return;

                // go up 2 levels to find actual namespace, the last one is actually the property we want to serialize
                featureName = path.Last().JsonProperty;
                var context = path[path.Count - 2];
                marshalAction.Marshal(this.defaultMarshaller, context.Context, context.Namespace, featureName);
            }
            return;

        case "$ref":
            {
                if (this.referenceResolver == null || this.serializer == null)
                    return;

                var id = (string)reader.Value;

                // the owning namespace is looked up 2 levels up, so there must be an enclosing property
                if (path.Count < 2)
                    throw new VowpalWabbitJsonException(this.reader, "'$ref' must be nested inside a property");

                // go up 2 levels to find actual namespace, the last one is actually the property we want to serialize
                featureName = path.Last().JsonProperty;
                var ns = path[path.Count - 2].Namespace;

                this.referenceResolver.Resolve(
                    this.serializer,
                    id,
                    marshalAction =>
                    {
                        // a reference registered for a null "$values" has nothing to marshal
                        if (marshalAction == null)
                            return;

                        // setup fresh context
                        using (var context = new VowpalWabbitMarshalContext(this.vw, this.DefaultNamespaceContext.ExampleBuilder))
                        {
                            this.featureCount += this.defaultMarshaller.MarshalNamespace(
                                context,
                                ns,
                                () => marshalAction.Marshal(this.defaultMarshaller, context, ns, featureName));

                            // append default namespaces features if we found some
                            if (this.vw.Settings.EnableStringExampleGeneration)
                            {
                                var str = context.ToString();
                                if (str.Length > 0)
                                    this.namespaceStrings.Add(str);
                            }
                        }
                    });
            }
            return;
    }

    var localContext = path.Last();
    this.ParseFeature(path, localContext.Context, localContext.Namespace, featureName);
}

/// <summary>
/// Converts the value the reader is positioned at into a marshalling action that does not
/// depend on the current parsing context, so it can be kept and replayed for references.
/// </summary>
/// <returns>The marshalling action, or null for null values and comments.</returns>
/// <exception cref="VowpalWabbitJsonException">The token is not a supported feature value.</exception>
private IVowpalWabbitMarshalAction ParseFeatureReUsable()
{
    // make sure the returned action is independent of the current parsing context, so we can ship it
    switch (reader.TokenType)
    {
        case JsonToken.Float:
            return VowpalWabbitMarshalActions.Create((double)reader.Value);
        case JsonToken.Integer:
            return VowpalWabbitMarshalActions.Create((long)reader.Value);
        case JsonToken.String:
            return VowpalWabbitMarshalActions.Create((string)reader.Value);
        case JsonToken.Boolean:
            return VowpalWabbitMarshalActions.Create((bool)reader.Value);
        case JsonToken.Comment:
        case JsonToken.Null:
            // probably best to ignore?
            return null;
        case JsonToken.StartArray:
            return this.ParseFeatureArrayReUsable();
        default:
            // previously fell through to 'return null' and crashed the caller with a NullReferenceException;
            // report it the same way the non-reusable ParseFeature does
            throw new VowpalWabbitJsonException(this.reader, "Unexpected token " + reader.TokenType + " while deserializing primitive feature");
    }
}

/// <summary>
/// Reads a dense numeric array into a re-usable marshalling action.
/// Expects: "1,2.2,3]" (excluding the leading [)
/// </summary>
/// <returns>An action over the values read; only integer and float elements are accepted.</returns>
/// <exception cref="VowpalWabbitJsonException">The array contains a non-numeric token.</exception>
private IVowpalWabbitMarshalAction ParseFeatureArrayReUsable()
{
    var values = new float[16];
    var count = 0;
    var endOfArray = false;

    while (!endOfArray && reader.Read())
    {
        float val;
        switch (reader.TokenType)
        {
            case JsonToken.Integer:
                val = (float)(long)reader.Value;
                break;
            case JsonToken.Float:
                val = (float)(double)reader.Value;
                break;
            case JsonToken.EndArray:
                endOfArray = true;
                continue;
            default:
                throw new VowpalWabbitJsonException(this.reader, "Unxpected token " + reader.TokenType + " while deserializing dense feature array");
        }

        // grow geometrically once the buffer is full
        if (count == values.Length)
            Array.Resize(ref values, values.Length * 2);

        values[count++] = val;
    }

    // only the first 'count' entries of the buffer are valid
    return VowpalWabbitMarshalActions.Create(values, count);
}

///
/// Expects that actual feature value.
///
/// <param name="path">Stack of parse contexts, extended while arrays are parsed as nested namespaces.</param>
/// <param name="context">Marshalling context receiving the feature.</param>
/// <param name="ns">Namespace the feature belongs to.</param>
/// <param name="featureName">Name of the feature (JSON property name).</param>
/// <exception cref="VowpalWabbitJsonException">The token is not a supported feature value.</exception>
private void ParseFeature(List<VowpalWabbitJsonParseContext> path, VowpalWabbitMarshalContext context, Namespace ns, string featureName)
{
    switch (reader.TokenType)
    {
        case JsonToken.Float:
            VowpalWabbitMarshalActions.Marshal(this.defaultMarshaller, context, ns, featureName, (double)reader.Value);
            break;
        case JsonToken.Integer:
            VowpalWabbitMarshalActions.Marshal(this.defaultMarshaller, context, ns, featureName, (long)reader.Value);
            break;
        case JsonToken.String:
            VowpalWabbitMarshalActions.Marshal(this.defaultMarshaller, context, ns, featureName, (string)reader.Value);
            break;
        case JsonToken.Boolean:
            VowpalWabbitMarshalActions.Marshal(this.defaultMarshaller, context, ns, featureName, (bool)reader.Value);
            break;
        case JsonToken.Comment:
        case JsonToken.Null:
            // probably best to ignore?
            break;
        case JsonToken.StartArray:
            // arrays become their own namespace, named after the property
            this.WrapInNamespace(path, featureName, lastContext => this.ParseFeatureArray(path));
            break;
        default:
            throw new VowpalWabbitJsonException(this.reader, "Unexpected token " + reader.TokenType + " while deserializing primitive feature");
    }
}

/// <summary>
/// Parses a dense array into the namespace on top of <paramref name="path"/>.
/// Expects: "1,2.2,3]" (excluding the leading [)
/// </summary>
/// <remarks>
/// Numbers become features hashed at namespace hash + position, objects are parsed as nested
/// properties, nulls are ignored; every element (including nulls and objects) advances the position.
/// </remarks>
private void ParseFeatureArray(List<VowpalWabbitJsonParseContext> path)
{
    var context = path.Last().Context;
    var ns = path.Last().Namespace;

    ulong index = 0;

    while (reader.Read())
    {
        switch (reader.TokenType)
        {
            case JsonToken.Integer:
                MarshalFloatFeature(context, ns, index, (float)(long)reader.Value);
                break;
            case JsonToken.Float:
                MarshalFloatFeature(context, ns, index, (float)(double)reader.Value);
                break;
            case JsonToken.StartObject:
                ParseProperties(path);
                break;
            case JsonToken.EndArray:
                return;
            case JsonToken.Null:
                // just ignore nulls
                break;
            default:
                throw new VowpalWabbitJsonException(this.reader, "Unxpected token " + reader.TokenType + " while deserializing dense feature array");
        }
        index++;
    }
}

/// <summary>
/// Adds a single dense feature (hash = namespace hash + index) and, if string examples are
/// generated, appends its "index:value" representation.
/// </summary>
private static void MarshalFloatFeature(VowpalWabbitMarshalContext context, Namespace ns, ulong index, float value)
{
    context.NamespaceBuilder.AddFeature(ns.NamespaceHash + index, value);

    if (context.StringExample != null)
    {
        context.AppendStringExample(
            false,
            " {0}:" + (context.VW.Settings.EnableStringFloatCompact ? "{1}" : "{1:E20}"),
            index,
            value);
    }
}

/// <summary>
/// Runs <paramref name="action"/> inside a fresh namespace: a new parse context is pushed onto
/// <paramref name="path"/> for the duration of the call and all resources are released afterwards.
/// </summary>
/// <param name="path">Stack of parse contexts; restored to its original depth on exit.</param>
/// <param name="namespaceValue">Name of the namespace (JSON property name).</param>
/// <param name="action">Work to perform with the new parse context.</param>
private void WrapInNamespace(List<VowpalWabbitJsonParseContext> path, string namespaceValue, Action<VowpalWabbitJsonParseContext> action)
{
    VowpalWabbitJsonParseContext parseContext = null;
    VowpalWabbitMarshalContext marshalContext = null;

    // tracks whether parseContext made it onto the path, so cleanup never pops the caller's entry
    var pushed = false;

    try
    {
        var ns = new Namespace(this.vw, namespaceValue);
        marshalContext = new VowpalWabbitMarshalContext(this.vw, this.DefaultNamespaceContext.ExampleBuilder);

        parseContext = new VowpalWabbitJsonParseContext
        {
            Namespace = ns,
            Context = marshalContext,
            JsonProperty = namespaceValue
        };

        // the namespace is only added on dispose, to be able to check if at least a single feature has been added
        marshalContext.NamespaceBuilder = marshalContext.ExampleBuilder.AddNamespace(ns.FeatureGroup);

        // string examples start with the namespace header
        if (marshalContext.StringExample != null)
            marshalContext.StringExample.Append(ns.NamespaceString);

        path.Add(parseContext);
        pushed = true;

        action(parseContext);

        // append default namespaces features if we found some
        if (this.vw.Settings.EnableStringExampleGeneration)
        {
            var str = marshalContext.ToString();
            if (str.Length > 0)
                this.namespaceStrings.Add(str);
        }

        this.featureCount += (int)marshalContext.NamespaceBuilder.FeatureCount;
    }
    finally
    {
        if (pushed)
            path.RemoveAt(path.Count - 1);

        // marshalContext is still null if construction failed early
        if (marshalContext != null && marshalContext.NamespaceBuilder != null)
        {
            marshalContext.NamespaceBuilder.Dispose();
            marshalContext.NamespaceBuilder = null;
        }

        if (parseContext != null)
        {
            if (parseContext.Context != null)
            {
                parseContext.Context.Dispose();
                parseContext.Context = null;
            }
        }
        else if (marshalContext != null)
        {
            // the parse context was never created, so nothing else owns the marshal context
            marshalContext.Dispose();
        }
    }
}

///
/// Parses { "feature1":1, "feature2":true, ....
/// }
///
private void ParseNamespaceAndFeatures(List<VowpalWabbitJsonParseContext> path, string namespaceValue)
{
    // the nested object's properties are parsed inside a namespace named after the property
    Action<VowpalWabbitJsonParseContext> parseBody = context => this.ParseProperties(path);
    this.WrapInNamespace(path, namespaceValue, parseBody);
}

/// <summary>
/// Consumes tokens up to and including the end of the current object, dispatching each property to
/// special-property handling, namespace parsing (object values) or feature parsing (everything else).
/// </summary>
private void ParseProperties(List<VowpalWabbitJsonParseContext> path)
{
    var config = this.vw.Settings.PropertyConfiguration;

    while (reader.Read())
    {
        var token = reader.TokenType;

        if (token == JsonToken.EndObject)
        {
            return;
        }

        // everything except property names is passed over
        if (token != JsonToken.PropertyName)
        {
            continue;
        }

        var propertyName = (string)reader.Value;

        var isIgnoredOrSpecial =
            propertyName.StartsWith(config.FeatureIgnorePrefix, StringComparison.Ordinal) ||
            config.IsSpecialProperty(propertyName);

        if (isIgnoredOrSpecial)
        {
            this.ParseSpecialProperty(path.Last(), propertyName);
            continue;
        }

        // advance to the property value
        if (!reader.Read())
        {
            throw new VowpalWabbitJsonException(this.reader, "Unexpected end while parsing namespace");
        }

        // TODO: this.Context might have to be a stack...
        if (reader.TokenType != JsonToken.StartObject)
        {
            this.ParseFeature(path, propertyName);
        }
        else
        {
            this.ParseNamespaceAndFeatures(path, propertyName);
        }
    }
}

/// <summary>
/// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources.
/// </summary>
public void Dispose()
{
    this.Dispose(true);
    GC.SuppressFinalize(this);
}

private void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    var defaultContext = this.DefaultNamespaceContext;
    if (defaultContext == null)
    {
        // already released
        return;
    }

    // useful for tracking down bugs
    // this.DefaultNamespaceContextStackTrace = "Dispose" + Environment.StackTrace;

    defaultContext.Dispose();
    this.DefaultNamespaceContext = null;
}
} // end of class VowpalWabbitJsonBuilder

/// <summary>
/// A parsing context holding the current state during JSON parsing.
/// </summary>
public sealed class VowpalWabbitJsonParseContext
{
    /// <summary>
    /// The current marshalling context.
    /// </summary>
    public VowpalWabbitMarshalContext Context { get; set; }

    /// <summary>
    /// The current namespace.
    /// </summary>
    public Namespace Namespace { get; set; }

    ///
    /// The current JSON property being processed.
/// public string JsonProperty { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/Serializer/VowpalWabbitJsonException.cs000066400000000000000000000037061332666127000271400ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using Newtonsoft.Json; using System; namespace VW.Serializer { /// /// Exception thrown if fails to deserialize the JSON. /// [Serializable] public sealed class VowpalWabbitJsonException : Exception { /// /// Initializes a new instance of the class. /// /// The reader used at deserialization time. /// The message that describes the error. public VowpalWabbitJsonException(JsonReader reader, string message) : base(message) { this.Path = reader.Path; var lineInfo = reader as IJsonLineInfo; if (lineInfo != null) { this.LineNumber = lineInfo.LineNumber; this.LinePosition = lineInfo.LinePosition; } } /// /// The line number at which this error happened. /// public int LineNumber { get; private set; } /// /// The character position at which this error happened. /// public int LinePosition { get; private set; } /// /// The path as returned by . /// public string Path { get; private set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/Serializer/VowpalWabbitJsonOptimizedSerializable.cs000066400000000000000000000047451332666127000315010ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. 
Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using Newtonsoft.Json; using System; using System.IO; using VW.Serializer.Intermediate; namespace VW.Serializer { /// /// Uses the supplied to get the JSON fragments for a given object. /// public class VowpalWabbitJsonOptimizedSerializable : IVowpalWabbitSerializable { private readonly object value; private readonly IVowpalWabbitJsonConverter jsonConverter; /// /// Initializes a new instance. /// public VowpalWabbitJsonOptimizedSerializable(object value, IVowpalWabbitJsonConverter jsonConverter) { this.value = value; this.jsonConverter = jsonConverter; } /// /// Marshals JSON string into VW example. /// public void Marshal(VowpalWabbitMarshalContext ctx, Namespace ns, Feature feature) { if (this.value == null) return; try { var jsonSerializer = new JsonSerializer(); using (var jsonBuilder = new VowpalWabbitJsonBuilder(ctx.VW, VowpalWabbitDefaultMarshaller.Instance, jsonSerializer)) { // marshal from JSON to VW foreach (var json in jsonConverter.JsonFragments(this.value)) { if (json == null) continue; using (var reader = new JsonTextReader(new StringReader(json))) { jsonBuilder.Parse(reader, ctx, new Namespace(ctx.VW, feature.Name)); } } } } catch (Exception e) { throw new VowpalWabbitSerializationException("Optimized marshalling failed", e, ns, feature); } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/Serializer/VowpalWabbitJsonReferenceResolver.cs000066400000000000000000000206341332666127000306210ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Linq; using System.Runtime.Caching; namespace VW.Serializer { /// /// Reference resolver for JSON.NET $id, $ref elements. /// public sealed class VowpalWabbitJsonReferenceResolver : IDisposable { /// /// Monitoring statistics. /// public sealed class Stats { /// /// The number of items currently cached. /// public long ItemCount { get; internal set; } /// /// The number of outstanding requests to resolve a referencce. /// public long NumberOfOpenRequests { get; internal set; } } private readonly Action exampleComplete; private readonly object lockObject; private MemoryCache cache; private MemoryCache cacheRequests; private readonly Func cacheItemPolicyFactory; private readonly Func cacheRequestItemPolicyFactory; private int numberOfOpenRequests; /// /// Initializes a new instance. /// /// A callback triggered when all outstanding references for a given example are resolved. /// Optional name. /// Optional cache policy for cached items. Defaults to 1 hour sliding expiration. /// Optional cache policy for resolution requets. Defaults to 1 hour sliding expiration. public VowpalWabbitJsonReferenceResolver( Action exampleComplete, string cacheName = null, Func cacheItemPolicyFactory = null, Func cacheRequestItemPolicyFactory = null) { this.lockObject = new object(); this.exampleComplete = exampleComplete; if (cacheName == null) cacheName = "VowpalWabbitJsonExampleCache"; this.cacheItemPolicyFactory = cacheItemPolicyFactory == null ? _ => new CacheItemPolicy { SlidingExpiration = TimeSpan.FromHours(1) } : cacheItemPolicyFactory; this.cacheRequestItemPolicyFactory = cacheRequestItemPolicyFactory == null ? 
_ => new CacheItemPolicy { SlidingExpiration = TimeSpan.FromHours(1) } : cacheRequestItemPolicyFactory; this.cache = new MemoryCache(cacheName); this.cacheRequests = new MemoryCache(cacheName + "Requests"); } /// /// Monitoring statistics. /// public Stats Statistics { get { lock (this.lockObject) { return new Stats { ItemCount = this.cache.GetCount(), NumberOfOpenRequests = this.numberOfOpenRequests }; } } } internal void AddReference(string id, IVowpalWabbitMarshalAction marshalAction) { List requests = null; lock (this.lockObject) { // ignore duplicate keys - still update the sliding timer if (this.cache.Contains(id)) return; this.cache.Add( new CacheItem(id, marshalAction), this.cacheItemPolicyFactory(id)); requests = (List)this.cacheRequests.Get(id); if (requests != null) { foreach (var req in requests) req.DontDispose = true; this.cacheRequests.Remove(id); this.numberOfOpenRequests -= requests.Count; } } // since this can be called from another thread we need to dispatch to the serializer and let it decide // when to resolve the marshalling request if (requests != null) { foreach (var req in requests) if (req.Serializer.Resolve(() => req.Marshal(marshalAction))) this.exampleComplete(req.Serializer); } } internal void Resolve(VowpalWabbitJsonSerializer serializer, string id, Action resolveAction) { IVowpalWabbitMarshalAction marshal; lock (this.lockObject) { marshal = (IVowpalWabbitMarshalAction)this.cache.Get(id); if (marshal == null) { // not found, register for delayed completion var requests = (List)this.cacheRequests.Get(id); if (requests == null) { var policy = this.cacheRequestItemPolicyFactory(id); // dispatch to original handler too var removeHandler = policy.RemovedCallback; if (removeHandler == null) policy.RemovedCallback = this.CacheEntryRemovedCallback; else policy.RemovedCallback = args => { removeHandler(args); this.CacheEntryRemovedCallback(args); }; requests = new List(); this.cacheRequests.Add( new CacheItem(id, requests), policy); } 
requests.Add( new IncompleteReferenceRequest { Serializer = serializer, Marshal = resolveAction }); this.numberOfOpenRequests++; serializer.IncreaseUnresolved(); return; } } // avoid extensive locking resolveAction(marshal); } private void CacheEntryRemovedCallback(CacheEntryRemovedArguments arguments) { lock (this.lockObject) { var requests = (List)arguments.CacheItem.Value; // dispose outstanding requests foreach (var request in requests) if (!request.DontDispose) request.Serializer.Dispose(); } } private sealed class IncompleteReferenceRequest { internal IncompleteReferenceRequest() { this.DontDispose = false; } internal VowpalWabbitJsonSerializer Serializer { get; set; } internal Action Marshal { get; set; } // if we return to the handler, the handler has to dispose internal bool DontDispose { get; set; } } /// /// Disposes hold resources. /// public void Dispose() { if (this.cacheRequests != null) { // trigger dispose foreach (var key in this.cacheRequests.Select(kv => kv.Key).ToList()) this.cacheRequests.Remove(key); this.cacheRequests.Dispose(); this.cacheRequests = null; } if (this.cache != null) { this.cache.Dispose(); this.cache = null; } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/Serializer/VowpalWabbitJsonSerializable.cs000066400000000000000000000045321332666127000276060ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using Newtonsoft.Json; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; using System.Threading.Tasks; using VW.Serializer.Intermediate; namespace VW.Serializer { /// /// Uses supplied JsonConverter to custom serialize object into JSON and then marshal from there. /// public class VowpalWabbitJsonSerializable : IVowpalWabbitSerializable { private readonly object value; private readonly JsonConverter jsonConverter; /// /// Initializes a new instance. /// public VowpalWabbitJsonSerializable(object value, JsonConverter jsonConverter) { this.value = value; this.jsonConverter = jsonConverter; } /// /// Marshals JSON string into VW example. /// public void Marshal(VowpalWabbitMarshalContext ctx, Namespace ns, Feature feature) { if (this.value == null) return; var jsonSerializer = new JsonSerializer(); using (var jsonBuilder = new VowpalWabbitJsonBuilder(ctx.VW, VowpalWabbitDefaultMarshaller.Instance, jsonSerializer)) { // serialize from object to JSON var sb = new StringBuilder(); using (var writer = new JsonTextWriter(new StringWriter(sb))) { this.jsonConverter.WriteJson(writer, this.value, jsonSerializer); } // marshal from JSON to VW using (var reader = new JsonTextReader(new StringReader(sb.ToString()))) { jsonBuilder.Parse(reader, ctx, new Namespace(ctx.VW, feature.Name)); } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/Serializer/VowpalWabbitJsonSerializer.cs000066400000000000000000000470001332666127000273060ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using Newtonsoft.Json; using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Globalization; using System.IO; using System.Linq; using System.Threading.Tasks; using VW.Labels; using VW.Serializer.Intermediate; namespace VW.Serializer { /// /// The current JSON parse state. /// public sealed class VowpalWabbitJsonParseState { /// /// The native VW instance. /// public VowpalWabbit VW { get; set; } /// /// The JSON reader. /// public JsonReader Reader { get; set; } /// /// The VW example JSON builder. /// public VowpalWabbitJsonBuilder JsonBuilder { get; set; } /// /// The current property path within the JSON. /// public List Path { get; set; } /// /// The current _multi element index. /// public int MultiIndex { get; set; } /// /// Triggers parsing at the current state of the using the default namespace. /// public void Parse() { using (var context = new VowpalWabbitMarshalContext(this.VW, this.JsonBuilder.DefaultNamespaceContext.ExampleBuilder)) { var ns = new Namespace(this.VW); this.Parse(context, ns); } } /// /// Triggers parsing at the current state of the using the given . /// /// The namespace the JSON should be marshalled into. /// The namespace the JSON should be marshalled into. public void Parse(VowpalWabbitMarshalContext namespaceContext, Namespace ns) { this.JsonBuilder.Parse(this.Path, namespaceContext, ns); } } /// /// Delegate definition for JSON parsing extension. E.g. if one wants to extract "_timestamp" or a like. /// /// The current parsing state. /// The property encountered. /// True if the extension handled this property, false otherwise. /// Only fires for "ignore prefixed" properties. public delegate bool VowpalWabbitJsonExtension(VowpalWabbitJsonParseState state, string property); /// /// A deserializer from JSON to Vowpal Wabbit native examples. 
/// public sealed class VowpalWabbitJsonSerializer : IDisposable { private readonly IVowpalWabbitExamplePool vwPool; private readonly JsonSerializer jsonSerializer; private readonly VowpalWabbitJsonReferenceResolver referenceResolver; private int unresolved; private readonly object lockObject = new object(); private bool ready = false; private List marshalRequests; private List extensions; /// /// Initializes a new instance of the class. /// /// The VW native instance. /// An optional reference resolver. public VowpalWabbitJsonSerializer(IVowpalWabbitExamplePool vwPool, VowpalWabbitJsonReferenceResolver referenceResolver = null) { Contract.Requires(vwPool != null); this.extensions = new List { this.HandleMultiProperty }; this.jsonSerializer = new JsonSerializer(); this.vwPool = vwPool; this.referenceResolver = referenceResolver; this.ExampleBuilder = new VowpalWabbitJsonBuilder(this, this.vwPool, VowpalWabbitDefaultMarshaller.Instance, this.jsonSerializer); } /// /// Registers a parsing extension. /// /// The extension to be rgistered. public void RegisterExtension(VowpalWabbitJsonExtension extension) { this.extensions.Add(extension); } /// /// Userful if this deserializer is published through VowpalWabbitJsonReferenceResolver. /// public object UserContext { get; set; } /// /// Single line example or shared example. /// public VowpalWabbitJsonBuilder ExampleBuilder { get; private set; } /// /// Multi-line examples. 
///
public List<VowpalWabbitJsonBuilder> ExampleBuilders { get; private set; }

/// <summary>
/// The reference resolver supplied to the constructor; null if none was supplied.
/// </summary>
internal VowpalWabbitJsonReferenceResolver ReferenceResolver
{
    get { return this.referenceResolver; }
}

/// <summary>
/// Counts one more reference that has to be resolved before the examples can be created.
/// </summary>
internal void IncreaseUnresolved()
{
    // only called during the initial parsing run
    this.unresolved++;
}

/// <summary>
/// Executes (or queues) a marshal request for a reference that just became available.
/// </summary>
/// <param name="marshal">The action marshalling the resolved reference into the example.</param>
/// <returns>
/// True if this was the last outstanding reference, false otherwise. Always false while the
/// initial parsing run has not finished; the request is queued in that case.
/// </returns>
/// <exception cref="InvalidOperationException">More requests were resolved than were registered.</exception>
internal bool Resolve(Action marshal)
{
    lock (this.lockObject)
    {
        // ready is false until the initial parsing run is complete
        if (!this.ready)
        {
            // we need to track the requests and wait until the initial parsing is done
            if (this.marshalRequests == null)
                this.marshalRequests = new List<Action>();

            this.marshalRequests.Add(marshal);

            return false;
        }

        // the object doesn't get anymore unresolved marshal requests:
        // first replay everything that was queued during parsing
        if (this.marshalRequests != null)
        {
            foreach (var req in this.marshalRequests)
                req();

            this.unresolved -= this.marshalRequests.Count;
            this.marshalRequests = null;
        }

        marshal();
        this.unresolved--;

        if (this.unresolved < 0)
            throw new InvalidOperationException("Number of unresolved requested must not be negative");

        return this.unresolved == 0;
    }
}

/// <summary>
/// Creates the VW example, be it single or multi-line.
/// </summary>
/// <returns>
/// The marshalled VW example, or null if references are still outstanding. In the latter case
/// the serializer is marked ready and completion is signalled through <see cref="Resolve"/>.
/// </returns>
public VowpalWabbitExampleCollection CreateExamples()
{
    lock (this.lockObject)
    {
        if (this.unresolved == 0)
            return this.CreateExamplesInternal();

        if (this.marshalRequests != null && this.unresolved == this.marshalRequests.Count)
        {
            // Every outstanding reference arrived while parsing was still running, so the requests
            // were only queued by Resolve(). They have to be executed now, otherwise the referenced
            // data never makes it into the examples created below.
            foreach (var req in this.marshalRequests)
                req();

            this.unresolved -= this.marshalRequests.Count;
            this.marshalRequests = null;

            return this.CreateExamplesInternal();
        }

        // wait for delayed completion
        this.ready = true;

        return null;
    }
}

/// <summary>
/// Creates the VW example, be it single or multi-line.
/// </summary>
/// <param name="label">The label to be applied.</param>
/// <param name="index">The index of the example in the multi-line example this label should be applied on.</param>
/// <returns>The marshalled VW example, or null if references are still outstanding.</returns>
/// <exception cref="InvalidDataException">
/// <paramref name="index"/> does not address one of the parsed multi-line examples.
/// </exception>
public VowpalWabbitExampleCollection CreateExamples(ILabel label, int index)
{
    // ExampleBuilders is only created once a multi-line property has been parsed
    var available = this.ExampleBuilders == null ? 0 : this.ExampleBuilders.Count;

    if (index < 0 || index >= available)
        throw new InvalidDataException($"Label index {index} is invalid. Only {available} examples available.");

    VowpalWabbitDefaultMarshaller.Instance.MarshalLabel(
        this.ExampleBuilders[index].DefaultNamespaceContext,
        label);

    return this.CreateExamples();
}

/// <summary>
/// Parses and creates the example.
/// </summary>
/// <param name="json">The example to parse.</param>
/// <param name="label">
/// Optional label, taking precedence over "_label" property found in <paramref name="json"/>.
/// If null, <paramref name="json"/> will be inspected and the "_label" property used as label.
/// </param>
/// <param name="index">Optional index of example the given label should be applied for multi-line examples.</param>
/// <returns>The VowpalWabbit native example.</returns>
public VowpalWabbitExampleCollection ParseAndCreate(string json, ILabel label = null, int? index = null)
{
    this.Parse(json, label, index);
    return this.CreateExamples();
}

/// <summary>
/// Parses and creates the example.
/// </summary>
/// <param name="reader">The example to parse.</param>
/// <param name="label">
/// Optional label, taking precedence over "_label" property found in <paramref name="reader"/>.
/// If null, <paramref name="reader"/> will be inspected and the "_label" property used as label.
/// </param>
/// <param name="index">Optional index of example the given label should be applied for multi-line examples.</param>
/// <returns>The VowpalWabbit native example.</returns>
public VowpalWabbitExampleCollection ParseAndCreate(JsonReader reader, ILabel label = null, int? index = null)
{
    this.Parse(reader, label, index);
    return this.CreateExamples();
}

/// <summary>
/// Parses the example.
/// </summary>
/// <param name="json">The example to parse.</param>
/// <param name="label">
/// Optional label, taking precedence over "_label" property found in <paramref name="json"/>.
/// If null, <paramref name="json"/> will be inspected and the "_label" property used as label.
/// </param>
/// <param name="index">Optional index of example the given label should be applied for multi-line examples.</param>
public void Parse(string json, ILabel label = null, int? index = null)
{
    using (var textReader = new JsonTextReader(new StringReader(json)))
    {
        // forward the index as well, otherwise the label ends up on the shared example
        this.Parse(textReader, label, index);
    }
}

/// <summary>
/// Returns the number of action dependent examples found within <paramref name="json"/>.
/// </summary>
/// <param name="json">The JSON to be inspected.</param>
/// <returns>Returns the number of action dependent examples.</returns>
public static int GetNumberOfActionDependentExamples(string json)
{
    using (var textReader = new JsonTextReader(new StringReader(json)))
    {
        return GetNumberOfActionDependentExamples(textReader);
    }
}

/// <summary>
/// Returns the number of action dependent examples found within <paramref name="reader"/>.
/// </summary>
/// <param name="reader">The JSON.</param>
/// <param name="multiProperty">The optional multi property name.</param>
/// <returns>
/// Returns the number of action dependent examples; 0 if the top-level object has no
/// <paramref name="multiProperty"/> property.
/// </returns>
/// <exception cref="VowpalWabbitJsonException">
/// The JSON is empty, does not start with an object, or the multi property is not an array.
/// </exception>
public static int GetNumberOfActionDependentExamples(JsonReader reader, string multiProperty = PropertyConfiguration.MultiPropertyDefault)
{
    // handle the case when the reader is already positioned at JsonToken.StartObject
    if (reader.TokenType == JsonToken.None && !reader.Read())
        throw new VowpalWabbitJsonException(reader, "Expected non-empty JSON");

    if (reader.TokenType != JsonToken.StartObject)
        throw new VowpalWabbitJsonException(reader, "Expected start object");

    while (reader.Read())
    {
        var isMultiProperty =
            reader.TokenType == JsonToken.PropertyName &&
            (string)reader.Value == multiProperty;

        if (!isMultiProperty)
        {
            // not the property we are looking for: jump over its value
            reader.Skip();
            continue;
        }

        if (!reader.Read() || reader.TokenType != JsonToken.StartArray)
            throw new VowpalWabbitJsonException(reader, "Expected start arrray");

        // count the array elements without materializing them
        var exampleCount = 0;
        while (reader.Read() && reader.TokenType != JsonToken.EndArray)
        {
            exampleCount++;
            reader.Skip();
        }

        return exampleCount;
    }

    return 0;
}

/// <summary>
/// Parsing extension handling the multi property: every object in its array is parsed into
/// a separate example builder appended to <see cref="ExampleBuilders"/>.
/// </summary>
/// <param name="state">The current parsing state; its MultiIndex is updated while iterating.</param>
/// <param name="property">The property encountered.</param>
/// <returns>True if <paramref name="property"/> is the multi property and was consumed, false otherwise.</returns>
/// <exception cref="VowpalWabbitJsonException">
/// The property value is not an array of objects or the JSON ends prematurely.
/// </exception>
private bool HandleMultiProperty(VowpalWabbitJsonParseState state, string property)
{
    var multiPropertyName = this.vwPool.Native.Settings.PropertyConfiguration.MultiProperty;
    if (!property.Equals(multiPropertyName, StringComparison.OrdinalIgnoreCase))
        return false;

    var reader = state.Reader;
    if (!reader.Read() || reader.TokenType != JsonToken.StartArray)
        throw new VowpalWabbitJsonException(reader, "Expected start array for '" + multiPropertyName + "'");

    if (this.ExampleBuilders == null)
        this.ExampleBuilders = new List<VowpalWabbitJsonBuilder>();

    state.MultiIndex = 0;

    while (reader.Read())
    {
        switch (reader.TokenType)
        {
            case JsonToken.StartObject:
                VowpalWabbitJsonBuilder builder = null;

                try
                {
                    builder = new VowpalWabbitJsonBuilder(this, this.vwPool, VowpalWabbitDefaultMarshaller.Instance, this.jsonSerializer, state.MultiIndex);
                    this.ExampleBuilders.Add(builder);
                }
                catch (Exception)
                {
                    // builder is still null if the constructor itself failed;
                    // disposing unconditionally would replace the real exception with a NullReferenceException
                    if (builder != null)
                        builder.Dispose();

                    throw;
                }

                // pass the label to the selected example
                builder.Parse(reader, index != null && index == this.ExampleBuilders.Count - 1 ? label : null, this.extensions);

                state.MultiIndex++;
                break;

            case JsonToken.EndArray:
                return true;

            default:
                throw new VowpalWabbitJsonException(reader, "Unexpected token: " + reader.TokenType);
        }
    }

    throw new VowpalWabbitJsonException(reader, "Unexpected end");
}

// TODO: keeping it local might be nicer...
// Label and target index of the current Parse() call; read by HandleMultiProperty.
private int? index;
private ILabel label;

/// <summary>
/// Parses the example.
/// </summary>
/// <param name="reader">The example to parse.</param>
/// <param name="label">
/// Optional label, taking precedence over "_label" property found in <paramref name="reader"/>.
/// If null, <paramref name="reader"/> will be inspected and the "_label" property used as label.
/// </param>
/// <param name="index">Optional index of example the given label should be applied for multi-line examples.</param>
/// <exception cref="InvalidDataException">
/// The outer example carries a label whose index does not address one of the parsed multi-line examples.
/// </exception>
public void Parse(JsonReader reader, ILabel label = null, int? index = null)
{
    this.index = index;
    this.label = label;

    // only pass the label if it's not targeted at a particular index
    this.ExampleBuilder.Parse(reader, index == null ? label : null, this.extensions);

    // check if the outer example found a label
    if (this.ExampleBuilder.Label != null)
    {
        // ExampleBuilders stays null when no multi-line examples were parsed
        var available = this.ExampleBuilders == null ? 0 : this.ExampleBuilders.Count;

        if (this.ExampleBuilder.LabelIndex < 0 || this.ExampleBuilder.LabelIndex >= available)
            throw new InvalidDataException($"Label index {this.ExampleBuilder.LabelIndex} is invalid. Only {available} examples available.");

        VowpalWabbitDefaultMarshaller.Instance.MarshalLabel(
            this.ExampleBuilders[this.ExampleBuilder.LabelIndex].DefaultNamespaceContext,
            this.ExampleBuilder.Label);
    }
}

///
/// Creates the examples ready for learning or prediction.
///
/// <returns>
/// A single-line example collection if no multi-line examples were parsed, otherwise a multi-line
/// collection consisting of the shared example and one example per entry of ExampleBuilders.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The examples were already created or the serializer has been disposed.
/// </exception>
/// <remarks>
/// All example builders are disposed and reset before this method returns - whether it succeeds or not -
/// so it can be called successfully at most once per instance.
/// </remarks>
public VowpalWabbitExampleCollection CreateExamplesInternal()
{
    // the builders are released by a previous call and by Dispose(); fail with a clear message
    // rather than a NullReferenceException raised from within the finally block
    if (this.ExampleBuilder == null)
        throw new InvalidOperationException("Examples have already been created or the serializer has been disposed.");

    try
    {
        if (this.ExampleBuilders == null)
        {
            return new VowpalWabbitSingleLineExampleCollection(this.vwPool.Native, this.ExampleBuilder.CreateExample());
        }

        // making sure we don't leak memory
        VowpalWabbitExample sharedExample = null;
        var examples = new VowpalWabbitExample[this.ExampleBuilders.Count];

        try
        {
            // mark shared example as shared
            VowpalWabbitDefaultMarshaller.Instance.MarshalLabel(this.ExampleBuilder.DefaultNamespaceContext, SharedLabel.Instance);

            sharedExample = this.ExampleBuilder.CreateExample();
            for (int i = 0; i < this.ExampleBuilders.Count; i++)
                examples[i] = this.ExampleBuilders[i].CreateExample();

            return new VowpalWabbitMultiLineExampleCollection(this.vwPool.Native, sharedExample, examples);
        }
        catch (Exception)
        {
            // release whatever was created before the failure
            if (sharedExample != null)
                sharedExample.Dispose();

            foreach (var e in examples)
                if (e != null)
                    e.Dispose();

            throw;
        }
    }
    finally
    {
        // null-safe cleanup, same as Dispose(bool)
        if (this.ExampleBuilder != null)
        {
            this.ExampleBuilder.Dispose();
            this.ExampleBuilder = null;
        }

        if (this.ExampleBuilders != null)
        {
            foreach (var eb in this.ExampleBuilders)
                eb.Dispose();

            this.ExampleBuilders = null;
        }
    }
}

///
/// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources.
/// public void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { // Remark: might be called multiple times from VowpalWabbitJsonReferenceResolver if (disposing) { // cleanup in case CreateExample() wasn't called if (this.ExampleBuilder != null) { this.ExampleBuilder.Dispose(); this.ExampleBuilder = null; } if (this.ExampleBuilders != null) { foreach (var eb in this.ExampleBuilders) eb.Dispose(); this.ExampleBuilders = null; } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/VowpalWabbitJson.cs000066400000000000000000000261521332666127000231500ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using Newtonsoft.Json; using System; using System.Diagnostics.Contracts; using VW.Labels; using VW.Serializer; namespace VW { /// /// A VowpalWabbit wrapper reading from JSON (see https://github.com/JohnLangford/vowpal_wabbit/wiki/JSON) /// public sealed class VowpalWabbitJson : IDisposable { private VowpalWabbit vw; /// /// Initializes a new instance of the class. /// /// Command line arguments passed to native instance. public VowpalWabbitJson(String args) : this(new VowpalWabbit(args)) { } /// /// Initializes a new instance of the class. /// /// Arguments passed to native instance. public VowpalWabbitJson(VowpalWabbitSettings settings) : this(new VowpalWabbit(settings)) { } /// /// Initializes a new instance of the class. /// /// The native instance to wrap. /// This instance takes ownership of instance and disposes it. 
public VowpalWabbitJson(VowpalWabbit vw) { if (vw == null) { throw new ArgumentNullException("vw"); } Contract.EndContractBlock(); this.vw = vw; } /// /// The wrapped VW instance. /// public VowpalWabbit Native { get { return this.vw; } } /// /// Learns from the given example. /// /// The example to learn. /// /// Optional label, taking precedence over "_label" property found in . /// If null, will be inspected and the "_label" property used as label. /// /// Optional index of example the given label should be applied for multi-line examples. public void Learn(string json, ILabel label = null, int? index = null) { using (var serializer = new VowpalWabbitJsonSerializer(vw)) using (var result = serializer.ParseAndCreate(json, label, index)) { result.Learn(); } } /// /// Learns from the given example. /// /// The example to learn. /// /// Optional label, taking precedence over "_label" property found in . /// If null, will be inspected and the "_label" property used as label. /// /// Optional index of example the given label should be applied for multi-line examples. public void Learn(JsonReader reader, ILabel label = null, int? index = null) { using (var serializer = new VowpalWabbitJsonSerializer(vw)) using (var result = serializer.ParseAndCreate(reader, label, index)) { result.Learn(); } } /// /// Learn from the given example and return the current prediction for it. /// /// The prediction type. /// The example to learn. /// The prediction factory to be used. See . /// /// Optional label, taking precedence over "_label" property found in . /// If null, will be inspected and the "_label" property used as label. /// /// Optional index of example the given label should be applied for multi-line examples. /// The prediction for the given . public TPrediction Learn(string json, IVowpalWabbitPredictionFactory predictionFactory, ILabel label = null, int? 
index = null) { using (var serializer = new VowpalWabbitJsonSerializer(vw)) using (var result = serializer.ParseAndCreate(json, label, index)) { return result.Learn(predictionFactory); } } /// /// Learn from the given example and return the current prediction for it. /// /// The prediction type. /// The example to learn. /// The prediction factory to be used. See . /// /// Optional label, taking precedence over "_label" property found in . /// If null, will be inspected and the "_label" property used as label. /// /// Optional index of example the given label should be applied for multi-line examples. /// The prediction for the given . public TPrediction Learn(JsonReader reader, IVowpalWabbitPredictionFactory predictionFactory, ILabel label = null, int? index = null) { using (var serializer = new VowpalWabbitJsonSerializer(vw)) using (var result = serializer.ParseAndCreate(reader, label, index)) { return result.Learn(predictionFactory); } } /// /// Predicts for the given example. /// /// The example to predict for. /// /// Optional label, taking precedence over "_label" property found in . /// If null, will be inspected and the "_label" property used as label. /// /// Optional index of example the given label should be applied for multi-line examples. public void Predict(string json, ILabel label = null, int? index = null) { Contract.Requires(json != null); using (var serializer = new VowpalWabbitJsonSerializer(vw)) using (var result = serializer.ParseAndCreate(json, label, index)) { result.Predict(); } } /// /// Predicts for the given example. /// /// The example to predict for. /// /// Optional label, taking precedence over "_label" property found in . /// If null, will be inspected and the "_label" property used as label. /// /// Optional index of example the given label should be applied for multi-line examples. public void Predict(JsonReader reader, ILabel label = null, int? 
index = null) { Contract.Requires(reader != null); using (var serializer = new VowpalWabbitJsonSerializer(vw)) using (var result = serializer.ParseAndCreate(reader, label, index)) { result.Predict(); } } /// /// Predicts for the given example. /// /// The prediction type. /// The example to predict for. /// The prediction factory to be used. See . /// /// Optional label, taking precedence over "_label" property found in . /// If null, will be inspected and the "_label" property used as label. /// /// Optional index of example the given label should be applied for multi-line examples. public TPrediction Predict(string json, IVowpalWabbitPredictionFactory predictionFactory, ILabel label = null, int? index = null) { Contract.Requires(json != null); using (var serializer = new VowpalWabbitJsonSerializer(vw)) using (var result = serializer.ParseAndCreate(json, label, index)) { return result.Predict(predictionFactory); } } /// /// Predicts for the given example. /// /// The prediction type. /// The example to predict for. /// The prediction factory to be used. See . /// /// Optional label, taking precedence over "_label" property found in . /// If null, will be inspected and the "_label" property used as label. /// /// Optional index of example the given label should be applied for multi-line examples. public TPrediction Predict(JsonReader reader, IVowpalWabbitPredictionFactory predictionFactory, ILabel label = null, int? index = null) { using (var serializer = new VowpalWabbitJsonSerializer(vw)) using (var result = serializer.ParseAndCreate(reader, label, index)) { return result.Predict(predictionFactory); } } /// /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources. 
/// public void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { if (disposing) { if (this.vw != null) { this.vw.Dispose(); this.vw = null; } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/VowpalWabbitJsonThreadedPrediction.cs000066400000000000000000000027021332666127000266250ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- namespace VW { /// /// Enables multi-threaded prediction by utilizing a pool of instances. /// public sealed class VowpalWabbitJsonThreadedPrediction : VowpalWabbitThreadedPredictionBase { /// /// Initializes a new instance of . /// /// The model used by each pool instance. public VowpalWabbitJsonThreadedPrediction(VowpalWabbitModel model = null) : base(model) { } /// /// Creates a new instance of . /// /// The wrapped vw instance. 
protected override VowpalWabbitJson InternalCreate(VowpalWabbit vw) { return new VowpalWabbitJson(vw); } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/VowpalWabbitMarshalAction.cs000066400000000000000000000121411332666127000247550ustar00rootroot00000000000000using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using VW.Serializer.Intermediate; namespace VW.Serializer { internal interface IVowpalWabbitMarshalAction { void Marshal(VowpalWabbitDefaultMarshaller defaultMarshaller, VowpalWabbitMarshalContext context, Namespace ns, string featureName); } internal static class VowpalWabbitMarshalActions { internal static IVowpalWabbitMarshalAction Create(double data) { return new VowpalWabbitMarshalActionImpl(Marshal, data); } internal static IVowpalWabbitMarshalAction Create(string data) { return new VowpalWabbitMarshalActionImpl(Marshal, data); } internal static IVowpalWabbitMarshalAction Create(long data) { return new VowpalWabbitMarshalActionImpl(Marshal, data); } internal static IVowpalWabbitMarshalAction Create(bool data) { return new VowpalWabbitMarshalActionImpl(Marshal, data); } internal static IVowpalWabbitMarshalAction Create(float[] data, int length) { return new VowpalWabbitMarshalActionArrayImpl(Marshal, data, length); } internal static void Marshal(VowpalWabbitDefaultMarshaller defaultMarshaller, VowpalWabbitMarshalContext context, Namespace ns, string featureName, double val) { var feature = new PreHashedFeature(context.VW, ns, featureName); defaultMarshaller.MarshalFeature(context, ns, feature, val); } internal static void Marshal(VowpalWabbitDefaultMarshaller defaultMarshaller, VowpalWabbitMarshalContext context, Namespace ns, string featureName, string val) { var feature = new Feature(featureName); defaultMarshaller.MarshalFeatureStringEscapeAndIncludeName(context, ns, feature, val); } internal static void Marshal(VowpalWabbitDefaultMarshaller defaultMarshaller, VowpalWabbitMarshalContext 
context, Namespace ns, string featureName, bool val) { var feature = new PreHashedFeature(context.VW, ns, featureName); defaultMarshaller.MarshalFeature(context, ns, feature, val); } internal static void Marshal(VowpalWabbitDefaultMarshaller defaultMarshaller, VowpalWabbitMarshalContext context, Namespace ns, string featureName, long val) { var feature = new PreHashedFeature(context.VW, ns, featureName); defaultMarshaller.MarshalFeature(context, ns, feature, val); } internal static void Marshal(VowpalWabbitDefaultMarshaller defaultMarshaller, VowpalWabbitMarshalContext context, Namespace ns, string featureName, float[] values, int length) { var feature = new Feature(featureName); defaultMarshaller.MarshalFeature(context, ns, feature, values, 0, length); } /// /// Explicit closure to enable debug view /// private sealed class VowpalWabbitMarshalActionImpl : IVowpalWabbitMarshalAction { internal delegate void MarshalAction(VowpalWabbitDefaultMarshaller defaultMarshaller, VowpalWabbitMarshalContext context, Namespace ns, string featureName, T data); private readonly T data; private readonly MarshalAction marshal; internal VowpalWabbitMarshalActionImpl(MarshalAction marshal, T data) { this.data = data; this.marshal = marshal; } public void Marshal(VowpalWabbitDefaultMarshaller defaultMarshaller, VowpalWabbitMarshalContext context, Namespace ns, string featureName) { this.marshal(defaultMarshaller, context, ns, featureName, this.data); } } /// /// Explicit closure to enable debug view /// private sealed class VowpalWabbitMarshalActionArrayImpl : IVowpalWabbitMarshalAction { internal delegate void MarshalAction(VowpalWabbitDefaultMarshaller defaultMarshaller, VowpalWabbitMarshalContext context, Namespace ns, string featureName, T[] data, int length); private readonly T[] data; private readonly int length; private readonly MarshalAction marshal; internal VowpalWabbitMarshalActionArrayImpl( MarshalAction marshal, T[] data, int length) { this.data = data; this.length = 
length; this.marshal = marshal; } public void Marshal(VowpalWabbitDefaultMarshaller defaultMarshaller, VowpalWabbitMarshalContext context, Namespace ns, string featureName) { this.marshal(defaultMarshaller, context, ns, featureName, this.data, this.length); } } } internal delegate void VowpalWabbitMarshalAction(VowpalWabbitDefaultMarshaller defaultMarshaller, VowpalWabbitMarshalContext context, Namespace ns, string featureName); } vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/cs_json.csproj000066400000000000000000000150241332666127000222420ustar00rootroot00000000000000 Debug AnyCPU {9E27FA94-AB34-4736-8427-FB7A2BA90D52} Library Properties VW VowpalWabbit.JSON v4.5.2 512 ..\..\vowpalwabbit\ true ..\..\vowpalwabbit\x64\Debug\ DEBUG;TRACE full x64 prompt MinimumRecommendedRules.ruleset ..\..\vowpalwabbit\x64\Debug\VowpalWabbit.JSON.XML ..\..\vowpalwabbit\x64\Release\ TRACE true pdbonly x64 prompt MinimumRecommendedRules.ruleset ..\..\vowpalwabbit\x64\Release\VowpalWabbit.JSON.XML true ..\vw_key.snk $(SolutionDir)\packages\Newtonsoft.Json.9.0.1\lib\net45\Newtonsoft.Json.dll True {85e55ae0-3784-4968-9271-c81af560e1c1} vw_clr {e621e022-c1f8-433f-905a-ab9a3de072b7} vw_common {e4e962ae-7056-4eb0-a8c5-8dc824a4b068} cs This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/cs_json.nuspec000066400000000000000000000024211332666127000222340ustar00rootroot00000000000000 Vowpal Wabbit JSON VowpalWabbit.JSON vw vowpal wabbit langford ml machine learning John Langford et al $version$ https://github.com/JohnLangford/vowpal_wabbit/wiki/C%23-Binding https://github.com/JohnLangford/vowpal_wabbit/blob/master/LICENSE false Official Vowpal Wabbit library including C# interface Copyright (C) Microsoft Corp 2012-2016, Yahoo! Inc. 2007-2012, and many individual contributors. All rights reserved. 
vowpal-wabbit-8.6.1.dfsg1/cs/cs_json/packages.config000066400000000000000000000004701332666127000223260ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/cs_parallel/000077500000000000000000000000001332666127000202035ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/cs_parallel/Properties/000077500000000000000000000000001332666127000223375ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/cs_parallel/Properties/AssemblyInfo.cs000066400000000000000000000022351332666127000252630ustar00rootroot00000000000000//------------------------------------------------------------------------------ // // This code was generated by a tool. // Runtime Version:4.0.30319.42000 // // Changes to this file may cause incorrect behavior and will be lost if // the code is regenerated. // //------------------------------------------------------------------------------ [assembly: System.Reflection.AssemblyTitle("Vowpal Wabbit Parallel")] [assembly: System.Reflection.AssemblyDescription("Vowpal Wabbit Parallel")] [assembly: System.Reflection.AssemblyCompany("Microsoft Corp")] [assembly: System.Reflection.AssemblyProduct("Vowpal Wabbit")] [assembly: System.Reflection.AssemblyCopyright("Copyright (C) Microsoft Corp 2012-2016, Yahoo! Inc. 2007-2012, and many individua" + "l contributors. All rights reserved")] [assembly: System.Runtime.InteropServices.ComVisible(false)] [assembly: System.CLSCompliant(false)] [assembly: System.Runtime.InteropServices.Guid("0bb98c1a-b25f-43a0-94b6-fed77f7e5cd8")] [assembly: System.Reflection.AssemblyVersion("8.4.0.1")] [assembly: System.Reflection.AssemblyFileVersion("8.4.0.1")] vowpal-wabbit-8.6.1.dfsg1/cs/cs_parallel/VowpalWabbitAsync.cs000066400000000000000000000316061332666127000241370ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. 
All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; using System.Text; using System.Threading.Tasks; using VW.Labels; using VW.Serializer; namespace VW { /// /// An async wrapper VW supporting data ingest using declarative serializer infrastructure used with . /// /// The user type to be serialized. public class VowpalWabbitAsync : IDisposable { /// /// The owning manager. /// private VowpalWabbitThreadedLearning manager; /// /// The serializers are not thread-safe. Thus we need to allocate one for each VW instance. /// private IVowpalWabbitSerializer[] serializers; internal VowpalWabbitAsync(VowpalWabbitThreadedLearning manager) { Contract.Requires(manager != null); Contract.Ensures(this.serializers != null); this.manager = manager; // create a serializer for each instance - maintaining separate example caches var serializer = VowpalWabbitSerializerFactory.CreateSerializer(manager.Settings); this.serializers = this.manager.VowpalWabbits .Select(vw => serializer.Create(vw)) .ToArray(); } /// /// Learns from the given example. /// /// The example to learn. /// The label for this . /// /// The method only enqueues the example for learning and returns immediately. /// You must not re-use the example. /// public void Learn(TExample example, ILabel label = null) { Contract.Requires(example != null); Contract.Requires(label != null); manager.Post(vw => { using (var ex = this.serializers[vw.Settings.Node].Serialize(example, label)) { ex.Learn(); } }); } /// /// Predicts for the given example. /// /// The example to predict for. /// /// The method only enqueues the example for prediction and returns immediately. /// You must not re-use the example. 
/// public void Predict(TExample example) { Contract.Requires(example != null); manager.Post(vw => { using (var ex = this.serializers[vw.Settings.Node].Serialize(example)) { ex.Predict(); } }); } /// /// Learns from the given example. /// /// The example to learn. /// The label for this . /// The prediction factory to be used. See . /// The prediction for the given . /// /// The method only enqueues the example for learning and returns immediately. /// Await the returned task to receive the prediction result. /// public Task Learn(TExample example, ILabel label, IVowpalWabbitPredictionFactory predictionFactory) { Contract.Requires(example != null); Contract.Requires(label != null); Contract.Requires(predictionFactory != null); return manager.Post(vw => { using (var ex = this.serializers[vw.Settings.Node].Serialize(example, label)) { return ex.Learn(predictionFactory); } }); } /// /// Predicts for the given example. /// /// The example to predict for. /// The prediction factory to be used. See . /// The prediction for the given . /// /// The method only enqueues the example for learning and returns immediately. /// Await the returned task to receive the prediction result. /// public Task Predict(TExample example, IVowpalWabbitPredictionFactory predictionFactory) { Contract.Requires(example != null); Contract.Requires(predictionFactory != null); return manager.Post(vw => { using (var ex = this.serializers[vw.Settings.Node].Serialize(example)) { return ex.Predict(predictionFactory); } }); } /// /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources. /// public void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { if (disposing) { if (this.serializers != null) { foreach (var serializer in this.serializers) { // free cached examples serializer.Dispose(); } this.serializers = null; } } } } /// /// An async VW wrapper for multiline ingest. 
/// /// The user type of the shared feature. /// The user type for each action dependent feature. public class VowpalWabbitAsync : IDisposable { /// /// The owning manager. /// private readonly VowpalWabbitThreadedLearning manager; /// /// The serializers are not thread-safe. Thus we need to allocate one for each VW instance. /// private VowpalWabbitSingleExampleSerializer[] serializers; /// /// The serializers are not thread-safe. Thus we need to allocate one for each VW instance. /// private VowpalWabbitSingleExampleSerializer[] actionDependentFeatureSerializers; internal VowpalWabbitAsync(VowpalWabbitThreadedLearning manager) { if (manager == null) throw new ArgumentNullException("manager"); if (manager.Settings == null) throw new ArgumentNullException("manager.Settings"); if (manager.Settings.ParallelOptions == null) throw new ArgumentNullException("manager.Settings.ParallelOptions"); if (manager.Settings.ParallelOptions.MaxDegreeOfParallelism <= 0) throw new ArgumentOutOfRangeException("MaxDegreeOfParallelism must be greater than zero."); Contract.Ensures(this.serializers != null); Contract.Ensures(this.actionDependentFeatureSerializers != null); Contract.EndContractBlock(); this.manager = manager; // create a serializer for each instance - maintaining separate example caches var serializer = VowpalWabbitSerializerFactory.CreateSerializer(manager.Settings) as VowpalWabbitSingleExampleSerializerCompiler; if (serializer == null) throw new ArgumentException(string.Format( "{0} maps to a multiline example. Use VowpalWabbitAsync<{0}> instead.", typeof(TExample))); var adfSerializer = VowpalWabbitSerializerFactory.CreateSerializer(manager.Settings) as VowpalWabbitSingleExampleSerializerCompiler; if (adfSerializer == null) throw new ArgumentException(string.Format( "{0} maps to a multiline example. 
Use VowpalWabbitAsync<{0}> instead.", typeof(TActionDependentFeature))); this.serializers = this.manager.VowpalWabbits .Select(vw => serializer.Create(vw)) .ToArray(); this.actionDependentFeatureSerializers = this.manager.VowpalWabbits .Select(vw => adfSerializer.Create(vw)) .ToArray(); } /// /// Learn from the given example and return the current prediction for it. /// /// The shared example. /// The action dependent features. /// The index of the example to learn within . /// The label for the example to learn. public void Learn(TExample example, IReadOnlyCollection actionDependentFeatures, int index, ILabel label) { Contract.Requires(example != null); Contract.Requires(actionDependentFeatures != null); Contract.Requires(index >= 0); Contract.Requires(label != null); manager.Post(vw => VowpalWabbitMultiLine.Learn( vw, this.serializers[vw.Settings.Node], this.actionDependentFeatureSerializers[vw.Settings.Node], example, actionDependentFeatures, index, label)); } /// /// Learn from the given example and return the current prediction for it. /// /// The shared example. /// The action dependent features. /// The index of the example to learn within . /// The label for the example to learn. /// The ranked prediction for the given examples. public Task[]> LearnAndPredict(TExample example, IReadOnlyCollection actionDependentFeatures, int index, ILabel label) { Contract.Requires(example != null); Contract.Requires(actionDependentFeatures != null); Contract.Requires(index >= 0); Contract.Requires(label != null); return manager.Post(vw => VowpalWabbitMultiLine.LearnAndPredict( vw, this.serializers[vw.Settings.Node], this.actionDependentFeatureSerializers[vw.Settings.Node], example, actionDependentFeatures, index, label)); } /// /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources. 
/// public void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { if (disposing) { if (this.serializers != null) { foreach (var serializer in this.serializers) { // free cached examples serializer.Dispose(); } this.serializers = null; } if (this.actionDependentFeatureSerializers != null) { foreach (var serializer in this.actionDependentFeatureSerializers) { // free cached examples serializer.Dispose(); } this.actionDependentFeatureSerializers = null; } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs_parallel/VowpalWabbitThreadedLearning.cs000066400000000000000000000432551332666127000262650ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; using System.Threading; using System.Threading.Tasks; using System.Threading.Tasks.Dataflow; namespace VW { /// /// VW wrapper supporting multi-core learning by utilizing thread-based allreduce. /// public class VowpalWabbitThreadedLearning : IDisposable { /// /// Random generator used by uniform random example distributor. /// /// Initialized with static seed to enable reproducability. private readonly Random random = new Random(42); /// /// Configurable example distribution function choosing the vw instance for the next example. /// private readonly Func exampleDistributor; /// /// Native vw instances setup for thread-based allreduce /// private VowpalWabbit[] vws; /// /// Worker threads with a nice message queue infront that will start blocking once it's too full. 
/// private readonly ActionBlock>[] actionBlocks; /// /// The only offer non-blocking methods. Getting observers and calling OnNext() enables /// blocking once the queue is full. /// private readonly IObserver>[] observers; /// /// Invoked right after the root node performed AllReduce with the other instances. /// private readonly ConcurrentList> syncActions; /// /// Task enable waiting for clients on completion after all action blocks have finished (incl. cleanup). /// private Task[] completionTasks; /// /// Number of examples seen sofar. Used by round robin example distributor. /// private int exampleCount; /// /// Initializes a new instance of the class. /// /// Common settings used for vw instances. public VowpalWabbitThreadedLearning(VowpalWabbitSettings settings) { if (settings == null) throw new ArgumentNullException("settings"); if (settings.ParallelOptions == null) throw new ArgumentNullException("settings.ParallelOptions must be set"); Contract.EndContractBlock(); this.Settings = settings; if (this.Settings.ParallelOptions.CancellationToken == null) this.Settings.ParallelOptions.CancellationToken = new CancellationToken(); switch (this.Settings.ExampleDistribution) { case VowpalWabbitExampleDistribution.UniformRandom: this.exampleDistributor = _ => this.random.Next(this.observers.Length); break; case VowpalWabbitExampleDistribution.RoundRobin: this.exampleDistributor = localExampleCount => (int)(localExampleCount % this.observers.Length); break; } this.exampleCount = 0; this.syncActions = new ConcurrentList>(); this.vws = new VowpalWabbit[settings.ParallelOptions.MaxDegreeOfParallelism]; this.actionBlocks = new ActionBlock>[settings.ParallelOptions.MaxDegreeOfParallelism]; this.observers = new IObserver>[settings.ParallelOptions.MaxDegreeOfParallelism]; // setup AllReduce chain // root closure { var nodeSettings = (VowpalWabbitSettings)settings.Clone(); nodeSettings.Node = 0; var vw = this.vws[0] = new VowpalWabbit(nodeSettings); var actionBlock = 
this.actionBlocks[0] = new ActionBlock>( action => action(vw), new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 1, TaskScheduler = settings.ParallelOptions.TaskScheduler, CancellationToken = settings.ParallelOptions.CancellationToken, BoundedCapacity = (int)settings.MaxExampleQueueLengthPerInstance }); } for (int i = 1; i < settings.ParallelOptions.MaxDegreeOfParallelism; i++) { // closure vars var nodeSettings = (VowpalWabbitSettings)settings.Clone(); nodeSettings.Root = this.vws[0]; nodeSettings.Node = (uint)i; var vw = this.vws[i] = new VowpalWabbit(nodeSettings); var actionBlock = this.actionBlocks[i] = new ActionBlock>( action => action(vw), new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 1, TaskScheduler = settings.ParallelOptions.TaskScheduler, CancellationToken = settings.ParallelOptions.CancellationToken, BoundedCapacity = (int)settings.MaxExampleQueueLengthPerInstance }); } // get observers to allow for blocking calls this.observers = this.actionBlocks.Select(ab => ab.AsObserver()).ToArray(); this.completionTasks = new Task[settings.ParallelOptions.MaxDegreeOfParallelism]; // root closure { var vw = this.vws[0]; this.completionTasks[0] = this.actionBlocks[0].Completion .ContinueWith(_ => { // perform final AllReduce vw.EndOfPass(); // execute synchronization actions foreach (var syncAction in this.syncActions.RemoveAll()) { syncAction(vw); } }); } for (int i = 1; i < this.vws.Length; i++) { // perform final AllReduce var vw = this.vws[i]; this.completionTasks[i] = this.actionBlocks[i].Completion .ContinueWith(_ => vw.EndOfPass(), this.Settings.ParallelOptions.CancellationToken); } } /// /// VowpalWabbit instances participating in AllReduce. /// public VowpalWabbit[] VowpalWabbits { get { return this.vws; } } /// /// Creates a new instance of to feed examples of type . /// /// The user example type. /// A new instance of . 
public VowpalWabbitAsync Create() { return new VowpalWabbitAsync(this); } /// /// Creates a new instance of to feed multi-line /// examples of type and . /// /// The user example type. /// The user action dependent feature type. /// A new instance of . public VowpalWabbitAsync Create() { return new VowpalWabbitAsync(this); } /// /// Everytime examples have been enqueued, /// an AllReduce-sync operation () is injected. /// /// The number of examples enqueued so far. private uint CheckEndOfPass() { var exampleCount = (uint)Interlocked.Increment(ref this.exampleCount); // since there is no lock the input queue, it's not guaranteed that exactly // that number of examples are processed (but maybe a few more). if (exampleCount % this.Settings.ExampleCountPerRun == 0) { this.observers[0].OnNext(vw => { // perform AllReduce vw.EndOfPass(); // execute synchronization actions foreach (var syncAction in this.syncActions.RemoveAll()) { syncAction(vw); } }); for (int i = 1; i < this.observers.Length; i++) { // perform AllReduce this.observers[i].OnNext(vw => vw.EndOfPass()); } } return exampleCount; } /// /// Enqueues an action to be executed on one of vw instances. /// /// The action to be executed (e.g. Learn/Predict/...). /// If number of actions waiting to be executed has reached this method blocks. public void Post(Action action) { Contract.Requires(action != null); var exampleCount = this.CheckEndOfPass(); // dispatch this.observers[this.exampleDistributor(exampleCount)].OnNext(action); } /// /// Enqueues a task to be executed by single VowpalWabbit instance. /// /// Which VowpalWabbit instance chosen, is determined by . /// The return type of the task. /// The task to be executed. /// The awaitable result of the supplied task. 
internal Task Post(Func func) { Contract.Requires(func!= null); var exampleCount = this.CheckEndOfPass(); var completionSource = new TaskCompletionSource(); // dispatch to a Vowpal Wabbit instance this.observers[this.exampleDistributor(exampleCount)].OnNext(vw => { try { completionSource.SetResult(func(vw)); } catch (Exception ex) { completionSource.SetException(ex); } }); return completionSource.Task; } /// /// Learns from the given example. /// /// The example to learn. public void Learn(string line) { Contract.Requires(line != null); this.Post(vw => vw.Learn(line)); } /// /// Learns from the given example. /// /// The multi-line example to learn. public void Learn(IEnumerable lines) { Contract.Requires(lines != null); this.Post(vw => vw.Learn(lines)); } /// /// Synchronized performance statistics. /// /// The task is only completed after synchronization of all instances, triggered example. public Task PerformanceStatistics { get { var completionSource = new TaskCompletionSource(); this.syncActions.Add(vw => completionSource.SetResult(vw.PerformanceStatistics)); return completionSource.Task; } } /// /// Signal that no more examples are send. /// /// Task completes once the learning and cleanup is done. public Task Complete() { // make sure no more sync actions are added, which might otherwise never been called this.syncActions.CompleteAdding(); foreach (var actionBlock in this.actionBlocks) { actionBlock.Complete(); } return Task.WhenAll(this.completionTasks); } /// /// Saves a model as part of the synchronization. /// /// Task compeletes once the model is saved. public Task SaveModel() { var completionSource = new TaskCompletionSource(); this.syncActions.Add(vw => { vw.SaveModel(); completionSource.SetResult(true); }); return completionSource.Task; } /// /// Saves a model as part of the synchronization. /// /// Task compeletes once the model is saved. 
public Task SaveModel(string filename) { Contract.Requires(!string.IsNullOrEmpty(filename)); var completionSource = new TaskCompletionSource(); this.syncActions.Add(vw => { vw.SaveModel(filename); completionSource.SetResult(true); }); return completionSource.Task; } /// /// The settings shared across all instances. /// public VowpalWabbitSettings Settings { get; private set; } /// /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources. /// public void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { if (disposing) { if (this.completionTasks != null) { // mark completion this.Complete() .Wait(this.Settings.ParallelOptions.CancellationToken); // wait for all actionblocks to finish Task.WhenAll(this.completionTasks) .Wait(this.Settings.ParallelOptions.CancellationToken); this.completionTasks = null; } if (this.vws != null) { foreach (var vw in this.vws) { vw.Dispose(); } this.vws = null; } } } /// /// Thread-safe list implementation supporting completion. /// /// The element type. private class ConcurrentList { private bool completed = false; private readonly List items = new List(); private readonly object lockObject = new object(); /// /// Adds an object to the end of the list. /// /// The object to be added to the list. /// Throws an if the as called previously. public void Add(T item) { lock (this.lockObject) { if (completed) { throw new InvalidOperationException("ConcurrentList has been marked completed."); } this.items.Add(item); } } /// /// Marks this list as complete. Any subsequent calls to will trigger an . /// public void CompleteAdding() { lock (this.lockObject) { this.completed = true; } } /// /// Removes all elements from the list. /// /// The elements removed. 
public T[] RemoveAll() { lock (this.lockObject) { var ret = this.items.ToArray(); this.items.Clear(); return ret; } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs_parallel/cs_parallel.csproj000066400000000000000000000137411332666127000237140ustar00rootroot00000000000000 Debug AnyCPU {08636F79-5577-4AF2-8EED-EC8A5BC14AC4} Library Properties VW VowpalWabbit.Parallel v4.5.2 512 ..\..\vowpalwabbit\ true ..\..\vowpalwabbit\x64\Debug\ DEBUG;TRACE full x64 prompt MinimumRecommendedRules.ruleset ..\..\vowpalwabbit\x64\Debug\VowpalWabbit.Parallel.XML ..\..\vowpalwabbit\x64\Release\ TRACE true pdbonly x64 prompt MinimumRecommendedRules.ruleset ..\..\vowpalwabbit\x64\Release\VowpalWabbit.Parallel.XML true ..\vw_key.snk $(SolutionDir)\packages\gitlink.2.3.0\lib\net45\GitLink.exe True $(SolutionDir)\packages\Microsoft.Tpl.Dataflow.4.5.24\lib\portable-net45+win8+wpa81\System.Threading.Tasks.Dataflow.dll True {85e55ae0-3784-4968-9271-c81af560e1c1} vw_clr {e621e022-c1f8-433f-905a-ab9a3de072b7} vw_common {e4e962ae-7056-4eb0-a8c5-8dc824a4b068} cs This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. vowpal-wabbit-8.6.1.dfsg1/cs/cs_parallel/cs_parallel.nuspec000066400000000000000000000024551332666127000237110ustar00rootroot00000000000000 Vowpal Wabbit Parallel VowpalWabbit.Parallel vw vowpal wabbit langford ml machine learning John Langford et al $version$ https://github.com/JohnLangford/vowpal_wabbit/wiki/C%23-Binding https://github.com/JohnLangford/vowpal_wabbit/blob/master/LICENSE false Official Vowpal Wabbit library including C# interface Copyright (C) Microsoft Corp 2012-2016, Yahoo! Inc. 2007-2012, and many individual contributors. All rights reserved. 
vowpal-wabbit-8.6.1.dfsg1/cs/cs_parallel/packages.config000066400000000000000000000005001332666127000231430ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/cs_unittest_nofriend/000077500000000000000000000000001332666127000221525ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/cs_unittest_nofriend/Properties/000077500000000000000000000000001332666127000243065ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/cs_unittest_nofriend/Properties/AssemblyInfo.cs000066400000000000000000000026101332666127000272270ustar00rootroot00000000000000using System.Reflection; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; // General Information about an assembly is controlled through the following // set of attributes. Change these attribute values to modify the information // associated with an assembly. [assembly: AssemblyTitle("cs_unittest_nofriend")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("")] [assembly: AssemblyProduct("cs_unittest_nofriend")] [assembly: AssemblyCopyright("Copyright © 2016")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] // Setting ComVisible to false makes the types in this assembly not visible // to COM components. If you need to access a type in this assembly from // COM, set the ComVisible attribute to true on that type. 
[assembly: ComVisible(false)] // The following GUID is for the ID of the typelib if this project is exposed to COM [assembly: Guid("49a85b7e-197e-4265-90d0-886ea6c014f3")] // Version information for an assembly consists of the following four values: // // Major Version // Minor Version // Build Number // Revision // // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] [assembly: AssemblyVersion("1.0.0.0")] [assembly: AssemblyFileVersion("1.0.0.0")] vowpal-wabbit-8.6.1.dfsg1/cs/cs_unittest_nofriend/TestSerializer.cs000066400000000000000000000010241332666127000254470ustar00rootroot00000000000000using System; using Microsoft.VisualStudio.TestTools.UnitTesting; using VW.Serializer.Attributes; using VW; namespace cs_unittest_nofriend { class PrivateClass { [Feature] public int A { get; set; } } [TestClass] public class TestSerializer { [TestMethod] [ExpectedException(typeof(ArgumentException))] public void TestPrivateClassException() { using (var vw = new VowpalWabbit("")) { } } } } vowpal-wabbit-8.6.1.dfsg1/cs/cs_unittest_nofriend/cs_unittest_nofriend.csproj000066400000000000000000000113331332666127000276250ustar00rootroot00000000000000 Debug AnyCPU {49A85B7E-197E-4265-90D0-886EA6C014F3} Library Properties cs_unittest_nofriend cs_unittest_nofriend v4.5.2 512 {3AC096D0-A1C2-E12C-1390-A8335801FDAB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} 10.0 $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion) $(ProgramFiles)\Common Files\microsoft shared\VSTT\$(VisualStudioVersion)\UITestExtensionPackages False UnitTest true full false bin\Debug\ DEBUG;TRACE prompt 4 pdbonly true bin\Release\ TRACE prompt 4 {85e55ae0-3784-4968-9271-c81af560e1c1} vw_clr {e621e022-c1f8-433f-905a-ab9a3de072b7} vw_common {e4e962ae-7056-4eb0-a8c5-8dc824a4b068} cs False False False False 
vowpal-wabbit-8.6.1.dfsg1/cs/leaktest/000077500000000000000000000000001332666127000175365ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/leaktest/Properties/000077500000000000000000000000001332666127000216725ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/leaktest/Properties/AssemblyInfo.cs000066400000000000000000000025661332666127000246250ustar00rootroot00000000000000using System.Reflection; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; // General Information about an assembly is controlled through the following // set of attributes. Change these attribute values to modify the information // associated with an assembly. [assembly: AssemblyTitle("cs_leaktest")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("")] [assembly: AssemblyProduct("cs_leaktest")] [assembly: AssemblyCopyright("Copyright © 2015")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] // Setting ComVisible to false makes the types in this assembly not visible // to COM components. If you need to access a type in this assembly from // COM, set the ComVisible attribute to true on that type. 
[assembly: ComVisible(false)]

// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("d5a87745-39ce-427d-8463-5dde8d630f26")]

// Version information for an assembly consists of the following four values:
//
//      Major Version
//      Minor Version
//      Build Number
//      Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
vowpal-wabbit-8.6.1.dfsg1/cs/leaktest/TestLeak.cs000066400000000000000000000016621332666127000216060ustar00rootroot00000000000000
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace cs_leaktest
{
    /// <summary>
    /// Sanity checks for the leak-detection harness itself.
    /// </summary>
    [TestClass]
    public class TestLeakClass : TestWrappedBase
    {
#if DEBUG
        /// <summary>
        /// Tests if the leak detector actually works.
        /// </summary>
        /// <remarks>
        /// Only possible in debug as VLD is not linked against in release.
        /// </remarks>
        [TestMethod]
        public void TestLeak()
        {
            var leakReported = false;

            try
            {
                Run("cs_unittest.TestLeakClass", "Leak");
            }
            catch (AssertFailedException ex)
            {
                leakReported = true;

                // 123 * 4 bytes are expected to be reported.
                Assert.IsTrue(
                    ex.Message.Contains("Total 492 bytes"),
                    "Unexpected failure message: " + ex.Message);
            }

            // Without this check the test would also pass when Run() completes
            // normally, i.e. when no leak was reported at all.
            Assert.IsTrue(leakReported, "Expected the wrapped test to fail with a leak report, but it passed.");
        }

        /// <summary>
        /// Runs the wrapped "NoLeak" test, which is expected to complete without a failure.
        /// </summary>
        [TestMethod]
        public void TestNoLeak()
        {
            Run("cs_unittest.TestLeakClass", "NoLeak");
        }
#endif
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/leaktest/TestWrapped.cs000066400000000000000000001231171332666127000223340ustar00rootroot00000000000000
using Microsoft.VisualStudio.TestTools.UnitTesting; using cs_leaktest; namespace cs_unittest { [TestClass] public class TestWrapped : TestWrappedBase { [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test1() { Run("cs_unittest.RunTests", "CommandLine_Test1"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test2() { Run("cs_unittest.RunTests", "CommandLine_Test2"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test3() { Run("cs_unittest.RunTests", "CommandLine_Test3"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test4() { Run("cs_unittest.RunTests", "CommandLine_Test4"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test5() { Run("cs_unittest.RunTests", "CommandLine_Test5"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test6() { Run("cs_unittest.RunTests", "CommandLine_Test6"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test7() { Run("cs_unittest.RunTests", "CommandLine_Test7"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test8() { Run("cs_unittest.RunTests", "CommandLine_Test8"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test11() { Run("cs_unittest.RunTests", "CommandLine_Test11"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test12() { Run("cs_unittest.RunTests", "CommandLine_Test12"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test15() { Run("cs_unittest.RunTests", 
"CommandLine_Test15"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test21() { Run("cs_unittest.RunTests", "CommandLine_Test21"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test22() { Run("cs_unittest.RunTests", "CommandLine_Test22"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test23() { Run("cs_unittest.RunTests", "CommandLine_Test23"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test27() { Run("cs_unittest.RunTests", "CommandLine_Test27"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test28() { Run("cs_unittest.RunTests", "CommandLine_Test28"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test29() { Run("cs_unittest.RunTests", "CommandLine_Test29"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test30() { Run("cs_unittest.RunTests", "CommandLine_Test30"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test35() { Run("cs_unittest.RunTests", "CommandLine_Test35"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test36() { Run("cs_unittest.RunTests", "CommandLine_Test36"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test37() { Run("cs_unittest.RunTests", "CommandLine_Test37"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test38() { Run("cs_unittest.RunTests", "CommandLine_Test38"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test39() { Run("cs_unittest.RunTests", "CommandLine_Test39"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test40() { Run("cs_unittest.RunTests", "CommandLine_Test40"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test41() { Run("cs_unittest.RunTests", "CommandLine_Test41"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test62() { Run("cs_unittest.RunTests", 
"CommandLine_Test62"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test63() { Run("cs_unittest.RunTests", "CommandLine_Test63"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test64() { Run("cs_unittest.RunTests", "CommandLine_Test64"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test72() { Run("cs_unittest.RunTests", "CommandLine_Test72"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test73() { Run("cs_unittest.RunTests", "CommandLine_Test73"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test74() { Run("cs_unittest.RunTests", "CommandLine_Test74"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test75() { Run("cs_unittest.RunTests", "CommandLine_Test75"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test76() { Run("cs_unittest.RunTests", "CommandLine_Test76"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test78() { Run("cs_unittest.RunTests", "CommandLine_Test78"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test79() { Run("cs_unittest.RunTests", "CommandLine_Test79"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test80() { Run("cs_unittest.RunTests", "CommandLine_Test80"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test81() { Run("cs_unittest.RunTests", "CommandLine_Test81"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test82() { Run("cs_unittest.RunTests", "CommandLine_Test82"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test83() { Run("cs_unittest.RunTests", "CommandLine_Test83"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test88() { Run("cs_unittest.RunTests", "CommandLine_Test88"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test89() { Run("cs_unittest.RunTests", 
"CommandLine_Test89"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test90() { Run("cs_unittest.RunTests", "CommandLine_Test90"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test91() { Run("cs_unittest.RunTests", "CommandLine_Test91"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test94() { Run("cs_unittest.RunTests", "CommandLine_Test94"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test95() { Run("cs_unittest.RunTests", "CommandLine_Test95"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test97() { Run("cs_unittest.RunTests", "CommandLine_Test97"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test107() { Run("cs_unittest.RunTests", "CommandLine_Test107"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test108() { Run("cs_unittest.RunTests", "CommandLine_Test108"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test109() { Run("cs_unittest.RunTests", "CommandLine_Test109"); } [TestCategory("Command Line")] [TestMethod] public void CommandLine_Test113() { Run("cs_unittest.RunTests", "CommandLine_Test113"); } [TestMethod] public void TestAllReduce() { Run("cs_unittest.TestAllReduceClass", "TestAllReduce"); } [TestMethod] public void TestExampleCacheForLearning() { Run("cs_unittest.TestExampleCacheCases", "TestExampleCacheForLearning"); } [TestMethod] public void TestExampleCacheDisabledForLearning() { Run("cs_unittest.TestExampleCacheCases", "TestExampleCacheDisabledForLearning"); } [TestMethod] public void TestExampleCache() { Run("cs_unittest.TestExampleCacheCases", "TestExampleCache"); } [TestMethod] public void TestHash() { Run("cs_unittest.TestManagedHash", "TestHash"); } [TestCategory("Marshal")] [TestMethod] public void TestEnumerize() { Run("cs_unittest.TestMarshalling", "TestEnumerize"); } [TestCategory("Marshal")] [TestMethod] public void TestString() { 
Run("cs_unittest.TestMarshalling", "TestString"); } [TestCategory("Marshal")] [TestMethod] public void TestStringFeatureGroup() { Run("cs_unittest.TestMarshalling", "TestStringFeatureGroup"); } [TestCategory("Marshal")] [TestMethod] public void TestStringNamespace() { Run("cs_unittest.TestMarshalling", "TestStringNamespace"); } [TestCategory("Marshal")] [TestMethod] public void TestStringEscape() { Run("cs_unittest.TestMarshalling", "TestStringEscape"); } [TestCategory("Marshal")] [TestMethod] public void TestStringSplit() { Run("cs_unittest.TestMarshalling", "TestStringSplit"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionary() { Run("cs_unittest.TestMarshalling", "TestDictionary"); } [TestCategory("Marshal")] [TestMethod] public void TestCustomType() { Run("cs_unittest.TestMarshalling", "TestCustomType"); } [TestCategory("Marshal")] [TestMethod] public void TestEnumerableString() { Run("cs_unittest.TestMarshalling", "TestEnumerableString"); } [TestCategory("Marshal")] [TestMethod] public void TestEnumerableKV() { Run("cs_unittest.TestMarshalling", "TestEnumerableKV"); } [TestCategory("Marshal")] [TestMethod] public void TestComplexType() { Run("cs_unittest.TestMarshalling", "TestComplexType"); } [TestCategory("Marshal")] [TestMethod] public void TestEnumerizePosition() { Run("cs_unittest.TestMarshalling", "TestEnumerizePosition"); } [TestCategory("Marshal")] [TestMethod] public void TestBool() { Run("cs_unittest.TestMarshalling", "TestBool"); } [TestCategory("Marshal")] [TestMethod] public void TestFeatureDiscoveryAll() { Run("cs_unittest.TestMarshalling", "TestFeatureDiscoveryAll"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericInt64Overflow() { Run("cs_unittest.TestMarshallingOverflow", "TestNumericInt64Overflow"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericUInt64Overflow() { Run("cs_unittest.TestMarshallingOverflow", "TestNumericUInt64Overflow"); } [TestCategory("Marshal")] [TestMethod] public void 
TestNumericDoubleOverflow() { Run("cs_unittest.TestMarshallingOverflow", "TestNumericDoubleOverflow"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericInt64OverflowArray() { Run("cs_unittest.TestMarshallingOverflow", "TestNumericInt64OverflowArray"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericUInt64OverflowArray() { Run("cs_unittest.TestMarshallingOverflow", "TestNumericUInt64OverflowArray"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericDoubleOverflowArray() { Run("cs_unittest.TestMarshallingOverflow", "TestNumericDoubleOverflowArray"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryUInt16UInt32() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryUInt16UInt32"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryUInt16Single() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryUInt16Single"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryUInt16Int64() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryUInt16Int64"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryUInt16UInt64() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryUInt16UInt64"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryUInt16Double() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryUInt16Double"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericByte() { Run("cs_unittest.TestMarshalNumeric", "TestNumericByte"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericByteArray() { Run("cs_unittest.TestMarshalNumeric", "TestNumericByteArray"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericByteArrayAnchor() { Run("cs_unittest.TestMarshalNumeric", "TestNumericByteArrayAnchor"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericSByte() { Run("cs_unittest.TestMarshalNumeric", "TestNumericSByte"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericSByteArray() { 
Run("cs_unittest.TestMarshalNumeric", "TestNumericSByteArray"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericSByteArrayAnchor() { Run("cs_unittest.TestMarshalNumeric", "TestNumericSByteArrayAnchor"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericInt16() { Run("cs_unittest.TestMarshalNumeric", "TestNumericInt16"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericInt16Array() { Run("cs_unittest.TestMarshalNumeric", "TestNumericInt16Array"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericInt16ArrayAnchor() { Run("cs_unittest.TestMarshalNumeric", "TestNumericInt16ArrayAnchor"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericInt32() { Run("cs_unittest.TestMarshalNumeric", "TestNumericInt32"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericInt32Array() { Run("cs_unittest.TestMarshalNumeric", "TestNumericInt32Array"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericInt32ArrayAnchor() { Run("cs_unittest.TestMarshalNumeric", "TestNumericInt32ArrayAnchor"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericUInt16() { Run("cs_unittest.TestMarshalNumeric", "TestNumericUInt16"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericUInt16Array() { Run("cs_unittest.TestMarshalNumeric", "TestNumericUInt16Array"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericUInt16ArrayAnchor() { Run("cs_unittest.TestMarshalNumeric", "TestNumericUInt16ArrayAnchor"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericUInt32() { Run("cs_unittest.TestMarshalNumeric", "TestNumericUInt32"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericUInt32Array() { Run("cs_unittest.TestMarshalNumeric", "TestNumericUInt32Array"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericUInt32ArrayAnchor() { Run("cs_unittest.TestMarshalNumeric", "TestNumericUInt32ArrayAnchor"); } [TestCategory("Marshal")] [TestMethod] 
public void TestNumericSingle() { Run("cs_unittest.TestMarshalNumeric", "TestNumericSingle"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericSingleArray() { Run("cs_unittest.TestMarshalNumeric", "TestNumericSingleArray"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericSingleArrayAnchor() { Run("cs_unittest.TestMarshalNumeric", "TestNumericSingleArrayAnchor"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericInt64() { Run("cs_unittest.TestMarshalNumeric", "TestNumericInt64"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericInt64Array() { Run("cs_unittest.TestMarshalNumeric", "TestNumericInt64Array"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericInt64ArrayAnchor() { Run("cs_unittest.TestMarshalNumeric", "TestNumericInt64ArrayAnchor"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericUInt64() { Run("cs_unittest.TestMarshalNumeric", "TestNumericUInt64"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericUInt64Array() { Run("cs_unittest.TestMarshalNumeric", "TestNumericUInt64Array"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericUInt64ArrayAnchor() { Run("cs_unittest.TestMarshalNumeric", "TestNumericUInt64ArrayAnchor"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericDouble() { Run("cs_unittest.TestMarshalNumeric", "TestNumericDouble"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericDoubleArray() { Run("cs_unittest.TestMarshalNumeric", "TestNumericDoubleArray"); } [TestCategory("Marshal")] [TestMethod] public void TestNumericDoubleArrayAnchor() { Run("cs_unittest.TestMarshalNumeric", "TestNumericDoubleArrayAnchor"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryByte() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryByte"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryByteString() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryByteString"); } 
[TestCategory("Marshal")] [TestMethod] public void TestDictionaryByteByte() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryByteByte"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryByteSByte() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryByteSByte"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryByteInt16() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryByteInt16"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryByteInt32() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryByteInt32"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryByteUInt16() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryByteUInt16"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryByteUInt32() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryByteUInt32"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryByteSingle() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryByteSingle"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryByteInt64() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryByteInt64"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryByteUInt64() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryByteUInt64"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryByteDouble() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryByteDouble"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionarySByte() { Run("cs_unittest.TestMarshalNumeric", "TestDictionarySByte"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionarySByteString() { Run("cs_unittest.TestMarshalNumeric", "TestDictionarySByteString"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionarySByteByte() { Run("cs_unittest.TestMarshalNumeric", "TestDictionarySByteByte"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionarySByteSByte() { 
Run("cs_unittest.TestMarshalNumeric", "TestDictionarySByteSByte"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionarySByteInt16() { Run("cs_unittest.TestMarshalNumeric", "TestDictionarySByteInt16"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionarySByteInt32() { Run("cs_unittest.TestMarshalNumeric", "TestDictionarySByteInt32"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionarySByteUInt16() { Run("cs_unittest.TestMarshalNumeric", "TestDictionarySByteUInt16"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionarySByteUInt32() { Run("cs_unittest.TestMarshalNumeric", "TestDictionarySByteUInt32"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionarySByteSingle() { Run("cs_unittest.TestMarshalNumeric", "TestDictionarySByteSingle"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionarySByteInt64() { Run("cs_unittest.TestMarshalNumeric", "TestDictionarySByteInt64"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionarySByteUInt64() { Run("cs_unittest.TestMarshalNumeric", "TestDictionarySByteUInt64"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionarySByteDouble() { Run("cs_unittest.TestMarshalNumeric", "TestDictionarySByteDouble"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt16() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt16"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt16String() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt16String"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt16Byte() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt16Byte"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt16SByte() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt16SByte"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt16Int16() { Run("cs_unittest.TestMarshalNumeric", 
"TestDictionaryInt16Int16"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt16Int32() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt16Int32"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt16UInt16() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt16UInt16"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt16UInt32() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt16UInt32"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt16Single() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt16Single"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt16Int64() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt16Int64"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt16UInt64() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt16UInt64"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt16Double() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt16Double"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt32() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt32"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt32String() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt32String"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt32Byte() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt32Byte"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt32SByte() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt32SByte"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt32Int16() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt32Int16"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt32Int32() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt32Int32"); } [TestCategory("Marshal")] 
[TestMethod] public void TestDictionaryInt32UInt16() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt32UInt16"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt32UInt32() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt32UInt32"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt32Single() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt32Single"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt32Int64() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt32Int64"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt32UInt64() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt32UInt64"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryInt32Double() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryInt32Double"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryUInt16() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryUInt16"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryUInt16String() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryUInt16String"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryUInt16Byte() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryUInt16Byte"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryUInt16SByte() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryUInt16SByte"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryUInt16Int16() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryUInt16Int16"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryUInt16Int32() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryUInt16Int32"); } [TestCategory("Marshal")] [TestMethod] public void TestDictionaryUInt16UInt16() { Run("cs_unittest.TestMarshalNumeric", "TestDictionaryUInt16UInt16"); } [TestCategory("Model Loading")] [TestMethod] public void 
TestLoadModelCorrupt() { Run("cs_unittest.TestModelLoading", "TestLoadModelCorrupt"); } [TestCategory("Model Loading")] [TestMethod] public void TestLoadModel() { Run("cs_unittest.TestModelLoading", "TestLoadModel"); } [TestCategory("Model Loading")] [TestMethod] public void TestLoadModelRandomCorrupt() { Run("cs_unittest.TestModelLoading", "TestLoadModelRandomCorrupt"); } [TestCategory("Model Loading")] [TestMethod] public void TestLoadModelInMemory() { Run("cs_unittest.TestModelLoading", "TestLoadModelInMemory"); } [TestCategory("Model Loading")] [TestMethod] public void TestID() { Run("cs_unittest.TestModelLoading", "TestID"); } [TestCategory("Model Loading")] [TestMethod] public void TestReload() { Run("cs_unittest.TestModelLoading", "TestReload"); } [TestCategory("Null")] [TestMethod] public void TestNull1() { Run("cs_unittest.TestNull", "TestNull1"); } [TestCategory("Null")] [TestMethod] public void TestNull2() { Run("cs_unittest.TestNull", "TestNull2"); } [TestCategory("Null")] [TestMethod] public void TestNull3() { Run("cs_unittest.TestNull", "TestNull3"); } [TestCategory("Null")] [TestMethod] public void TestNull4() { Run("cs_unittest.TestNull", "TestNull4"); } [TestCategory("Null")] [TestMethod] public void TestNull5() { Run("cs_unittest.TestNull", "TestNull5"); } [TestMethod] public void TestCustomFeaturizer() { Run("cs_unittest.TestSerializer", "TestCustomFeaturizer"); } [TestMethod] public void TestCustomFeaturizerOverideMethod() { Run("cs_unittest.TestSerializer", "TestCustomFeaturizerOverideMethod"); } [TestMethod] public void TestDictify() { Run("cs_unittest.TestSerializer", "TestDictify"); } [TestCategory("Command line through marshalling")] [TestMethod] public void Test1and2() { Run("cs_test.Test1and2Class", "Test1and2"); } [TestCategory("Command line through marshalling")] [TestMethod] public void Test3() { Run("cs_unittest.Test3Class", "Test3"); } [TestCategory("Command line through marshalling")] [TestMethod] public void Test4and6() { 
Run("cs_unittest.Test3Class", "Test4and6"); } [TestCategory("Command line through marshalling")] [TestMethod] public void Test5() { Run("cs_unittest.Test3Class", "Test5"); } [TestCategory("Command line through marshalling")] [TestMethod] public void Test7and8() { Run("cs_unittest.Test3Class", "Test7and8"); } [TestCategory("Command line through marshalling")] [TestMethod] public void Test87() { Run("cs_unittest.TestCbAdfClass", "Test87"); } [TestMethod] public void TestSharedModel() { Run("cs_unittest.TestCbAdfClass", "TestSharedModel"); } [TestMethod] public void TestAntlr() { Run("cs_unittest.TestAntlrClass", "TestAntlr"); } [TestCategory("ObjectPool")] [TestMethod] public void ObjectPoolTestEmptyFactory() { Run("cs_unittest.TestPooling", "ObjectPoolTestEmptyFactory"); } [TestCategory("ObjectPool")] [TestMethod] public void ObjectPoolTestDisposed1() { Run("cs_unittest.TestPooling", "ObjectPoolTestDisposed1"); } [TestCategory("ObjectPool")] [TestMethod] public void ObjectPoolTestDisposed2() { Run("cs_unittest.TestPooling", "ObjectPoolTestDisposed2"); } [TestCategory("ObjectPool")] [TestMethod] public void ObjectPoolTestDangling() { Run("cs_unittest.TestPooling", "ObjectPoolTestDangling"); } [TestCategory("ObjectPool")] [TestMethod] public void ObjectPoolTestFactory() { Run("cs_unittest.TestPooling", "ObjectPoolTestFactory"); } [TestCategory("ObjectPool")] [TestMethod] public void ObjectPoolTestConcurrency() { Run("cs_unittest.TestPooling", "ObjectPoolTestConcurrency"); } [TestMethod] public void VwCleanupTest() { Run("cs_unittest.TestWrapper", "VwCleanupTest"); } [TestMethod] public void VwCleanupTestError() { Run("cs_unittest.TestWrapper", "VwCleanupTestError"); } [TestMethod] public void VwModelRefCountingTest() { Run("cs_unittest.TestWrapper", "VwModelRefCountingTest"); } } } vowpal-wabbit-8.6.1.dfsg1/cs/leaktest/TestWrappedBase.cs000066400000000000000000000077621332666127000231360ustar00rootroot00000000000000using cs_testcommon; using 
Microsoft.VisualStudio.TestTools.UnitTesting;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using VLD;

namespace cs_leaktest
{
    /// <summary>
    /// Base class for tests that execute a cs_unittest test method inside a separate
    /// AppDomain and afterwards ask Visual Leak Detector for leaked blocks.
    /// </summary>
    public class TestWrappedBase
    {
        /// <summary>
        /// Separates the message from the stack trace in the string returned by the test runner.
        /// </summary>
        private const string ExceptionMarker = "#-#-#-#-#-#-#";

        /// <summary>
        /// File name of the native library that is loaded before and released after the wrapped test.
        /// </summary>
        private const string NativeLibraryName = "VowpalWabbitCore.dll";

        [DllImport("kernel32", SetLastError = true, CharSet = CharSet.Ansi)]
        static extern IntPtr LoadLibrary([MarshalAs(UnmanagedType.LPStr)]string lpFileName);

        [DllImport("kernel32.dll", SetLastError = true)]
        [return: MarshalAs(UnmanagedType.Bool)]
        static extern bool FreeLibrary(IntPtr hModule);

        /// <summary>
        /// Implement custom StackTrace so one can easily click in Test Explorer
        /// </summary>
        internal class CustomException : Exception
        {
            private readonly string stackTrace;

            internal CustomException(string message, string stackTrace)
                : base(message)
            {
                this.stackTrace = stackTrace;
            }

            public override string StackTrace
            {
                get { return this.stackTrace; }
            }
        }

        public TestContext TestContext { get; set; }

        // Static on purpose: Run() changes process-wide state (current directory,
        // native library load count, leak detector), and the test framework typically
        // creates a new test-class instance per test method, so a per-instance lock
        // object would never serialize anything.
        private static readonly object lockObject = new object();

        /// <summary>
        /// Runs <paramref name="method"/> of <paramref name="type"/> through the test runner
        /// created in a separate AppDomain, then fails if the leak detector reports any block.
        /// </summary>
        /// <param name="type">Full name of the test class inside cs_unittest.dll.</param>
        /// <param name="method">Name of the test method to execute.</param>
        /// <exception cref="CustomException">
        /// The runner returned a failure containing the exception marker; message and
        /// stack trace are taken from the returned string.
        /// </exception>
        /// <exception cref="AssertFailedException">
        /// The native library could not be loaded, the runner returned a failure without
        /// marker, or leaked blocks were reported.
        /// </exception>
        protected void Run(string type, string method)
        {
            lock (lockObject)
            {
                using (var vld = new VisualLeakDetector())
                {
                    try
                    {
                        // vowpalwabbit\x64\Debug\cs_leaktest.dll
                        var basePath = Path.GetDirectoryName(typeof(VisualLeakDetector).Assembly.Location);
                        var nativeLibraryPath = Path.Combine(basePath, NativeLibraryName);

                        var handle = LoadLibrary(nativeLibraryPath);
                        if (handle == IntPtr.Zero)
                        {
                            // Fail loudly: without a valid handle the FreeLibrary calls below
                            // do nothing and the leak check would not be meaningful.
                            var lastError = Marshal.GetLastWin32Error();
                            Assert.Fail("Unable to load '" + nativeLibraryPath + "' (Win32 error " + lastError + ")");
                        }

                        var appDomain = AppDomain.CreateDomain("Test1");
                        try
                        {
                            ITestRunner test1 = (ITestRunner)appDomain.CreateInstanceFromAndUnwrap(
                                Path.Combine(basePath, "cs_unittest.dll"),
                                "cs_unittest.TestRunner");

                            Environment.CurrentDirectory = Path.Combine(basePath, @"..\..\..\test");

                            var result = test1.Run(type, method);
                            if (result != null)
                            {
                                // check for exception marker
                                var index = result.IndexOf(ExceptionMarker);
                                if (index == -1)
                                {
                                    Assert.Fail(result);
                                }

                                throw new CustomException(
                                    result.Substring(0, index),
                                    result.Substring(index + ExceptionMarker.Length));
                            }
                        }
                        finally
                        {
                            AppDomain.Unload(appDomain);
                        }

                        try
                        {
                            // NOTE(review): FreeLibrary is called twice on purpose in the original
                            // code, presumably to also drop a second reference to the native
                            // library so that it really unloads before leaks are reported - confirm.
                            FreeLibrary(handle);
                            FreeLibrary(handle);

                            vld.ReportLeaks();

                            var message = string.Concat(vld.Messages.Select(t => t.Item2));

                            // Keep only the chunks that start with "<number> at" after the block header.
                            var blocks = message.Split(new[] { "---------- Block " }, StringSplitOptions.None)
                                .Where(block => Regex.IsMatch(block, "^\\d+ at"))
                                .ToList();

                            Assert.AreEqual(0, blocks.Count, string.Join("\n", blocks));
                        }
                        finally
                        {
                            // Load the native library again after the forced release above.
                            LoadLibrary(nativeLibraryPath);
                        }
                    }
                    finally
                    {
                        vld.MarkAllLeaksAsReported();
                    }
                }
            }
        }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/leaktest/cs_leaktest.csproj000066400000000000000000000160011332666127000232570ustar00rootroot00000000000000 Debug AnyCPU {492EA3A7-8A41-459E-BBC3-5A3FE9DDFC68} Library Properties cs_leaktest cs_leaktest v4.5.2 512 {3AC096D0-A1C2-E12C-1390-A8335801FDAB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} 10.0 $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion) $(ProgramFiles)\Common Files\microsoft shared\VSTT\$(VisualStudioVersion)\UITestExtensionPackages False UnitTest ..\..\vowpalwabbit\ fc8dceef true $(SolutionDir)\x64\Debug\ DEBUG;TRACE true full x64 prompt MinimumRecommendedRules.ruleset $(SolutionDir)\x64\Release\ TRACE true pdbonly x64 prompt MinimumRecommendedRules.ruleset true $(SolutionDir)\x64\DebugLeakCheck\ DEBUG;TRACE true full x64 prompt MinimumRecommendedRules.ruleset true {6a9cbeab-427f-4d8d-9559-b76b42b0895c} cs_testcommon {3d57a6af-de8c-40dc-abde-e4ce1b9c0d20} cs_vld dbghelp.dll PreserveNewest vld_x64.dll PreserveNewest Microsoft.DTfW.DHL.manifest PreserveNewest False False False False This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
vowpal-wabbit-8.6.1.dfsg1/cs/setup/000077500000000000000000000000001332666127000170625ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/setup/Product.wxs000066400000000000000000000055121332666127000212500ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/setup/packages.config000066400000000000000000000002021332666127000220210ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/setup/setup.wixproj000066400000000000000000000147451332666127000216610ustar00rootroot00000000000000 Debug x86 3.10 1f25d39d-71c3-4fde-a58f-d3d5d4f6dffb 2.0 VowpalWabbit Package $(MSBuildExtensionsPath32)\Microsoft\WiX\v3.x\Wix.targets $(MSBuildExtensionsPath)\Microsoft\WiX\v3.x\Wix.targets ..\..\vowpalwabbit\ ..\..\vowpalwabbit\x64\$(Configuration)\ obj\$(Configuration)\ Debug ..\..\vowpalwabbit\x64\$(Configuration)\ obj\$(Configuration)\ Debug ..\..\vowpalwabbit\x64\$(Configuration)\ obj\$(Platform)\$(Configuration)\ ..\..\vowpalwabbit\x64\$(Configuration)\ obj\$(Platform)\$(Configuration)\ True vw {1055a78f-1e3a-4e6c-bbf5-0b63299c4adf} True True Binaries;Content;Satellites INSTALLFOLDER vw_clr {85e55ae0-3784-4968-9271-c81af560e1c1} True True Binaries;Content;Satellites INSTALLFOLDER vw_common {e621e022-c1f8-433f-905a-ab9a3de072b7} True True Binaries;Content;Satellites INSTALLFOLDER cs {e4e962ae-7056-4eb0-a8c5-8dc824a4b068} True True Binaries;Content;Satellites INSTALLFOLDER cs_console {01a85382-c3e9-480a-86bf-fafe4ad107a7} True True Binaries;Content;Satellites INSTALLFOLDER cs_json {9e27fa94-ab34-4736-8427-fb7a2ba90d52} True True Binaries;Content;Satellites INSTALLFOLDER cs_parallel {08636f79-5577-4af2-8eed-ec8a5bc14ac4} True True Binaries;Content;Satellites INSTALLFOLDER This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
vowpal-wabbit-8.6.1.dfsg1/cs/setup_bundle/000077500000000000000000000000001332666127000204135ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/setup_bundle/Product.wxs000066400000000000000000000027671332666127000226120ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/setup_bundle/packages.config000066400000000000000000000002021332666127000233520ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/setup_bundle/setup_bundle.wixproj000066400000000000000000000124401332666127000245310ustar00rootroot00000000000000 Debug x86 3.10 a80650fd-7dca-4b80-b52e-d22d5cae9e92 2.0 vowpalwabbit_setup Bundle $(MSBuildExtensionsPath32)\Microsoft\WiX\v3.x\Wix.targets $(MSBuildExtensionsPath)\Microsoft\WiX\v3.x\Wix.targets ..\..\vowpalwabbit\ bin\$(Configuration)\ obj\$(Configuration)\ Debug ..\..\vowpalwabbit\x64\$(Configuration)\ obj\$(Configuration)\ Debug ..\..\vowpalwabbit\x64\$(Configuration)\ obj\$(Platform)\$(Configuration)\ ..\..\vowpalwabbit\x64\$(Configuration)\ obj\$(Platform)\$(Configuration)\ Debug bin\$(Platform)\$(Configuration)\ obj\$(Platform)\$(Configuration)\ bin\$(Platform)\$(Configuration)\ obj\$(Platform)\$(Configuration)\ Debug bin\$(Platform)\$(Configuration)\ obj\$(Platform)\$(Configuration)\ bin\$(Platform)\$(Configuration)\ obj\$(Platform)\$(Configuration)\ setup {1f25d39d-71c3-4fde-a58f-d3d5d4f6dffb} True True Binaries;Content;Satellites INSTALLFOLDER $(WixExtDir)\WixNetFxExtension.dll WixNetFxExtension $(WixExtDir)\WixBalExtension.dll WixBalExtension This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
namespace cs_test
{
    /// <summary>
    /// Sample code showing how annotated C# objects are turned into Vowpal Wabbit examples.
    /// </summary>
    public static class AttributesSample
    {
        /// <summary>
        /// Builds a sample <see cref="UserContext"/> with shared and action dependent
        /// features, then waits for a key press. The serialization and pooling samples
        /// below are commented out.
        /// </summary>
        public static void Attributes()
        {
            var d1 = new DocumentFeature
            {
                Id = "d1",
                Time = new DateTime(2015, 1, 1),
                Value = new LDAFeatureVector { Values = new[] { 1.0, 2.0, 3.0 } }
            };

            var context = new UserContext
            {
                User = new UserFeature
                {
                    Age = Age.Adult,
                    Gender = Gender.Female,
                    Location = "WA",
                    FeatureBag = new Dictionary<string, float>
                    {
                        { "Foo", 1.1f },
                        { "Bar", 2.0f }
                    }
                },
                UserLDATopicPreference = new LDAFeatureVector { Values = new[] { 0.1, 0.2, 0.3 } },
                ActionDependentFeatures = new List<DocumentFeature>
                {
                    d1,
                    new DocumentFeature
                    {
                        Id = "d2",
                        Time = new DateTime(2015,1,1),
                        Value = new LDAFeatureVector { Values = new [] { 1.0, 2.0, 3.0 } }
                    },
                    d1
                }
            };

            // NOTE(review): generic type arguments appear to be missing from the
            // commented-out samples below (e.g. "ObjectPool>(") - restore before re-enabling.

            //var visitor = new VowpalWabbitStringVisitor();
            //var serializer = VowpalWabbitSerializerFactory.CreateSerializer(visitor);
            //var serializerDependent = VowpalWabbitSerializerFactory.CreateSerializer(visitor);
            //Console.WriteLine(serializer.Serialize(context));
            //foreach (var actionDependentFeature in context.ActionDependentFeatures)
            //{
            //    Console.WriteLine(serializerDependent.Serialize(actionDependentFeature));
            //}

            //using (var pool = new ObjectPool>(() => new VowpalWabbit("")))
            //{
            //    using (var vw = pool.Get())
            //    {
            //        // do work with VW
            //        // vw.Value.CreateEmptyExample();
            //    }

            //    // don't modify this model from another thread!
            //    var newVwModel = new VowpalWabbitModel("model init");
            //    pool.UpdateFactory(() => new VowpalWabbit(newVwModel));

            //    // this will get a new VW instance with a newer version!
            //    using (var vw = pool.Get())
            //    {
            //        // do work with VW
            //        // vw.Value.CreateEmptyExample();
            //    }
            //}

            Console.ReadKey();
        }

        /// <summary>
        /// Learns one example given in VW string format and one equivalent annotated
        /// <see cref="FeatureTestContext"/>, then writes the second prediction to stderr.
        /// </summary>
        public static void RunFeaturesTest()
        {
            var context = new FeatureTestContext
            {
                S = new[] { "p^the_man", "w^thew^man\u0394", "w^man" },
                T = new[] { "p^un_homme", "w^un", "w^homme" }
            };

            // Dispose the instance so the native VW resources are released.
            using (var vw = new VowpalWabbit<FeatureTestContext>("-q st --noconstant --quiet"))
            {
                vw.Native.Learn("1 |s p^the_man w^the w^man |t p^un_homme w^un w^homme");

                var prediction = vw.Learn(context, new SimpleLabel { Label = 1f }, VowpalWabbitPredictionType.Scalar);
                Console.Error.WriteLine("p2 = {0}", prediction);
            }
        }
    }

    /// <summary>Example type with two string feature lists in feature groups 's' and 't'.</summary>
    public class FeatureTestContext
    {
        [Feature(FeatureGroup = 's')]
        public IEnumerable<string> S { get; set; }

        [Feature(FeatureGroup = 't')]
        public IEnumerable<string> T { get; set; }
    }

    /// <summary>Shared (user) features plus a list of per-document feature objects.</summary>
    public class UserContext
    {
        [Feature(Namespace = "otheruser", FeatureGroup = 'o')]
        public UserFeature User { get; set; }

        [Feature(Namespace = "userlda", FeatureGroup = 'u')]
        public LDAFeatureVector UserLDATopicPreference { get; set; }

        public IReadOnlyList<DocumentFeature> ActionDependentFeatures { get; set; }
    }

    /// <summary>
    /// Per-document features; marked cacheable with
    /// <see cref="DocumentFeatureEqualityComparer"/> as the equality comparer.
    /// </summary>
    [Cacheable(EqualityComparer = typeof(DocumentFeatureEqualityComparer))]
    public class DocumentFeature
    {
        public string Id { get; set; }

        public string SomeOtherId { get; set; }

        public DateTime Time { get; set; }

        // If we include this, it would result in mixing dense and non-dense features.
        // [Feature]
        public string ContentProvider { get; set; }

        [Feature(Namespace = "doclda", FeatureGroup = 'd')]
        public LDAFeatureVector Value { get; set; }
    }

    /// <summary>
    /// Treats two <see cref="DocumentFeature"/> objects as equal when both
    /// <see cref="DocumentFeature.Id"/> and <see cref="DocumentFeature.Time"/> match.
    /// </summary>
    public class DocumentFeatureEqualityComparer : IEqualityComparer<DocumentFeature>
    {
        /// <summary>Compares by Id and Time; two nulls are equal, null never equals non-null.</summary>
        public bool Equals(DocumentFeature x, DocumentFeature y)
        {
            if (ReferenceEquals(x, y))
            {
                return true;
            }

            if (x == null || y == null)
            {
                return false;
            }

            return x.Id == y.Id && x.Time == y.Time;
            // maybe compare the full vector - not so sure on this part though
            // x.Value.Zip(y.Value, (a, b) => a == b).All(c => c);
        }

        /// <summary>Hash consistent with <see cref="Equals"/>; a null Id contributes 0.</summary>
        public int GetHashCode(DocumentFeature obj)
        {
            unchecked
            {
                var idHash = obj.Id == null ? 0 : obj.Id.GetHashCode();
                return idHash + obj.Time.GetHashCode();
            }
        }
    }

    public enum Gender
    {
        Female,
        Male,
        Unknown
    }

    /// <summary>Dense vector of doubles exposed as a single feature.</summary>
    public class LDAFeatureVector
    {
        public string Compressed { get; set; }

        private double[] values;

        [Feature]
        public double[] Values
        {
            get
            {
                // NOTE(review): placeholder branch with no effect; the condition looks
                // inverted for a "decompress on demand" hook - confirm before implementing.
                if (this.Compressed == null)
                {
                    // e.g. call into decompression
                }

                return this.values;
            }
            set
            {
                this.values = value;
            }
        }
    }

    /// <summary>User-level features used by <see cref="UserContext"/>.</summary>
    public class UserFeature
    {
        [Feature]
        public Age Age { get; set; }

        [Feature]
        public int? Income { get; set; }

        [Feature]
        public Gender Gender { get; set; }

        [Feature]
        public string Location { get; set; }

        [Feature]
        public Dictionary<string, float> FeatureBag { get; set; }

        [Feature]
        public DayOfWeek DayOfWeek { get; set; }

        /// <summary>
        /// Will generate 24 parameters
        /// </summary>
        [Feature(Enumerize = true)]
        public int HourOfDay { get; set; }
    }

    public enum Age
    {
        Child,
        Adult,
        Elderly
    }
}
24:3.4781646e-02 69:4.6296168e-02", VowpalWabbitPredictionType.Scalar); vw.SaveModel("output.model"); } } public class MyExample { [Feature(FeatureGroup = 'p')] public float Income { get; set; } [Feature(Enumerize = true)] public int Age { get; set; } } public static void AnnotationExample() { using (var vw = new VowpalWabbit(new VowpalWabbitSettings { EnableStringExampleGeneration = true })) { var ex = new MyExample { Income = 40, Age = 25 }; var label = new SimpleLabel { Label = 1 }; var str = vw.Serializer.Create(vw.Native).SerializeToString(ex, label); // 1 |p Income:4 | Age25 vw.Learn(ex, label); var prediction = vw.Predict(ex, VowpalWabbitPredictionType.Scalar); } } public static void MultiThreadedPrediction() { var example = new MyExample { Income = 40, Age = 25 }; var vwModel = new VowpalWabbitModel("-t -i m1.model"); using (var pool = new VowpalWabbitThreadedPrediction(vwModel)) { // thread-safe using (var vw = pool.GetOrCreate()) { // vw.Value is not thread-safe vw.Value.Predict(example); } // thread-safe pool.UpdateModel(new VowpalWabbitModel("-t -i m2.model")); } } public static async Task MultiThreadedLearning() { var example = new MyExample { Income = 40, Age = 25 }; var label = new SimpleLabel { Label = 1 }; var settings = new VowpalWabbitSettings { ParallelOptions = new ParallelOptions { MaxDegreeOfParallelism = 16 }, ExampleCountPerRun = 2000, ExampleDistribution = VowpalWabbitExampleDistribution.RoundRobin }; using (var vw = new VowpalWabbitThreadedLearning(settings)) { using (var vwManaged = vw.Create()) { var prediction = await vwManaged.Learn(example, label, VowpalWabbitPredictionType.Scalar); } var saveModelTask = vw.SaveModel("m1.model"); await vw.Complete(); await saveModelTask; } } } } vowpal-wabbit-8.6.1.dfsg1/cs/test/Program.cs000066400000000000000000000537271332666127000206550ustar00rootroot00000000000000using System; using System.IO; using VW; using System.Runtime.InteropServices; using System.Diagnostics; using cs_unittest; namespace 
cs_test { class Program { static void Main(string[] args) { //NIPS2015Tutorial.AnnotationExample(); //AttributesSample.Attributes(); //AttributesSample.RunFeaturesTest(); ////LabDemo.Run(); //RunFeaturesTest(); //RunParserTest(); //RunSpeedTest(); //RunFlatExampleTestEx(); //RunLDAPredict(); //RunVWParse_and_VWLearn(); RunVWTest(); //RunUnitTests(); } private static void RunUnitTests() { TestCbAdfClass tw = new TestCbAdfClass(); tw.TestCbAdfExplore(); } private static void RunVWTest() { TestJsonDictClass tst = new TestJsonDictClass(); tst.TestJsonDictThreading(); //TestCbAdfClass tst = new TestCbAdfClass(); //tst.TestCbAdfExplore(); //TestCbAdfClass tst = new TestCbAdfClass(); //tst.Test87(); //RunTestsHelper.ExecuteTest( // 125, // "-k -c -d train-sets/wsj_small.dparser.vw.gz -b 20 --search_task dep_parser --search 26 --search_alpha 1e-5 --search_rollin mix_per_roll --search_rollout oracle --one_learner --search_history_length 3 --root_label 8 --transition_system 2 --passes 8", // "train-sets/wsj_small.dparser.vw.gz", // "train-sets/ref/search_dep_parser_arceager.stderr", // ""); //string cwd = Directory.GetCurrentDirectory(); //RunTestsHelper.ExecuteTest( // 65, // "-k -c -d train-sets/er_small.vw --passes 6 --search_task entity_relation --search 10 --constraints --search_alpha 1e-8", // "train-sets/er_small.vw", // "train-sets/ref/search_er.stderr", // ""); //RunTestsHelper.ExecuteTest( // 46, // "-k -c -d train-sets/sequence_data --passes 20 --search_rollout ref --search_alpha 1e-8 --search_task sequence_demoldf --csoaa_ldf m --search 5 --holdout_off -f models/sequence_data.ldf.model --noconstant", // "train-sets/sequence_data", // "train-sets/ref/sequence_data.ldf.train.stderr", // ""); //ExecuteTest( // 1, // "-k -l 20 --initial_t 128000 --power_t 1 -d /s/vw_rajan/test/train-sets/0001.dat -f /s/vw_rajan/test/models/0001_1.model -c --passes 8 --invariant --ngram 3 --skips 1 --holdout_off", // "/s/vw_rajan/test/train-sets/0001.dat", // 
"/s/vw_rajan/test/train-sets/ref/0001.stderr", // ""); //ExecuteTest( // 130, // "--cb_explore_adf --bag 3 -d /s/vw_rajan/test/train-sets/cb_test.ldf --noconstant -p cbe_adf_bag.predict", // "/s/vw_rajan/test/train-sets/cb_test.ldf", // "/s/vw_rajan/test/train-sets/ref/cbe_adf_bag.stderr", // "/s/vw_rajan/test/pred-sets/ref/cbe_adf_bag.predict"); } private static void RunFeaturesTest() { // this usually requires that the library script to update train.w or its moral equivalent needs to have been run IntPtr vw = VowpalWabbitInterface.Initialize("-q st --noconstant --quiet"); IntPtr example = VowpalWabbitInterface.ReadExample(vw, "1 |s p^the_man w^the w^man |t p^un_homme w^un w^homme"); float score = VowpalWabbitInterface.Learn(vw, example); VowpalWabbitInterface.FinishExample(vw, example); VowpalWabbitInterface.FEATURE_SPACE[] featureSpace = new VowpalWabbitInterface.FEATURE_SPACE[2];//maximum number of index spaces VowpalWabbitInterface.FEATURE[] sfeatures = new VowpalWabbitInterface.FEATURE[3];// the maximum number of features VowpalWabbitInterface.FEATURE[] tfeatures = new VowpalWabbitInterface.FEATURE[3];// the maximum number of features GCHandle pinnedsFeatures = GCHandle.Alloc(sfeatures, GCHandleType.Pinned); GCHandle pinnedtFeatures = GCHandle.Alloc(tfeatures, GCHandleType.Pinned); featureSpace[0].features = pinnedsFeatures.AddrOfPinnedObject(); featureSpace[1].features = pinnedtFeatures.AddrOfPinnedObject(); GCHandle pinnedFeatureSpace = GCHandle.Alloc(featureSpace, GCHandleType.Pinned); IntPtr featureSpacePtr = pinnedFeatureSpace.AddrOfPinnedObject(); uint snum = VowpalWabbitInterface.HashSpace("s"); featureSpace[0].name = (byte)'s'; sfeatures[0].weight_index = VowpalWabbitInterface.HashFeature("p^the_man", snum); sfeatures[0].x = 1; // add the character "delta" to test unicode // do it as a string to test the marshaling is doing pinning correctly. 
/// <summary>
/// Streams every example of the 0002.dat training set through the native parser,
/// exports each example's feature spaces, copies them into managed structs, and
/// then learns on the example.
/// </summary>
private static void RunParserTest()
{
    IntPtr vw = VowpalWabbitInterface.Initialize("-q st -d ..\\..\\..\\test\\train-sets\\0002.dat -f out");
    VowpalWabbitInterface.StartParser(vw, false);

    // Struct sizes do not change between iterations, so compute them once.
    int featureSpaceSize = Marshal.SizeOf(typeof(VowpalWabbitInterface.FEATURE_SPACE));
    int featureSize = Marshal.SizeOf(typeof(VowpalWabbitInterface.FEATURE));

    IntPtr example;
    while (IntPtr.Zero != (example = VowpalWabbitInterface.GetExample(vw)))
    {
        // The label is read only to exercise the call; its value is not used.
        VowpalWabbitInterface.GetLabel(example);

        IntPtr featureSpaceLen = IntPtr.Zero;
        IntPtr featureSpacePtr = VowpalWabbitInterface.ExportExample(vw, example, ref featureSpaceLen);
        try
        {
            int spaceCount = (int)featureSpaceLen;
            var featureSpace = new VowpalWabbitInterface.FEATURE_SPACE[spaceCount];

            for (int i = 0; i < spaceCount; i++)
            {
                // IntPtr.Add keeps the full pointer width. The previous ToInt32()-based
                // arithmetic throws OverflowException in a 64-bit process whenever the
                // address does not fit into 32 bits.
                IntPtr spacePos = IntPtr.Add(featureSpacePtr, i * featureSpaceSize);
                featureSpace[i] = (VowpalWabbitInterface.FEATURE_SPACE)Marshal.PtrToStructure(
                    spacePos, typeof(VowpalWabbitInterface.FEATURE_SPACE));

                var feature = new VowpalWabbitInterface.FEATURE[featureSpace[i].len];
                for (int j = 0; j < featureSpace[i].len; j++)
                {
                    IntPtr featurePos = IntPtr.Add(featureSpace[i].features, j * featureSize);
                    feature[j] = (VowpalWabbitInterface.FEATURE)Marshal.PtrToStructure(
                        featurePos, typeof(VowpalWabbitInterface.FEATURE));
                }
            }
        }
        finally
        {
            // Always hand the exported buffer back, even if marshalling failed.
            VowpalWabbitInterface.ReleaseFeatureSpace(featureSpacePtr, featureSpaceLen);
        }

        VowpalWabbitInterface.Learn(vw, example);
        VowpalWabbitInterface.FinishExample(vw, example);
    }

    VowpalWabbitInterface.EndParser(vw);
    VowpalWabbitInterface.Finish(vw);
}
/// <summary>
/// Streams every example of the 0002.dat training set through the native parser and
/// reads back its scalar properties, tag bytes and flat feature list. Nothing is learned.
/// </summary>
private static void RunFlatExampleTestEx()
{
    //IntPtr vw = VowpalWabbitInterface.Initialize("-q st -d rcv1.train.raw.txt -f out");
    IntPtr vw = VowpalWabbitInterface.Initialize("-q st -d ..\\..\\..\\test\\train-sets\\0002.dat -f out");
    VowpalWabbitInterface.StartParser(vw, false);

    // The stride is queried only to exercise the call; its value is not used.
    VowpalWabbitInterface.Get_Stride(vw);

    int featureSize = Marshal.SizeOf(typeof(VowpalWabbitInterface.FEATURE));

    IntPtr example;
    while (IntPtr.Zero != (example = VowpalWabbitInterface.GetExample(vw)))
    {
        // Read back the scalar properties; the values themselves are not used.
        VowpalWabbitInterface.GetPrediction(example);
        VowpalWabbitInterface.GetImportance(example);
        VowpalWabbitInterface.GetInitial(example);
        VowpalWabbitInterface.GetLabel(example);

        UInt32 tag_len = (UInt32)VowpalWabbitInterface.GetTagLength(example);
        byte[] tag = new byte[tag_len];
        if (tag_len > 0)
        {
            Marshal.Copy(VowpalWabbitInterface.GetTag(example), tag, 0, (int)tag_len);
        }

        UInt32 num_features = (UInt32)VowpalWabbitInterface.GetFeatureNumber(example);
        if (num_features > 0)
        {
            IntPtr feature_count = IntPtr.Zero;
            IntPtr ret = VowpalWabbitInterface.GetFeatures(vw, example, ref feature_count);

            // Size the array by the count GetFeatures actually reported. The previous
            // code allocated num_features slots but looped feature_count times, which
            // overruns the array whenever the two numbers differ.
            int returnedCount = (int)feature_count;
            var f = new VowpalWabbitInterface.FEATURE[returnedCount];
            for (int i = 0; i < returnedCount; i++)
            {
                // IntPtr.Add keeps the full pointer width; ToInt32() throws
                // OverflowException for 64-bit addresses.
                IntPtr featurePos = IntPtr.Add(ret, i * featureSize);
                f[i] = (VowpalWabbitInterface.FEATURE)Marshal.PtrToStructure(
                    featurePos, typeof(VowpalWabbitInterface.FEATURE));
            }

            // NOTE(review): the buffer returned by GetFeatures is never handed back;
            // the interface declares ReturnFeatures - confirm whether it must be called here.
        }

        VowpalWabbitInterface.FinishExample(vw, example);
    }

    VowpalWabbitInterface.EndParser(vw);
    VowpalWabbitInterface.Finish(vw);
}

/// <summary>
/// Managed snapshot of the feature space headers exported from one native example.
/// </summary>
public class VWInstanceEx
{
    /// <summary>
    /// Feature space headers copied from the native example; stays null when the
    /// constructor was given a zero handle.
    /// NOTE(review): each entry's <c>features</c> pointer refers to the buffer that is
    /// released at the end of the constructor - treat it as invalid afterwards.
    /// </summary>
    public VowpalWabbitInterface.FEATURE_SPACE[] featureSpace;

    /// <summary>
    /// Exports the example's feature spaces, copies the headers into
    /// <see cref="featureSpace"/>, and releases the native buffer again.
    /// </summary>
    /// <param name="vw">Native VW handle; nothing is done when it is zero.</param>
    /// <param name="ex">Native example handle; nothing is done when it is zero.</param>
    public VWInstanceEx(IntPtr vw, IntPtr ex)
    {
        if (IntPtr.Zero == vw || IntPtr.Zero == ex)
        {
            return;
        }

        IntPtr featureSpaceLen = IntPtr.Zero;
        IntPtr featureSpacePtr = VowpalWabbitInterface.ExportExample(vw, ex, ref featureSpaceLen);
        try
        {
            int spaceCount = (int)featureSpaceLen;
            this.featureSpace = new VowpalWabbitInterface.FEATURE_SPACE[spaceCount];

            int featureSpaceSize = Marshal.SizeOf(typeof(VowpalWabbitInterface.FEATURE_SPACE));
            int featureSize = Marshal.SizeOf(typeof(VowpalWabbitInterface.FEATURE));

            for (int i = 0; i < spaceCount; i++)
            {
                // IntPtr.Add instead of ToInt32(): the latter overflows for 64-bit addresses.
                IntPtr spacePos = IntPtr.Add(featureSpacePtr, i * featureSpaceSize);
                this.featureSpace[i] = (VowpalWabbitInterface.FEATURE_SPACE)Marshal.PtrToStructure(
                    spacePos, typeof(VowpalWabbitInterface.FEATURE_SPACE));

                // The individual features are marshalled but not stored anywhere,
                // exactly as in the original code.
                var feature = new VowpalWabbitInterface.FEATURE[this.featureSpace[i].len];
                for (int j = 0; j < this.featureSpace[i].len; j++)
                {
                    IntPtr featurePos = IntPtr.Add(this.featureSpace[i].features, j * featureSize);
                    feature[j] = (VowpalWabbitInterface.FEATURE)Marshal.PtrToStructure(
                        featurePos, typeof(VowpalWabbitInterface.FEATURE));
                }
            }
        }
        finally
        {
            VowpalWabbitInterface.ReleaseFeatureSpace(featureSpacePtr, featureSpaceLen);
        }
    }
}
2354:5 307:1 6457:3 315:1 319:1 4416:4 4419:1 325:1 326:2 6472:1 6474:1 334:2 1421:2 2384:1 1516:1 340:1 4438:1 344:2 6492:5 2401:1 354:1 4452:2 6505:4 402:3 4463:1 2418:1 2451:3 375:1 4472:1 4478:2 4479:2 2437:2 4487:1 4489:2 4493:2 2448:1 5528:1 4498:1 6547:4 6549:1 406:2 2673:1 2456:2 6554:1 4507:1 4513:1 418:3 6563:1 6566:1 5873:1 2472:10 1095:1 6572:1 4525:1 4529:2 2485:2 4535:15 6587:1 444:3 6590:1 449:1 456:1 2509:6 6221:3 6562:1 2467:1 468:1 902:2 2519:1 2607:1 4653:1 6626:1 422:1 2539:6 493:4 494:1 4591:1 6644:2 3156:1 2554:1 509:1 4606:2 2562:1 516:1 2570:2 524:2 6669:1 2576:1 2577:1 4626:1 6678:1 2584:1 6916:2 538:1 7600:1 547:2 549:2 553:9 555:1 2337:1 4655:1 567:1 5679:1 570:2 6722:2 579:2 6727:2 4793:1 586:1 590:4 2643:15 4694:14 4696:6 4698:1 603:3 4700:1 6749:1 6294:1 4704:1 613:1 4710:2 2833:1 6247:1 1469:1 6769:1 6770:1 629:1 4727:1 2682:4 640:1 642:1 6793:1 2703:1 659:6 772:1 664:1 2714:1 1135:4 3525:1 4768:2 674:1 678:1 4783:1 7624:2 690:1 115:1 1481:1 697:4 6843:1 2748:1 2753:2 6262:2 6854:1 4807:1 6856:2 2763:2 6863:1 2770:1 5923:3 6869:1 4824:2 4834:2 1489:1 2793:4 4844:2 4848:2 2801:1 755:1 2807:1 763:1 2815:2 1152:1 2818:2 2820:2 7638:1 778:1 6923:1 2831:4 6929:1 4882:1 4887:2 4888:16 6940:6 798:2 6950:2 4904:2 809:1 4907:1 4909:4 2870:1 4919:3 4922:2 2879:6 4930:1 4932:5 2892:1 842:1 6988:1 846:1 4943:1 6999:3 4952:1 864:1 4966:5 1853:2 2929:1 7026:2 5267:1 4984:1 4987:1 894:1 6440:1 7042:1 7045:1 4998:3 2953:2 7050:1 2955:3 7053:2 5014:1 836:1 5018:1 3443:2 924:4 7071:8 7072:1 930:1 936:3 5033:3 5036:1 942:2 2991:1 5047:1 7096:1 7099:2 3005:1 3006:3 3008:1 962:3 963:1 3013:1 967:2 5065:3 2419:1 5068:1 5070:1 976:1 977:1 7125:1 3031:1 7130:24 3039:3 7137:2 5090:1 5091:2 996:17 997:3 3047:2 7147:1 7149:1 5105:1 3060:1 3062:13 7159:1 5112:1 3066:4 5631:1 1022:1 1023:1 7171:1 5126:4 1032:1 5131:4 3087:1 2904:1 3090:1 7187:3 5147:1 3100:1 7200:2 7201:4 1058:2 7203:5 5156:2 7207:2 1065:6 5162:3 3116:6 5165:1 7214:1 3119:1 7222:1 5180:1 3133:1 
1086:2 5183:15 7233:1 5188:2 7239:4 5192:1 1097:1 5194:2 405:1 4621:1 5200:1 3153:1 855:1 7252:2 1112:1 5211:7 7675:2 7264:1 5218:2 2235:1 5220:1 3173:1 1129:2 1130:5 3181:1 1134:1 7279:1 3184:1 3186:1 1139:1 191:1 3197:1 5248:2 5249:1 993:1 2582:1 1160:2 1165:1 7315:2 3223:1 7321:1 3229:2 4293:1 2631:1 7334:7 3239:3 7338:3 3243:1 5293:2 7344:1 7348:1 6345:3 1226:1 1216:2 3041:1 2361:1 3445:1 3273:1 7370:2 3277:1 3280:4 7378:1 7381:1 3287:4 3288:1 3295:2 6520:1 5348:1 5349:5 7398:1 3303:1 5354:1 5357:1 5358:2 7408:1 5365:2 4991:1 5372:2 7421:1 5374:8 5376:1 1921:1 7434:1 3342:1 1295:1 1296:1 3349:3 6361:1 1306:2 1583:1 5409:3 6113:1 2950:1 3975:1 5420:11 7469:1 1928:1 3381:2 1334:1 5001:5 5434:1 7391:2 1341:1 7487:1 1345:2 7491:1 5449:1 1355:1 2957:1 7505:2 5458:6 3114:1 5460:2 3641:2 7512:1 5466:1 5470:1 5350:1 7526:1 7529:1 7531:1 1388:2 5488:1 1395:3 7541:2 7546:1 1258:1 1407:1 3456:2 7555:2 7557:1 7558:1 5511:2 7560:1 7563:1 4674:1 1424:2 7576:4 3483:3 1437:2 5535:3 7584:1 5539:1 1449:1 5231:1 5548:1 5549:5 3503:1 5552:1 1458:1 5556:1 7611:1 3517:2 3317:3 5570:2 1477:6 5576:2 5577:1 3530:1 3531:1 1485:1 5585:1 7210:1 1492:1 5590:2 5591:1 3544:1 118:1 1502:1 3551:1 3558:3 1513:1 5612:1 3565:2 6397:1 5616:1 4691:2 5622:7 7671:1 3577:1 5626:1 6393:1 1532:2 5629:1 3583:2 7683:2 3590:3 7689:1 5644:1 5650:12 7699:1 5654:3 5655:1 3616:1 1569:1 1572:1 4485:3 5678:4 3631:16 5683:1 5686:1 5687:1 5688:2 5689:5 3646:4 3648:3 1608:15 951:1 5718:2 1625:2 3692:2 274:1 1646:4 3695:1 5751:1 5762:2 3727:3 3737:1 1690:3 5787:1 5794:1 3747:3 5799:4 5805:1 5808:5 3763:4 1716:2 287:1 1725:1 5825:1 7559:1 7457:4 3785:2 5834:1 1746:1 3795:1 1751:15 5859:1 1764:6 5863:1 4392:1 1789:1 5896:1 3860:3 1813:5 5912:1 1822:5 1826:1 3875:6 1828:1 3879:3 3880:1 353:2 3885:6 5934:1 3890:1 6451:2 5946:8 5947:1 3901:3 2653:3 3905:2 5955:2 3908:2 1861:1 1862:1 5959:1 1494:1 5431:1 7139:4 3925:4 5974:1 5975:1 3931:1 1884:3 881:1 1888:1 4411:1 3944:2 3948:1 3949:1 3951:2 3956:5 1910:1 3961:1 6010:1 
/// <summary>
/// Two-phase demo: first parses up to 781266 examples from 0002.dat and caches each
/// as a <see cref="VWInstanceEx"/>, then imports the cached instances into a second
/// VW instance and learns on them. The elapsed time covers both phases.
/// </summary>
private static void RunVWParse_and_VWLearn()
{
    const int MaxInstances = 781266;

    // parse and cache
    IntPtr vw0 = VowpalWabbitInterface.Initialize(@"-d 0002.dat -c");
    VowpalWabbitInterface.StartParser(vw0, false);

    int parsedCount = 0;
    VWInstanceEx[] vwInstanceExs = new VWInstanceEx[MaxInstances];
    Stopwatch s = Stopwatch.StartNew();

    while (parsedCount < MaxInstances)
    {
        IntPtr example = VowpalWabbitInterface.GetExample(vw0);
        if (IntPtr.Zero == example)
        {
            break;
        }

        vwInstanceExs[parsedCount] = new VWInstanceEx(vw0, example);
        VowpalWabbitInterface.FinishExample(vw0, example);
        parsedCount++;
    }

    VowpalWabbitInterface.EndParser(vw0);
    VowpalWabbitInterface.Finish(vw0);

    // learn
    IntPtr vw = VowpalWabbitInterface.Initialize(@"--quiet --random_seed 276518665 -f save_file.reg --readable_model reable.reg");

    // Only the first parsedCount slots were filled. Walking the whole array, as the
    // previous code did, dereferences null as soon as the data set has fewer than
    // MaxInstances examples.
    for (int n = 0; n < parsedCount; n++)
    {
        VWInstanceEx vwInstanceEx = vwInstanceExs[n];
        if (vwInstanceEx == null || vwInstanceEx.featureSpace == null)
        {
            continue;
        }

        int spaceCount = vwInstanceEx.featureSpace.Length;
        var featureSpace = new VowpalWabbitInterface.FEATURE_SPACE[spaceCount];
        var pinnedsFeatures = new GCHandle[spaceCount];
        var pinnedFeatureSpace = default(GCHandle);

        try
        {
            for (int i = 0; i < spaceCount; i++)
            {
                // NOTE(review): this pins the boxed IntPtr value rather than a FEATURE[]
                // array, and name/len are not copied into the new feature space - the
                // imported example most likely does not carry the cached features.
                // Fixing this requires VWInstanceEx to keep the marshalled features.
                pinnedsFeatures[i] = GCHandle.Alloc(vwInstanceEx.featureSpace[i].features, GCHandleType.Pinned);
                featureSpace[i].features = pinnedsFeatures[i].AddrOfPinnedObject();
            }

            pinnedFeatureSpace = GCHandle.Alloc(featureSpace, GCHandleType.Pinned);
            IntPtr featureSpacePtr = pinnedFeatureSpace.AddrOfPinnedObject();

            IntPtr importedExample = VowpalWabbitInterface.ImportExample(vw, featureSpacePtr, (IntPtr)spaceCount);
            VowpalWabbitInterface.Learn(vw, importedExample);
            VowpalWabbitInterface.FinishExample(vw, importedExample);
        }
        finally
        {
            // Release every pin that was actually taken, even when a native call throws.
            for (int i = 0; i < spaceCount; i++)
            {
                if (pinnedsFeatures[i].IsAllocated)
                {
                    pinnedsFeatures[i].Free();
                }
            }

            if (pinnedFeatureSpace.IsAllocated)
            {
                pinnedFeatureSpace.Free();
            }
        }
    }

    VowpalWabbitInterface.Finish(vw);

    Debug.WriteLine("Elapsed Time: {0} ms", s.ElapsedMilliseconds);
    Console.WriteLine("Elapsed Time: {0} ms", s.ElapsedMilliseconds);
}
[assembly: ComVisible(false)] // The following GUID is for the ID of the typelib if this project is exposed to COM [assembly: Guid("74451adb-817c-45fa-af74-71fd22936907")] // Version information for an assembly consists of the following four values: // // Major Version // Minor Version // Build Number // Revision // // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] [assembly: AssemblyVersion("1.0.0.0")] [assembly: AssemblyFileVersion("1.0.0.0")] vowpal-wabbit-8.6.1.dfsg1/cs/test/VowpalWabbitInterface.cs000066400000000000000000000145401332666127000234560ustar00rootroot00000000000000using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Runtime.InteropServices; namespace VW { using SizeT = IntPtr; using VwHandle = IntPtr; using VwFeatureSpace = IntPtr; using VwExample = IntPtr; using VwFeature = IntPtr; using BytePtr = IntPtr; public sealed class VowpalWabbitInterface { private const string LIBVW = "libvw.dll"; [StructLayout(LayoutKind.Sequential)] public struct FEATURE_SPACE { public byte name; public IntPtr features; // points to a FEATURE[] public int len; } [StructLayout(LayoutKind.Sequential)] public struct FEATURE { public float x; public uint weight_index; } [DllImport(LIBVW, EntryPoint = "VW_Initialize")] public static extern VwHandle Initialize([MarshalAs(UnmanagedType.LPWStr)]string arguments); [DllImport(LIBVW, EntryPoint = "VW_Finish")] public static extern void Finish(VwHandle vw); [DllImport(LIBVW, EntryPoint = "VW_ImportExample")] // features points to a FEATURE_SPACE[] public static extern VwExample ImportExample(VwHandle vw, VwFeatureSpace features, SizeT length); [DllImport(LIBVW, EntryPoint = "VW_ExportExample")] public static extern VwFeatureSpace ExportExample(VwHandle vw, VwExample example, ref SizeT length); [DllImport(LIBVW, EntryPoint = "VW_ReleaseFeatureSpace")] public static extern void 
ReleaseFeatureSpace(VwFeatureSpace fs, SizeT length); [DllImport(LIBVW, EntryPoint = "VW_ReadExample")] public static extern VwExample ReadExample(VwHandle vw, [MarshalAs(UnmanagedType.LPWStr)]string exampleString); // Have to marshal bools, C++ considers them 4 byte quantities, and C# considers them 1 byte. [DllImport(LIBVW, EntryPoint = "VW_StartParser")] public static extern void StartParser(VwHandle vw, [MarshalAs(UnmanagedType.Bool)]bool do_init); [DllImport(LIBVW, EntryPoint = "VW_EndParser")] public static extern void EndParser(VwHandle vw); [DllImport(LIBVW, EntryPoint = "VW_GetExample")] public static extern VwExample GetExample(VwHandle parser); [DllImport(LIBVW, EntryPoint = "VW_FinishExample")] public static extern void FinishExample(VwHandle vw, VwExample example); [DllImport(LIBVW, EntryPoint = "VW_GetTopicPrediction")] public static extern float GetTopicPrediction(VwExample example, SizeT i); [DllImport(LIBVW, EntryPoint = "VW_GetLabel")] public static extern float GetLabel(VwExample example); [DllImport(LIBVW, EntryPoint = "VW_GetImportance")] public static extern float GetImportance(VwExample example); [DllImport(LIBVW, EntryPoint = "VW_GetInitial")] public static extern float GetInitial(VwExample example); [DllImport(LIBVW, EntryPoint = "VW_GetMultilabelPredictions")] public static extern IntPtr GetMultilabelPredictions(VwHandle vw, VwExample example, ref SizeT length); [DllImport(LIBVW, EntryPoint = "VW_GetPrediction")] public static extern float GetPrediction(VwExample example); [DllImport(LIBVW, EntryPoint = "VW_GetTagLength")] public static extern SizeT GetTagLength(VwExample example); // Saying this returned a byte was inappropriate, because you were returning // actually a pointer to a seqeunce of bytes. (Not sure what the interpretation // of this should be, utf8 or something?) 
[DllImport(LIBVW, EntryPoint = "VW_GetTag")] public static extern BytePtr GetTag(VwExample example); [DllImport(LIBVW, EntryPoint = "VW_GetFeatureNumber")] public static extern SizeT GetFeatureNumber(VwExample example); // Same note regarding ref int vs size_t* [DllImport(LIBVW, EntryPoint = "VW_GetFeatures")] public static extern VwFeature GetFeatures(VwHandle vw, VwExample example, ref SizeT length); [DllImport(LIBVW, EntryPoint = "VW_ReturnFeatures")] public static extern void ReturnFeatures(VwExample features); [DllImport(LIBVW, EntryPoint = "VW_HashSpace")] public static extern uint HashSpace(VwHandle vw, [MarshalAs(UnmanagedType.LPWStr)]string s); [DllImport(LIBVW, EntryPoint = "VW_HashSpaceStatic")] public static extern uint HashSpace([MarshalAs(UnmanagedType.LPWStr)]string s, [MarshalAs(UnmanagedType.LPWStr)]string h = "strings"); // The DLL defines the last argument "u" as being an "unsigned long". // In C++ under current circumstances, both ints and longs are four byte integers. // If you wanted an eight byte integer you should use "long long" (or probably // more appropriately in this circumstance size_t). // In C#, "int" is four bytes, "long" is eight bytes. 
[DllImport(LIBVW, EntryPoint = "VW_HashFeature")] public static extern uint HashFeature(VwHandle vw, [MarshalAs(UnmanagedType.LPWStr)]string s, uint u); [DllImport(LIBVW, EntryPoint = "VW_HashFeatureStatic")] public static extern uint HashFeature([MarshalAs(UnmanagedType.LPWStr)]string s, uint u, [MarshalAs(UnmanagedType.LPWStr)]string h = "strings", uint numBits = 18); [DllImport(LIBVW, EntryPoint = "VW_Learn")] public static extern float Learn(VwHandle vw, VwExample example); [DllImport(LIBVW, EntryPoint = "VW_Predict")] public static extern float Predict(VwHandle vw, VwExample example); [DllImport(LIBVW, EntryPoint = "VW_AddLabel")] public static extern void AddLabel(VwExample example, float label = float.MaxValue, float weight = 1, float initial = 0); [DllImport(LIBVW, EntryPoint = "VW_Get_Weight")] public static extern float Get_Weight(VwHandle vw, SizeT index, SizeT offset); [DllImport(LIBVW, EntryPoint = "VW_Set_Weight")] public static extern void Set_Weight(VwHandle vw, SizeT index, SizeT offset, float value); [DllImport(LIBVW, EntryPoint = "VW_Num_Weights")] public static extern SizeT Num_Weights(VwHandle vw); [DllImport(LIBVW, EntryPoint = "VW_Get_Stride")] public static extern SizeT Get_Stride(VwHandle vw); [DllImport(LIBVW, EntryPoint = "VW_SaveModel")] public static extern void SaveModel(VwHandle vw); } } vowpal-wabbit-8.6.1.dfsg1/cs/test/app.config000066400000000000000000000037101332666127000206510ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/test/cs_test.csproj000066400000000000000000000324721332666127000215770ustar00rootroot00000000000000 Debug x86 8.0.30703 2.0 {D5E462FC-3DD6-4B65-A9E9-DA0B0B11D254} Exe Properties cs_test cs_test v4.6 512 false publish\ true Disk false Foreground 7 Days false false true 0 1.0.0.%2a false true ..\..\vowpalwabbit\ x86 true full false bin\Debug\ DEBUG;TRACE prompt 4 false x86 pdbonly true bin\Release\ TRACE prompt 4 false true $(SolutionDir)\x64\Debug\ DEBUG;TRACE full x64 
bin\Debug\cs_test.exe.CodeAnalysisLog.xml true GlobalSuppressions.cs prompt MinimumRecommendedRules.ruleset ;C:\Program Files (x86)\Microsoft Visual Studio 10.0\Team Tools\Static Analysis Tools\\Rule Sets false ;C:\Program Files (x86)\Microsoft Visual Studio 10.0\Team Tools\Static Analysis Tools\FxCop\\Rules false false false $(SolutionDir)\x64\Release\ TRACE true pdbonly x64 bin\Release\cs_test.exe.CodeAnalysisLog.xml true GlobalSuppressions.cs prompt MinimumRecommendedRules.ruleset ;C:\Program Files (x86)\Microsoft Visual Studio 10.0\Team Tools\Static Analysis Tools\\Rule Sets false ;C:\Program Files (x86)\Microsoft Visual Studio 10.0\Team Tools\Static Analysis Tools\FxCop\\Rules false false false true bin\Debug\ DEBUG;TRACE full x86 bin\Debug\cs_test.exe.CodeAnalysisLog.xml true GlobalSuppressions.cs prompt MinimumRecommendedRules.ruleset ;C:\Program Files (x86)\Microsoft Visual Studio 10.0\Team Tools\Static Analysis Tools\\Rule Sets;C:\Program Files (x86)\Microsoft Visual Studio 10.0\Team Tools\Static Analysis Tools\\Rule Sets false ;C:\Program Files (x86)\Microsoft Visual Studio 10.0\Team Tools\Static Analysis Tools\FxCop\\Rules;C:\Program Files (x86)\Microsoft Visual Studio 10.0\Team Tools\Static Analysis Tools\FxCop\\Rules false false bin\Release\ TRACE true pdbonly x86 bin\Release\cs_test.exe.CodeAnalysisLog.xml true GlobalSuppressions.cs prompt MinimumRecommendedRules.ruleset ;C:\Program Files (x86)\Microsoft Visual Studio 10.0\Team Tools\Static Analysis Tools\\Rule Sets;C:\Program Files (x86)\Microsoft Visual Studio 10.0\Team Tools\Static Analysis Tools\\Rule Sets ;C:\Program Files (x86)\Microsoft Visual Studio 10.0\Team Tools\Static Analysis Tools\FxCop\\Rules;C:\Program Files (x86)\Microsoft Visual Studio 10.0\Team Tools\Static Analysis Tools\FxCop\\Rules false true bin\x86\DebugLeakCheck\ DEBUG;TRACE full x86 prompt MinimumRecommendedRules.ruleset true bin\x64\DebugLeakCheck\ DEBUG;TRACE full x64 false prompt MinimumRecommendedRules.ruleset false false 
true bin\DebugLeakCheck\ DEBUG;TRACE full x86 prompt MinimumRecommendedRules.ruleset false false {e4e962ae-7056-4eb0-a8c5-8dc824a4b068} cs {9e27fa94-ab34-4736-8427-fb7a2ba90d52} cs_json {08636f79-5577-4af2-8eed-ec8a5bc14ac4} cs_parallel {007b7de0-1d9b-498d-acfc-e9d33058f22e} cs_unittest {85e55ae0-3784-4968-9271-c81af560e1c1} vw_clr {e621e022-c1f8-433f-905a-ab9a3de072b7} vw_common False Microsoft .NET Framework 4 Client Profile %28x86 and x64%29 true False .NET Framework 3.5 SP1 Client Profile false False .NET Framework 3.5 SP1 false False Windows Installer 4.5 true Designer xcopy /y/d $(SolutionDir)\dll\$(PlatformName)\$(ConfigurationName)\libvw.* $(TargetDir) This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. vowpal-wabbit-8.6.1.dfsg1/cs/testcommon/000077500000000000000000000000001332666127000201125ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/testcommon/ITestRunner.cs000066400000000000000000000007051332666127000226650ustar00rootroot00000000000000using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; namespace cs_testcommon { public interface ITestRunner { /// /// /// /// /// /// Null if ok, otherwise the test message string Run(string type, string method); } } vowpal-wabbit-8.6.1.dfsg1/cs/testcommon/Properties/000077500000000000000000000000001332666127000222465ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/testcommon/Properties/AssemblyInfo.cs000066400000000000000000000025721332666127000251760ustar00rootroot00000000000000using System.Reflection; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; // General Information about an assembly is controlled through the following // set of attributes. Change these attribute values to modify the information // associated with an assembly. 
[assembly: AssemblyTitle("cs_testcommon")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("")] [assembly: AssemblyProduct("cs_testcommon")] [assembly: AssemblyCopyright("Copyright © 2015")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] // Setting ComVisible to false makes the types in this assembly not visible // to COM components. If you need to access a type in this assembly from // COM, set the ComVisible attribute to true on that type. [assembly: ComVisible(false)] // The following GUID is for the ID of the typelib if this project is exposed to COM [assembly: Guid("32fb833b-e31d-4326-90a8-21bd8ca70c12")] // Version information for an assembly consists of the following four values: // // Major Version // Minor Version // Build Number // Revision // // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] [assembly: AssemblyVersion("1.0.0.0")] [assembly: AssemblyFileVersion("1.0.0.0")] vowpal-wabbit-8.6.1.dfsg1/cs/testcommon/cs_testcommon.csproj000066400000000000000000000063301332666127000242130ustar00rootroot00000000000000 Debug AnyCPU {6A9CBEAB-427F-4D8D-9559-B76B42B0895C} Library Properties cs_testcommon cs_testcommon v4.5.2 512 true $(SolutionDir)\x64\Debug\ DEBUG;TRACE full x64 prompt MinimumRecommendedRules.ruleset true $(SolutionDir)\x64\Release\ TRACE true pdbonly x64 prompt MinimumRecommendedRules.ruleset true ..\vw_key.snk true $(SolutionDir)\x64\DebugLeakCheck\ DEBUG;TRACE true full x64 prompt MinimumRecommendedRules.ruleset vowpal-wabbit-8.6.1.dfsg1/cs/testhelp/000077500000000000000000000000001332666127000175525ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/testhelp/App.config000066400000000000000000000037101332666127000214620ustar00rootroot00000000000000 
vowpal-wabbit-8.6.1.dfsg1/cs/testhelp/DiscoverUnitTests.cs000066400000000000000000000045621332666127000235510ustar00rootroot00000000000000using cs_unittest; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Reflection; using System.Text; using System.Threading.Tasks; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace cs_testhelp { internal static class DiscoverUnitTests { internal static void Discover(string vwRoot) { // find all methods that are // - not part of types annotated by [TestClass] (as they'll already be discovered by unit test framework) // - contain "test" in method name var methodByType = from type in typeof(TestRunner).Assembly.GetTypes() where type.GetCustomAttribute() != null from method in type.GetMethods(BindingFlags.Instance | BindingFlags.Public) where method.Name.ToLowerInvariant().Contains("test") && method.GetParameters().Count() == 0 && method.GetCustomAttribute() != null && method.GetCustomAttribute() == null group method by method.DeclaringType into g select g; var outputFile = vwRoot + @"\..\cs\leaktest\TestWrapped.cs"; using (var cs = new StreamWriter(outputFile)) { cs.WriteLine(@" using Microsoft.VisualStudio.TestTools.UnitTesting; using cs_leaktest; namespace cs_unittest { [TestClass] public class TestWrapped : TestWrappedBase {"); foreach (var g in methodByType) { foreach (var method in g) { var name = method.Name; var categoryAttr = method.GetCustomAttribute(); if (categoryAttr != null) { cs.WriteLine(@" [TestCategory({0})]", string.Join(",", categoryAttr.TestCategories.Select(c => string.Format("\"{0}\"", c)))); } cs.WriteLine(@" [TestMethod] public void {1}() {{ Run(""{0}"", ""{1}""); }}", g.Key.FullName, name); } } cs.WriteLine(@" } } "); } } } } vowpal-wabbit-8.6.1.dfsg1/cs/testhelp/Program.cs000066400000000000000000000006061332666127000215120ustar00rootroot00000000000000using System; using System.Collections.Generic; using System.Globalization; using System.IO; using 
System.Linq; using System.Text; using System.Text.RegularExpressions; using VW; namespace cs_testhelp { class Program { static void Main(string[] mainArgs) { var vwRoot = mainArgs[0]; DiscoverUnitTests.Discover(vwRoot); } } } vowpal-wabbit-8.6.1.dfsg1/cs/testhelp/Properties/000077500000000000000000000000001332666127000217065ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/testhelp/Properties/AssemblyInfo.cs000066400000000000000000000025661332666127000246410ustar00rootroot00000000000000using System.Reflection; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; // General Information about an assembly is controlled through the following // set of attributes. Change these attribute values to modify the information // associated with an assembly. [assembly: AssemblyTitle("cs_testhelp")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("")] [assembly: AssemblyProduct("cs_testhelp")] [assembly: AssemblyCopyright("Copyright © 2015")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] // Setting ComVisible to false makes the types in this assembly not visible // to COM components. If you need to access a type in this assembly from // COM, set the ComVisible attribute to true on that type. 
[assembly: ComVisible(false)] // The following GUID is for the ID of the typelib if this project is exposed to COM [assembly: Guid("f02375f2-79db-4800-8982-7c8f25ef4624")] // Version information for an assembly consists of the following four values: // // Major Version // Minor Version // Build Number // Revision // // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] [assembly: AssemblyVersion("1.0.0.0")] [assembly: AssemblyFileVersion("1.0.0.0")] vowpal-wabbit-8.6.1.dfsg1/cs/testhelp/cs_testhelp.csproj000066400000000000000000000146431332666127000233210ustar00rootroot00000000000000 Debug AnyCPU {B7122114-828E-4D86-B79E-4037C74C5F69} Exe Properties cs_testhelp cs_testhelp v4.6 512 ..\..\vowpalwabbit true $(SolutionDir)\x64\Debug\ DEBUG;TRACE full x64 prompt MinimumRecommendedRules.ruleset true $(SolutionDir)\x64\Release\ TRACE true pdbonly x64 prompt MinimumRecommendedRules.ruleset true true ..\..\vowpalwabbit\x64\DebugLeakCheck\ DEBUG;TRACE full x64 prompt MinimumRecommendedRules.ruleset true ..\..\vowpalwabbit\packages\Antlr4.Runtime.4.6.4\lib\net45\Antlr4.Runtime.dll True {6a9cbeab-427f-4d8d-9559-b76b42b0895c} cs_testcommon {007b7de0-1d9b-498d-acfc-e9d33058f22e} cs_unittest {85e55ae0-3784-4968-9271-c81af560e1c1} vw_clr This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
vowpal-wabbit-8.6.1.dfsg1/cs/testhelp/packages.config000066400000000000000000000005261332666127000225220ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/unittest/000077500000000000000000000000001332666127000176015ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/unittest/App.config000066400000000000000000000127211332666127000215130ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/unittest/ApplicationInsights.config000066400000000000000000000027731332666127000247550ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/unittest/BaseData.cs000066400000000000000000000001541332666127000215740ustar00rootroot00000000000000namespace cs_unittest { public class BaseData { public string Line { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/CbAdfData.cs000066400000000000000000000027071332666127000216670ustar00rootroot00000000000000using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using VW.Labels; using VW.Serializer.Attributes; namespace cs_unittest.cbadf { public class Generator { private static Random rand = new Random(123); public static Tuple, ContextualBanditLabel> GenerateShared(int numActions) { return Tuple.Create( new CbAdfShared { Num = rand.Next(100), Vector = Enumerable.Range(1, 500).Select(_ => (float)rand.NextDouble()).ToArray() }, Enumerable.Range(1, numActions).Select(_ => new CbAdfAction { Vector = Enumerable.Range(1, 500).Select(__ => (float)rand.NextDouble()).ToArray() }).ToList(), new ContextualBanditLabel { Action = (uint)rand.Next(numActions), Cost = rand.Next(1), Probability = (float)rand.NextDouble() }); } } public class CbAdfShared { [Feature] public int Num { get; set; } [Feature(FeatureGroup = 'x', AddAnchor = true)] public float[] Vector { get; set; } } public class CbAdfAction { [Feature(FeatureGroup = 'y', AddAnchor = true)] public float[] Vector { get; set; } } } 
vowpal-wabbit-8.6.1.dfsg1/cs/unittest/Properties/000077500000000000000000000000001332666127000217355ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/unittest/Properties/AssemblyInfo.cs000066400000000000000000000026371332666127000246670ustar00rootroot00000000000000using System; using System.Reflection; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; // General Information about an assembly is controlled through the following // set of attributes. Change these attribute values to modify the information // associated with an assembly. [assembly: AssemblyTitle("cs_unittest")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("")] [assembly: AssemblyProduct("cs_unittest")] [assembly: AssemblyCopyright("Copyright © 2015")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] // Setting ComVisible to false makes the types in this assembly not visible // to COM components. If you need to access a type in this assembly from // COM, set the ComVisible attribute to true on that type. 
[assembly: ComVisible(false)] // The following GUID is for the ID of the typelib if this project is exposed to COM [assembly: Guid("424e177f-656d-4926-879f-ffac8a250103")] // Version information for an assembly consists of the following four values: // // Major Version // Minor Version // Build Number // Revision // // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] [assembly: AssemblyVersion("1.0.0.0")] [assembly: AssemblyFileVersion("1.0.0.0")] [assembly: CLSCompliant(false)] vowpal-wabbit-8.6.1.dfsg1/cs/unittest/RunTests.tt000066400000000000000000000114001332666127000217350ustar00rootroot00000000000000<#@ template debug="false" hostspecific="True" language="C#" #> <#@ assembly name="System.Core" #> <#@ import namespace="System.Linq" #> <#@ import namespace="System.IO" #> <#@ import namespace="System.Globalization" #> <#@ import namespace="System.Text" #> <#@ import namespace="System.Text.RegularExpressions" #> <#@ import namespace="System.Collections.Generic" #> <#@ output extension=".cs" #> <# var testRoot = Path.Combine(Path.GetDirectoryName(Host.TemplateFile), @"..\..\test"); var lines = File.ReadAllLines(Path.Combine(testRoot, "RunTests")) .SkipWhile(l => l != "__DATA__") .ToList(); var skipList = new[] { 13, 32, 39, 40, 41, 59, 60, 61, 66, 68, 90, 25, 26, // crash 92, 95, 96, 98, 91, 99, 118, 119, 120, 14, 16, 17, 31, 33, 34,53, 101, 102, 103, 105, 106, 111, 112, // float delta 71, // --examples to test parser 143, 144, 146, 158, // native json parsing 149, 152, 156 // bash script }; var outputModels = new Dictionary(); var testcases = new Dictionary(); testcase = new TestCase(); Match match; for (var i = 0; i < lines.Count; i++) { var line = lines[i]; if (line.Trim().Length == 0) { if (skipTest) { Reset(); continue; } // resolve dependencies if (!string.IsNullOrEmpty(testcase.FinalRegressor)) outputModels[testcase.FinalRegressor] = testcase; if 
(!string.IsNullOrEmpty(testcase.InitialRegressor)) { TestCase dep; if (outputModels.TryGetValue(testcase.InitialRegressor, out dep)) testcase.Dependency = dep; else throw new Exception("Missing dependency: '" + testcase.InitialRegressor + "' for test case " + testcase.Id); } // if (testcase.Id == 31) // testcase.Dependency = outputModels["models/mask.model"]; if (testcases.ContainsKey(testcase.Id)) throw new Exception("Testcase IDs must be unique: duplicate # Test " + testcase.Id + " in line " + i); testcases.Add(testcase.Id, testcase); Reset(); } else if ((match = Regex.Match(line, @"^#\s*Test\s+(?\d+):?(?.*)?$")).Success) { var nr = int.Parse(match.Groups["nr"].Value); testcase.Id = nr; testcase.Comment = match.Groups["comment"].Value.Replace("\"", "\"\""); } else if ((match = Regex.Match(line, @"^\{VW\} (?.*)$")).Success) { var args = match.Groups["args"].Value; while (args.EndsWith("\\")) { args = args.Substring(0, args.Length - 1); args = args.Trim() + " " + lines[++i].Trim(); } testcase.Arguments = args; testcase.InputData = MatchArgument(args, "-d"); testcase.InitialRegressor = MatchArgument(args, "-i"); testcase.FinalRegressor = MatchArgument(args, "-f"); } else if (line.EndsWith(".stderr")) { testcase.Stderr = line.Trim(); } else if (line.EndsWith(".predict")) { testcase.Predict = line.Trim(); } else if (line.StartsWith("#") && line.Contains("SkipC#")) { skipTest = true; } } #> using Microsoft.VisualStudio.TestTools.UnitTesting; using System.IO; using System.IO.Compression; using VW; namespace cs_unittest { [TestClass] public partial class RunTests : TestBase { <# foreach (var mainTestcase in testcases.Values) { if (mainTestcase.Id == 0) continue; #> [TestMethod] [Description(@"<#=(mainTestcase.Comment ?? 
"").Trim()#>")] <# if (skipList.Contains(mainTestcase.Id)) { #> [Ignore] <# } #> [TestCategory("Vowpal Wabbit/Command Line")] public void CommandLine_Test<#=mainTestcase.Id#>() { <# foreach (var tc in mainTestcase.InDependencyOrder()) { #> RunTestsHelper.ExecuteTest( <#=tc.Id#>, "<#=tc.Arguments#>", "<#=tc.InputData#>", "<#=tc.Stderr#>", "<#=tc.Predict#>"); <# } #> } <# } #> } } <#+ class TestCase { public int Id; public string Arguments = ""; public string InitialRegressor; public string FinalRegressor; public string InputData = ""; public string Stderr = ""; public string Predict = ""; public string Comment; public TestCase Dependency; public List InDependencyOrder() { var tests = new List(); var dep = this; while (dep != null) { tests.Add(dep); dep = dep.Dependency; } tests.Reverse(); return tests; } } TestCase testcase; bool skipTest = false; private void Reset() { testcase = new TestCase(); skipTest = false; } private string MatchArgument(string args, string option) { var match = Regex.Match(args, Regex.Escape(option) + @"\s+(?\S+)"); return match.Success ? 
match.Groups["value"].Value : ""; } #>vowpal-wabbit-8.6.1.dfsg1/cs/unittest/RunTestsHelper.cs000066400000000000000000000125241332666127000230630ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections.Generic; using System.Globalization; using System.IO; using System.IO.Compression; using System.Linq; using System.Text; using System.Threading.Tasks; using VW; namespace cs_unittest { public static class RunTestsHelper { private static StreamReader Open(string input) { if (input.EndsWith(".gz")) return new StreamReader(new GZipStream(new FileStream(input, FileMode.Open), CompressionMode.Decompress)); else return new StreamReader(input); } private static bool IsMultilineData(string input) { using (var streamReader = Open(input)) { string dataLine; while ((dataLine = streamReader.ReadLine()) != null) { if (string.IsNullOrWhiteSpace(dataLine)) { return true; } } } return false; } public static void ExecuteTest(int testCaseNr, string args, string input, string stderr, string predictFile) { using (var vw = new VowpalWabbit(args)) { var multiline = IsMultilineData(input); using (var streamReader = Open(input)) { if (multiline) { var lines = new List(); string dataLine; while ((dataLine = streamReader.ReadLine()) != null) { if (string.IsNullOrWhiteSpace(dataLine)) { if (lines.Count > 0) { if (args.Contains("-t")) // test only vw.Predict(lines); else vw.Learn(lines); } lines.Clear(); continue; } lines.Add(dataLine); } } else { int lineNr = 0; string[] predictions = null; if (File.Exists(predictFile)) predictions = File.ReadAllLines(predictFile); string dataLine; while ((dataLine = streamReader.ReadLine()) != null) { if (!string.IsNullOrWhiteSpace(predictFile) && File.Exists(predictFile)) { object actualValue; if (args.Contains("-t")) // test only actualValue = vw.Predict(dataLine, VowpalWabbitPredictionType.Dynamic); else actualValue = vw.Learn(dataLine, VowpalWabbitPredictionType.Dynamic); if (predictions != 
null) { // validate predictions var actualFloat = actualValue as float?; if (actualFloat != null) { var expectedPrediction = float.Parse(predictions[lineNr].Split(' ').First(), CultureInfo.InvariantCulture); VWTestHelper.FuzzyEqual(expectedPrediction, (float)actualFloat, 1e-4, "Prediction mismatch"); } var actualScalar = actualValue as VowpalWabbitScalar?; if (actualScalar != null) { var expectedPredictions = predictions[lineNr] .Split(' ') .Select(field => float.Parse(field, CultureInfo.InvariantCulture)) .ToArray(); Assert.AreEqual(2, expectedPredictions.Length); VWTestHelper.FuzzyEqual(expectedPredictions[0], actualScalar.Value.Value, 1e-4, "Prediction value mismatch"); VWTestHelper.FuzzyEqual(expectedPredictions[1], actualScalar.Value.Confidence, 1e-4, "Prediction confidence mismatch"); } } } else vw.Learn(dataLine); lineNr++; } } if (vw.Arguments.NumPasses > 1) vw.RunMultiPass(); else vw.EndOfPass(); if (!string.IsNullOrWhiteSpace(stderr) && File.Exists(stderr)) VWTestHelper.AssertEqual(stderr, vw.PerformanceStatistics); } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/Test1and2.cs000066400000000000000000000171121332666127000216770ustar00rootroot00000000000000using System; using System.Collections.Generic; using System.Globalization; using System.IO; using System.Linq; using cs_unittest; using Microsoft.VisualStudio.TestTools.UnitTesting; using VW; using VW.Labels; using VW.Serializer.Attributes; using System.Threading; using VW.Serializer; using cs_testcommon; namespace cs_test { [TestClass] public class Test1and2Class : TestBase { [TestMethod] [TestCategory("Vowpal Wabbit/Command line through marshalling")] public void Test1and2() { var references = File.ReadAllLines(@"pred-sets\ref\0001.predict").Select(l => float.Parse(l, CultureInfo.InvariantCulture)).ToArray(); var input = new List(); using (var vwStr = new VowpalWabbit(" -k -c test1and2.str --passes 8 -l 20 --power_t 1 --initial_t 128000 --ngram 3 --skips 1 --invariant --holdout_off")) using (var vw = 
new VowpalWabbit(new VowpalWabbitSettings(" -k -c test1and2 --passes 8 -l 20 --power_t 1 --initial_t 128000 --ngram 3 --skips 1 --invariant --holdout_off") { EnableExampleCaching = false })) using (var vwValidate = new VowpalWabbitExampleValidator("-l 20 --power_t 1 --initial_t 128000 --ngram 3 --skips 1 --invariant --holdout_off")) { var lineNr = 0; VWTestHelper.ParseInput( File.OpenRead(@"train-sets\0001.dat"), new MyListener(data => { input.Add(data); vwValidate.Validate(data.Line, data, data.Label); var expected = vwStr.Learn(data.Line, VowpalWabbitPredictionType.Dynamic); Assert.IsInstanceOfType(expected, typeof(float)); var actual = vw.Learn(data, data.Label, VowpalWabbitPredictionType.Scalar); Assert.AreEqual((float)expected, actual, 1e-6, "Learn output differs on line: " + lineNr); lineNr++; })); vwStr.RunMultiPass(); vw.Native.RunMultiPass(); vwStr.SaveModel("models/str0001.model"); vw.Native.SaveModel("models/0001.model"); VWTestHelper.AssertEqual(@"train-sets\ref\0001.stderr", vwStr.PerformanceStatistics); VWTestHelper.AssertEqual(@"train-sets\ref\0001.stderr", vw.Native.PerformanceStatistics); } Assert.AreEqual(input.Count, references.Length); using (var vwModel = new VowpalWabbitModel(new VowpalWabbitSettings("-k -t --invariant") { ModelStream = File.OpenRead("models/0001.model") })) using (var vwInMemoryShared1 = new VowpalWabbit(new VowpalWabbitSettings { Model = vwModel })) using (var vwInMemoryShared2 = new VowpalWabbit(new VowpalWabbitSettings { Model = vwModel })) using (var vwInMemory = new VowpalWabbit(new VowpalWabbitSettings("-k -t --invariant") { ModelStream = File.OpenRead("models/0001.model") })) using (var vwStr = new VowpalWabbit("-k -t -i models/str0001.model --invariant")) using (var vwNative = new VowpalWabbit("-k -t -i models/0001.model --invariant")) using (var vw = new VowpalWabbit("-k -t -i models/0001.model --invariant")) using (var vwModel2 = new VowpalWabbitModel("-k -t --invariant -i models/0001.model")) using (var 
vwInMemoryShared3 = new VowpalWabbit(new VowpalWabbitSettings { Model = vwModel2 })) { for (var i = 0; i < input.Count; i++) { var actualStr = vwStr.Predict(input[i].Line, VowpalWabbitPredictionType.Scalar); var actualNative = vwNative.Predict(input[i].Line, VowpalWabbitPredictionType.Scalar); var actualInMemory = vwInMemory.Predict(input[i].Line, VowpalWabbitPredictionType.Scalar); var actual = vw.Predict(input[i], VowpalWabbitPredictionType.Scalar, input[i].Label); var actualShared1 = vwInMemoryShared1.Predict(input[i].Line, VowpalWabbitPredictionType.Scalar); var actualShared2 = vwInMemoryShared2.Predict(input[i], VowpalWabbitPredictionType.Scalar, input[i].Label); var actualShared3 = vwInMemoryShared3.Predict(input[i], VowpalWabbitPredictionType.Scalar, input[i].Label); Assert.AreEqual(references[i], actualStr, 1e-5); Assert.AreEqual(references[i], actualNative, 1e-5); Assert.AreEqual(references[i], actualInMemory, 1e-5); Assert.AreEqual(references[i], actual, 1e-5); Assert.AreEqual(references[i], actualShared1, 1e-5); Assert.AreEqual(references[i], actualShared2, 1e-5); Assert.AreEqual(references[i], actualShared3, 1e-5); } // due to shared usage the counters don't match up //VWTestHelper.AssertEqual(@"test-sets\ref\0001.stderr", vwInMemoryShared2.Native.PerformanceStatistics); //VWTestHelper.AssertEqual(@"test-sets\ref\0001.stderr", vwInMemoryShared1.PerformanceStatistics); VWTestHelper.AssertEqual(@"test-sets\ref\0001.stderr", vwInMemory.PerformanceStatistics); VWTestHelper.AssertEqual(@"test-sets\ref\0001.stderr", vwStr.PerformanceStatistics); VWTestHelper.AssertEqual(@"test-sets\ref\0001.stderr", vw.Native.PerformanceStatistics); } } } // 1|features 13:.1 15:.2 const:25 // 1|abc 13:.1 15:.2 co:25 public class Test1 { [Feature(FeatureGroup = 'f', Namespace = "eatures", Name = "const", Order = 2)] public float Constant { get; set; } [Feature(FeatureGroup = 'f', Namespace = "eatures", Order = 1)] public IList> Features { get; set; } public string Line { get; 
set; } public ILabel Label { get; set;} } public class Rcv1CbEval { [Feature] public string[] Words { get; set; } } public class MyListener : VowpalWabbitBaseListener { private Test1 example; private Action action; public MyListener(Action action) { this.action = action; } public override void EnterExample(VowpalWabbitParser.ExampleContext context) { this.example = new Test1() { Features = new List>() }; } public override void ExitExample(VowpalWabbitParser.ExampleContext context) { this.example.Line = context.GetText(); this.action(this.example); } public override void ExitNumber(VowpalWabbitParser.NumberContext context) { context.value = float.Parse(context.GetText(), CultureInfo.InvariantCulture); } public override void ExitLabel_simple(VowpalWabbitParser.Label_simpleContext context) { this.example.Label = new SimpleLabel() { Label = context.value.value }; } public override void ExitFeatureSparse(VowpalWabbitParser.FeatureSparseContext context) { var index = context.index.Text; var x = context.x.value; if (index == "const") { this.example.Constant = x; } else { this.example.Features.Add(new KeyValuePair(index, x)); } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/Test3.cs000066400000000000000000000043601332666127000211350ustar00rootroot00000000000000using System.IO; using System.Linq; using Microsoft.VisualStudio.TestTools.UnitTesting; using TrainSet0002Dat; using VW; namespace cs_unittest { [TestClass] public class Test3Class : TestBase { [TestMethod] [TestCategory("Vowpal Wabbit/Command line through marshalling")] public void Test3() { VWTestHelper.Learn( "-k train-sets/0002.dat -f models/0002.model --invariant", @"train-sets\0002.dat", @"train-sets\ref\0002.stderr"); } [TestMethod] [TestCategory("Vowpal Wabbit/Command line through marshalling")] public void Test4and6() { VWTestHelper.Learn( "-k -d train-sets/0002.dat -f models/0002.model --invariant", @"train-sets\0002.dat", @"train-sets\ref\0002.stderr"); VWTestHelper.Predict( "-k -t --invariant -i 
models/0002.model", @"train-sets\0002.dat", @"pred-sets\ref\0002b.predict"); } [TestMethod] [TestCategory("Vowpal Wabbit/Command line through marshalling")] public void Test5() { VWTestHelper.Learn( "-k --initial_t 1 --adaptive --invariant -q Tf -q ff -f models/0002a.model", @"train-sets\0002.dat", @"train-sets\ref\0002a.stderr"); VWTestHelper.Predict( "-k -t --invariant -i models/0002a.model", @"train-sets\0002.dat"); } [TestMethod] [TestCategory("Vowpal Wabbit/Command line through marshalling")] [Description("using normalized adaptive updates and a low --power_t")] public void Test7and8() { VWTestHelper.Learn( "-k --power_t 0.45 -f models/0002c.model", @"train-sets\0002.dat", @"train-sets\ref\0002c.stderr"); VWTestHelper.Predict( "-k -t -i models/0002c.model", @"train-sets\0002.dat", @"pred-sets\ref\0002c.predict"); } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestAllReduce.cs000066400000000000000000000151141332666127000226320ustar00rootroot00000000000000using cs_unittest.cbadf; using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; using System.Threading; using System.Threading.Tasks; using VW; using VW.Labels; using VW.Serializer; namespace cs_unittest { [TestClass] public class TestAllReduceClass : TestBase { private static void Ingest(VowpalWabbit vw, IEnumerable> blocks) { foreach (var block in blocks) { vw.Learn(block); } vw.EndOfPass(); } private static void Ingest(VowpalWabbitThreadedLearning vw, IEnumerable> blocks) { foreach (var block in blocks) { vw.Learn(block); } } private static void Ingest(VowpalWabbitAsync vw, IEnumerable, ContextualBanditLabel>> data) { foreach (var d in data) { vw.Learn(d.Item1, d.Item2, (int)d.Item3.Action, d.Item3); } } [TestMethod] [TestCategory("Vowpal Wabbit")] public async Task TestAllReduce() { var data = Enumerable.Range(1, 1000).Select(_ => Generator.GenerateShared(10)).ToList(); var stringSerializerCompiler = 
(VowpalWabbitSingleExampleSerializerCompiler) VowpalWabbitSerializerFactory.CreateSerializer(new VowpalWabbitSettings { EnableStringExampleGeneration = true }); var stringSerializerAdfCompiler = (VowpalWabbitSingleExampleSerializerCompiler) VowpalWabbitSerializerFactory.CreateSerializer(new VowpalWabbitSettings { EnableStringExampleGeneration = true }); var stringData = new List>(); VowpalWabbitPerformanceStatistics statsExpected; using (var spanningTree = new SpanningTreeClr()) { spanningTree.Start(); using (var vw1 = new VowpalWabbit(new VowpalWabbitSettings(@"--total 2 --node 1 --unique_id 0 --span_server localhost --cb_adf --rank_all --interact xy") { EnableStringExampleGeneration = true })) using (var vw2 = new VowpalWabbit(new VowpalWabbitSettings(@"--total 2 --node 0 --unique_id 0 --span_server localhost --cb_adf --rank_all --interact xy") { EnableStringExampleGeneration = true } )) { var stringSerializer = stringSerializerCompiler.Func(vw1); var stringSerializerAdf = stringSerializerAdfCompiler.Func(vw1); // serialize foreach (var d in data) { var block = new List(); using (var context = new VowpalWabbitMarshalContext(vw1)) { stringSerializer(context, d.Item1, SharedLabel.Instance); block.Add(context.ToString()); } block.AddRange(d.Item2.Select((a, i) => { using (var context = new VowpalWabbitMarshalContext(vw1)) { stringSerializerAdf(context, a, i == d.Item3.Action ? 
d.Item3 : null); return context.ToString(); } })); stringData.Add(block); } await Task.WhenAll( Task.Factory.StartNew(() => Ingest(vw1, stringData.Take(500))), Task.Factory.StartNew(() => Ingest(vw2, stringData.Skip(500)))); vw1.SaveModel("expected.1.model"); vw2.SaveModel("expected.2.model"); statsExpected = vw1.PerformanceStatistics; } } // skip header var expected1Model = File.ReadAllBytes("expected.1.model").Skip(0x15).ToList(); var expected2Model = File.ReadAllBytes("expected.2.model").Skip(0x15).ToList(); var settings = new VowpalWabbitSettings("--cb_adf --rank_all --interact xy") { ParallelOptions = new ParallelOptions { MaxDegreeOfParallelism = 2 }, ExampleCountPerRun = 2000, ExampleDistribution = VowpalWabbitExampleDistribution.RoundRobin }; using (var vw = new VowpalWabbitThreadedLearning(settings)) { await Task.WhenAll( Task.Factory.StartNew(() => Ingest(vw, stringData.Take(500))), Task.Factory.StartNew(() => Ingest(vw, stringData.Skip(500)))); // important to enqueue the request before Complete() is called var statsTask = vw.PerformanceStatistics; var modelSave = vw.SaveModel("actual.model"); await vw.Complete(); var statsActual = await statsTask; VWTestHelper.AssertEqual(statsExpected, statsActual); await modelSave; // skip header var actualModel = File.ReadAllBytes("actual.model").Skip(0x15).ToList(); CollectionAssert.AreEqual(expected1Model, actualModel); CollectionAssert.AreEqual(expected2Model, actualModel); } using (var vw = new VowpalWabbitThreadedLearning(settings)) { var vwManaged = vw.Create(); await Task.WhenAll( Task.Factory.StartNew(() => Ingest(vwManaged, data.Take(500))), Task.Factory.StartNew(() => Ingest(vwManaged, data.Skip(500)))); // important to enqueue the request before Complete() is called var statsTask = vw.PerformanceStatistics; var modelSave = vw.SaveModel("actual.managed.model"); await vw.Complete(); var statsActual = await statsTask; VWTestHelper.AssertEqual(statsExpected, statsActual); await modelSave; // skip header var 
actualModel = File.ReadAllBytes("actual.managed.model").Skip(0x15).ToList(); CollectionAssert.AreEqual(expected1Model, actualModel); CollectionAssert.AreEqual(expected2Model, actualModel); } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestAntlr.cs000066400000000000000000000047341332666127000220600ustar00rootroot00000000000000using System.IO; using System.Text; using Microsoft.VisualStudio.TestTools.UnitTesting; using TrainSet0002Dat; using VW.Labels; namespace cs_unittest { [TestClass] public class TestAntlrClass { [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestAntlr() { var line1 = "0.521144 1 PFF/20091028|T PFF |f t1:-0.0236849 t5:-0.10215 r5:0.727735 t10:-0.0387662 r10:0.911208 t20:-0.00777943 r20:0.952668 t40:0.014542 r40:0.832479 t60:0.00395449 r60:0.724504 t90:0.0281418 r90:0.784653"; var listener = new DataListener(); listener.Created = (line, x, plabel) => { Assert.AreEqual(line1, line); Assert.AreEqual("PFF", x.T); var label = plabel as SimpleLabel; Assert.AreEqual(0.521144, label.Label, 1e-5); Assert.AreEqual(1, label.Weight); Assert.AreEqual(13, x.F.Count); Assert.AreEqual("t1", x.F[0].Key); Assert.AreEqual(-0.0236849, x.F[0].Value, 1e-5); }; VWTestHelper.ParseInput(new MemoryStream(Encoding.UTF8.GetBytes(line1)), listener); var line2 = "1 |f 13:3.9656971e-02 24:3.4781646e-02 69:4.6296168e-02 85:6.1853945e-02 140:3.2349996e-02 156:1.0290844e-01 175:6.8493910e-02 188:2.8366476e-02 229:7.4871540e-02 230:9.1505975e-02 234:5.4200061e-02 236:4.4855952e-02 238:5.3422898e-02 387:1.4059304e-01 394:7.5131744e-02 433:1.1118756e-01 434:1.2540409e-01 438:6.5452829e-02 465:2.2644201e-01 468:8.5926279e-02 518:1.0214076e-01 534:9.4191484e-02 613:7.0990764e-02 646:8.7701865e-02 660:7.2289191e-02 709:9.0660661e-02 752:1.0580081e-01 757:6.7965068e-02 812:2.2685185e-01 932:6.8250686e-02 1028:4.8203137e-02 1122:1.2381379e-01 1160:1.3038123e-01 1189:7.1542501e-02 1530:9.2655659e-02 1664:6.5160148e-02 1865:8.5823394e-02 2524:1.6407280e-01 2525:1.1528353e-01 
2526:9.7131468e-02 2536:5.7415009e-01 2543:1.4978983e-01 2848:1.0446861e-01 3370:9.2423186e-02 3960:1.5554591e-01 7052:1.2632671e-01 16893:1.9762035e-01 24036:3.2674628e-01 24303:2.2660980e-010"; listener.Created = (line, x, plabel) => { Assert.AreEqual(line2, line); var label = plabel as SimpleLabel; Assert.AreEqual(1, label.Label, 1e-5); Assert.AreEqual(49, x.F.Count); }; VWTestHelper.ParseInput(new MemoryStream(Encoding.UTF8.GetBytes(line2)), listener); } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestArguments.cs000066400000000000000000000127331332666127000227430ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; using System.Threading; using System.Threading.Tasks; using VW; namespace cs_unittest { [TestClass] public class TestArgumentsClass { [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestArguments() { using (var vw = new VowpalWabbit(new VowpalWabbitSettings("--cb_explore_adf --epsilon 0.3 --interact ud") { Verbose = true })) { // --cb_explore_adf --epsilon 0.3 --interact ud --cb_adf--csoaa_ldf multiline --csoaa_rank Console.WriteLine(vw.Arguments.CommandLine); Assert.IsTrue(vw.Arguments.CommandLine.Contains("--cb_explore_adf")); Assert.IsTrue(vw.Arguments.CommandLine.Contains("--epsilon 0.3")); Assert.IsTrue(vw.Arguments.CommandLine.Contains("--interact ud")); Assert.IsTrue(vw.Arguments.CommandLine.Contains("--csoaa_ldf multiline")); Assert.IsTrue(vw.Arguments.CommandLine.Contains("--csoaa_rank")); vw.SaveModel("args.model"); } using (var vw = new VowpalWabbit(new VowpalWabbitSettings { ModelStream = File.Open("args.model", FileMode.Open) })) { Console.WriteLine(vw.Arguments.CommandLine); // --no_stdin--bit_precision 18--cb_explore_adf--epsilon 0.300000--cb_adf--cb_type ips --csoaa_ldf multiline--csoaa_rank--interact ud Assert.IsTrue(vw.Arguments.CommandLine.Contains("--no_stdin")); 
Assert.IsTrue(vw.Arguments.CommandLine.Contains("--bit_precision 18")); Assert.IsTrue(vw.Arguments.CommandLine.Contains("--cb_explore_adf")); Assert.IsTrue(vw.Arguments.CommandLine.Contains("--epsilon 0.3")); Assert.IsTrue(vw.Arguments.CommandLine.Contains("--interact ud")); Assert.IsTrue(vw.Arguments.CommandLine.Contains("--csoaa_ldf multiline")); Assert.IsTrue(vw.Arguments.CommandLine.Contains("--csoaa_rank")); Assert.IsTrue(vw.Arguments.CommandLine.Contains("--cb_type ips")); Assert.IsTrue(vw.Arguments.CommandLine.Contains("--csoaa_ldf multiline")); Assert.IsTrue(vw.Arguments.CommandLine.Contains("--interact ud")); Assert.IsTrue(vw.Arguments.CommandLine.Contains("--csoaa_rank")); } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestQuietAndTestArguments() { using (var vw = new VowpalWabbit("--quiet -t")) { vw.SaveModel("args.model"); } using (var vw = new VowpalWabbitModel(new VowpalWabbitSettings { ModelStream = File.Open("args.model", FileMode.Open) })) { Assert.IsFalse(vw.Arguments.CommandLine.Contains("--quiet")); Assert.IsTrue(vw.Arguments.CommandLine.Contains("-t")); using (var vwSub = new VowpalWabbit(new VowpalWabbitSettings { Model = vw })) { Assert.IsTrue(vwSub.Arguments.CommandLine.Contains("--quiet")); Assert.IsTrue(vwSub.Arguments.CommandLine.Contains("-t")); } } using (var vw = new VowpalWabbit("")) { vw.SaveModel("args.model"); } using (var vw = new VowpalWabbitModel(new VowpalWabbitSettings { ModelStream = File.Open("args.model", FileMode.Open) })) { Assert.IsFalse(vw.Arguments.CommandLine.Contains("--quiet")); Assert.IsTrue(vw.Arguments.CommandLine.Contains("-t")); using (var vwSub = new VowpalWabbit(new VowpalWabbitSettings { Model = vw })) { Assert.IsTrue(vwSub.Arguments.CommandLine.Contains("--quiet")); Assert.IsTrue(vwSub.Arguments.CommandLine.Contains("-t")); } } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestArgumentDeDup() { using (var vw = new VowpalWabbit("-l 0.3 -l 0.3 --learning_rate 0.3 -f model1 
--save_resume -q ab")) { Assert.AreEqual(0.3f, vw.Native.Arguments.LearningRate); } try { using (var vw = new VowpalWabbit( "--cb 2 --cb_type ips --cb_type dm --learning_rate 0.1 -f model_bad --save_resume -q ab")) { Assert.AreEqual(0.1f, vw.Native.Arguments.LearningRate); } Assert.Fail("Disagreering arguments not detected"); } catch (VowpalWabbitException) { } using (var vw = new VowpalWabbit("-i model1 --save_resume")) { Assert.AreEqual(0.5f, vw.Native.Arguments.LearningRate); } using (var vw = new VowpalWabbit("-i model1 --save_resume -q ab -l 0.4")) { Assert.AreEqual(0.4f, vw.Native.Arguments.LearningRate); } // make sure different representations of arguments are matched using (var vw = new VowpalWabbit("--cb_explore_adf --epsilon 0.1 -f model2")) { } using (var vw = new VowpalWabbit("--cb_explore_adf --epsilon 0.1000 -i model2")) { } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestAzure.cs000066400000000000000000000610371332666127000220650ustar00rootroot00000000000000using Microsoft.ApplicationInsights.DataContracts; using Microsoft.ApplicationInsights.Extensibility; using Microsoft.ServiceBus.Messaging; using Microsoft.VisualStudio.TestTools.UnitTesting; using Microsoft.WindowsAzure.Storage; using Microsoft.WindowsAzure.Storage.Blob; using MoreLinq; using Newtonsoft.Json; using System; using System.Collections.Generic; using System.Diagnostics; using System.Globalization; using System.IO; using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Threading.Tasks; using VW; using VW.Azure.Trainer; using VW.Azure.Trainer.Checkpoint; using VW.Serializer; namespace cs_unittest { [TestClass] public class TestAzure { public class SharedFeatures { public string Location { get; set; } } public class ActionNamespace { public float Category { get; set; } } public class ActionFeatures { [JsonProperty("b")] public ActionNamespace Namespace { get; set; } // TODO: _tag } public class Context { // TODO: _ProbabilityOfDrop [JsonIgnore] public 
int Index { get; set; } [JsonIgnore] public string JSON { get { return JsonConvert.SerializeObject(this); } } [JsonIgnore] public byte[] JSONAsBytes { get { return Encoding.UTF8.GetBytes(this.JSON); } } [JsonProperty("_eventid")] public string EventId { get; set; } [JsonProperty("_timestamp")] public DateTime Timestamp { get; set; } [JsonProperty("_a")] public int[] ActionIndicies { get; set; } [JsonProperty("_p")] public float[] Probabilities { get; set; } [JsonProperty("_label_action")] public int LabelAction { get; set; } [JsonProperty("_label_cost")] public float LabelCost { get; set; } [JsonProperty("_label_probability")] public float LabelProbability { get; set; } [JsonProperty("_labelindex")] public int LabelIndex { get; set; } [JsonProperty("a")] public SharedFeatures Shared { get; set; } [JsonProperty("_multi")] public ActionFeatures[] Actions { get; set; } } private static string GetConfiguration(string name) { var value = Environment.GetEnvironmentVariable(name); if (!string.IsNullOrEmpty(value)) return value.Trim(); var path = Directory.GetCurrentDirectory(); do { var filename = Path.Combine(path, "vw_azure.config"); if (File.Exists(filename)) { var q = from line in File.ReadAllLines(filename) let m = Regex.Match(line, @"^(\S+)\s*=(.*)$") where m.Success where m.Groups[1].Value == name select m.Groups[2].Value; value = q.FirstOrDefault(); if (!string.IsNullOrEmpty(value)) return value.Trim(); } var di = Directory.GetParent(path); if (di == null) Assert.Fail($"Configuration variable '{name}' not found. 
Search for environment variable or vw_azure.config"); path = di.FullName; } while (true); } private class OnlineTrainerWrapper : IDisposable { internal string trainArguments; internal OnlineTrainerBlobs Blobs; private string storageConnectionString = GetConfiguration("storageConnectionString"); private string inputEventHubConnectionString = GetConfiguration("inputEventHubConnectionString"); private string evalEventHubConnectionString = GetConfiguration("evalEventHubConnectionString"); private SynchronizedCollection exceptions; private LearnEventProcessorHost trainProcesserHost; internal OnlineTrainerWrapper(string trainArguments) { this.trainArguments = trainArguments; Blobs = new OnlineTrainerBlobs(storageConnectionString); // register with AppInsights to collect exceptions // need to set the instrumentation key, otherwise the processor is ignored. TelemetryConfiguration.Active.InstrumentationKey = "00000000-0000-0000-0000-000000000000"; exceptions = new SynchronizedCollection(); var builder = TelemetryConfiguration.Active.TelemetryProcessorChainBuilder; builder.Use((next) => new TestTelemetryProcessor(next, exceptions)); builder.Build(); } void AssertNoExceptionsThroughAppInsights() { Assert.AreEqual(0, exceptions.Count, string.Join("\n", exceptions.Select(e => e.Exception.Message + " " + e.Message))); } internal async Task StartAsync(ICheckpointPolicy checkpointPolicy) { trainProcesserHost = new LearnEventProcessorHost(); await trainProcesserHost.StartAsync(new OnlineTrainerSettingsInternal { CheckpointPolicy = checkpointPolicy, JoinedEventHubConnectionString = inputEventHubConnectionString, EvalEventHubConnectionString = evalEventHubConnectionString, StorageConnectionString = storageConnectionString, Metadata = new OnlineTrainerSettings { ApplicationID = "vwunittest", TrainArguments = trainArguments }, EnableExampleTracing = false, EventHubStartDateTimeUtc = DateTime.UtcNow // ignore any events that arrived before this time }); 
AssertNoExceptionsThroughAppInsights(); } internal async Task PollTrainerCheckpoint(Predicate predicate) { // wait for trainer to checkpoint await Blobs.PollTrainerCheckpoint(exceptions, predicate); } internal void SendData(IEnumerable data) { // send events to event hub var eventHubInputClient = EventHubClient.CreateFromConnectionString(inputEventHubConnectionString); data.ForEach(c => eventHubInputClient.Send(new EventData(c.JSONAsBytes) { PartitionKey = c.Index.ToString() })); } public void Dispose() { if (trainProcesserHost != null) { trainProcesserHost.Dispose(); trainProcesserHost = null; } } internal void TrainOffline(string message, string modelId, Dictionary data, IEnumerable eventOrder, Uri onlineModelUri, string trainArguments = null) { // allow override if (trainArguments == null) trainArguments = this.trainArguments; // train model offline using trackback var settings = new VowpalWabbitSettings(trainArguments + $" --id {modelId} --save_resume --preserve_performance_counters -f offline.model"); using (var vw = new VowpalWabbitJson(settings)) { foreach (var id in eventOrder) { var json = data[id].JSON; var progressivePrediction = vw.Learn(json, VowpalWabbitPredictionType.ActionProbabilities); // TODO: validate eval output } } using (var vw = new VowpalWabbit("-i offline.model --save_resume --readable_model offline.model.txt -f offline.reset_perf_counters.model")) { } Blobs.DownloadFile(onlineModelUri, "online.model"); using (var vw = new VowpalWabbit("-i online.model --save_resume --readable_model online.model.txt -f online.reset_perf_counters.model")) { } // validate that the model is the same CollectionAssert.AreEqual( File.ReadAllBytes("offline.reset_perf_counters.model"), File.ReadAllBytes("online.reset_perf_counters.model"), $"{message}. Offline and online model differs. 
Compare online.model.txt with offline.model.txt to compare"); } } private async Task RunTrainer(string args, IEnumerable data, Dictionary dataMap, int expectedNumStates, bool cleanBlobs) { var trainer = new OnlineTrainerWrapper("--cb_explore_adf --epsilon 0.1 -q ab -l 0.1"); if (cleanBlobs) trainer.Blobs.Cleanup().Wait(); // start listening for event hub await trainer.StartAsync(new CountingCheckpointPolicy(100)); // send data to event hub trainer.SendData(data); await trainer.PollTrainerCheckpoint(blobs => blobs.ModelBlobs.Count == expectedNumStates && blobs.ModelTrackbackBlobs.Count == expectedNumStates && blobs.StateJsonBlobs.Count == expectedNumStates); // download & parse trackback file trainer.Blobs.DownloadTrackbacksOrderedByTime(); foreach (var trackback in trainer.Blobs.Trackbacks) // due to checkpoint policy = 100 Assert.AreEqual(100, trackback.EventIds.Count, $"{trackback.Blob.Uri} does not contain the expected 100 events. Actual: {trackback.EventIds.Count}"); return trainer; } [TestMethod] [TestCategory("NotOnVSO")] [TestCategory("Vowpal Wabbit")] [Ignore] public async Task TestAzureTrainerRestart() { // generate data var data = GenerateData(600).ToList(); var dataMap = data.ToDictionary(d => d.EventId, d => d); var args = "--cb_explore_adf --epsilon 0.1 -q ab -l 0.1"; using (var trainer = await RunTrainer(args, data.Take(220), dataMap, expectedNumStates: 2, cleanBlobs: true)) { trainer.TrainOffline("produce the 1st model", trainer.Blobs.Trackbacks[0].ModelId, dataMap, trainer.Blobs.Trackbacks[0].EventIds, trainer.Blobs.ModelBlobs[0].Uri); // keep model for subsequent training File.Copy("offline.model", "split1.model", overwrite: true); trainer.TrainOffline("produce the 2nd model by training through all events", trainer.Blobs.Trackbacks[1].ModelId, dataMap, trainer.Blobs.Trackbacks.SelectMany(t => t.EventIds), trainer.Blobs.ModelBlobs[1].Uri); File.Copy("offline.model", "split2.model", overwrite: true); trainer.TrainOffline("produce the 2nd model by 
starting from the 1st and then continuing", trainer.Blobs.Trackbacks[1].ModelId, dataMap, trainer.Blobs.Trackbacks[1].EventIds, trainer.Blobs.ModelBlobs[1].Uri, "-i split1.model -l 0.1"); } // restart trainer and resume from split2.model, covering "fresh -> load" transition using (var trainer = await RunTrainer(args, data.Skip(220).Take(120), dataMap, expectedNumStates: 3, cleanBlobs: false)) { var lastTrackback = trainer.Blobs.Trackbacks.Last(); var lastBlob = trainer.Blobs.ModelBlobs.Last(); trainer.TrainOffline("produce the 3rd model by training through all events", lastTrackback.ModelId, dataMap, trainer.Blobs.Trackbacks.SelectMany(t => t.EventIds), lastBlob.Uri); trainer.TrainOffline("produce the 3rd model by starting from the 2nd and then continuing", lastTrackback.ModelId, dataMap, lastTrackback.EventIds, lastBlob.Uri, "-i split2.model -l 0.1"); File.Copy("offline.model", "split3.model", overwrite: true); } // restart ones more to cover "load -> save" using (var trainer = await RunTrainer(args, data.Skip(340).Take(120), dataMap, expectedNumStates: 4, cleanBlobs: false)) { var lastTrackback = trainer.Blobs.Trackbacks.Last(); var lastBlob = trainer.Blobs.ModelBlobs.Last(); trainer.TrainOffline("produce the 4th model by training through all events", lastTrackback.ModelId, dataMap, trainer.Blobs.Trackbacks.SelectMany(t => t.EventIds), lastBlob.Uri); trainer.TrainOffline("produce the 4th model by starting from the 3rd and then continuing", lastTrackback.ModelId, dataMap, lastTrackback.EventIds, lastBlob.Uri, "-i split3.model -l 0.1"); } } [TestMethod] [TestCategory("NotOnVSO")] [TestCategory("Vowpal Wabbit")] [Ignore] public async Task TestAzureTrainer() { using (var trainer = new OnlineTrainerWrapper("--cb_explore_adf --epsilon 0.2 -q ab")) { trainer.Blobs.Cleanup().Wait(); // generate data var data = GenerateData(100).ToList(); var dataMap = data.ToDictionary(d => d.EventId, d => d); // start listening for event hub await trainer.StartAsync(new 
CountingCheckpointPolicy(data.Count)); // send data to event hub trainer.SendData(data); // wait for trainer to checkpoint await trainer.PollTrainerCheckpoint(blobs => blobs.ModelBlobs.Count > 0 && blobs.ModelTrackbackBlobs.Count > 0 && blobs.StateJsonBlobs.Count > 0 ); // download & parse trackback file trainer.Blobs.DownloadTrackbacksOrderedByTime(); Assert.AreEqual(1, trainer.Blobs.Trackbacks.Count); var trackback = trainer.Blobs.Trackbacks[0]; Assert.AreEqual(data.Count, trackback.EventIds.Count); Assert.AreEqual(1, trainer.Blobs.ModelBlobs.Count); trainer.TrainOffline("train a model for this set of events", trackback.ModelId, dataMap, trackback.EventIds, trainer.Blobs.ModelBlobs[0].Uri); } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestCbProgressiveValidation() { int numExamples = 1024; foreach (var cbType in new[] { "ips", "dr", "mtr" }) { var trainArguments = $"--cb_explore_adf --epsilon 0.1 --bag 3 -q ab --power_t 0 -l 0.1 --cb_type {cbType} --random_seed 50"; int[] topActionCounts = new int[3]; using (var vw1 = new VowpalWabbitJson(trainArguments)) using (var vw2 = new VowpalWabbitJson(trainArguments)) { foreach (var ex in GenerateData(numExamples)) { var json = ex.JSON; var pred1_a = vw1.Predict(json, VowpalWabbitPredictionType.ActionProbabilities); var pred1_b = vw1.Learn(json, VowpalWabbitPredictionType.ActionProbabilities); var pred2 = vw2.Learn(json, VowpalWabbitPredictionType.ActionProbabilities); AreEqual(pred1_a, pred2, cbType); AreEqual(pred1_b, pred2, cbType); topActionCounts[pred2[0].Action]++; //Debug.WriteLine(json); //Debug.WriteLine("Prob1.pred: " + string.Join(",", pred1_a.Select(a=>$"{a.Action}:{a.Score}"))); //Debug.WriteLine("Prob1.learn: " + string.Join(",", pred1_b.Select(a=>$"{a.Action}:{a.Score}"))); //Debug.WriteLine("Prob2.learn: " + string.Join(",", pred2.Select(a=>$"{a.Action}:{a.Score}"))); //Debug.WriteLine(""); } } foreach (var count in topActionCounts) Assert.IsTrue(count < numExamples * 0.8, $"Unexpected 
action distribution: {count}"); Debug.WriteLine($"cb_types: {cbType} " + string.Join(",", topActionCounts.Select((count, i) => $"{i}:{count}"))); } } private static void AreEqual(ActionScore[] expected, ActionScore[] actual, string cbType) { Assert.AreEqual(expected.Length, actual.Length); for (int i = 0; i < expected.Length; i++) { Assert.AreEqual(expected[i].Action, actual[i].Action, $"cb_type: {cbType} Action mismatch at index {i}. Expected: {expected[i].Action} Actual: {actual[i].Action}"); Assert.AreEqual(expected[i].Score, actual[i].Score, $"cb_type: {cbType} Score mismatch at index {i}. Expected: {expected[i].Score} Actual: {actual[i].Score}"); } } internal class Trackback { public IListBlobItem Blob; public string ModelId; public List EventIds; public DateTime Timestamp; } private static IEnumerable GenerateData(int n) { var random = new Random(42); var locations = new[] { "east", "west" }; for (int i = 0; i < n; i++) { var action = random.Next(2); // (i % 2); var prob = (float)random.NextDouble(); var probs = action == 0 ? 
new[] { prob, 1 - prob } : new[] { 1 - prob, prob }; yield return new Context { Index = i, ActionIndicies = new[] { 1, 2 }, Probabilities = new[] { 0.5f, 0.5f },// probs, Timestamp = DateTime.UtcNow, EventId = Guid.NewGuid().ToString("n"), LabelAction = action + 1, // random.Next(1, 3), //action + 1, LabelCost = random.Next(4) - 2, LabelIndex = action, // action, LabelProbability = prob, Shared = new SharedFeatures { Location = locations[random.Next(2)] }, Actions = new[] { new ActionFeatures { Namespace = new ActionNamespace { Category = (float)random.NextDouble() } }, new ActionFeatures { Namespace = new ActionNamespace { Category = 1.5f + (float)random.NextDouble() } }, new ActionFeatures { Namespace = new ActionNamespace { Category = 1.5f + (float)random.NextDouble() } } //new ActionFeatures { Namespace = new ActionNamespace { Category = "games" } }, //new ActionFeatures { Namespace = new ActionNamespace { Category = "news" } } } }; } } internal class OnlineTrainerBlobs { internal CloudBlobClient BlobClient; internal CloudBlobContainer ModelContainer; internal CloudBlockBlob CurrentModel; internal CloudBlobContainer TrainerContainer; internal List ModelBlobs; internal List ModelTrackbackBlobs; internal List StateJsonBlobs; internal List Trackbacks; internal OnlineTrainerBlobs(string storageConnectionString) { this.BlobClient = CloudStorageAccount.Parse(storageConnectionString).CreateCloudBlobClient(); this.ModelContainer = this.BlobClient.GetContainerReference("mwt-models"); this.CurrentModel = this.ModelContainer.GetBlockBlobReference("current"); this.TrainerContainer = this.BlobClient.GetContainerReference("onlinetrainer"); } internal void DownloadFile(Uri uri, string filename) { new CloudBlob(uri, BlobClient.Credentials).DownloadToFile(filename, FileMode.Create); } internal async Task Cleanup() { // don't delete the container as this will trigger a conflict unless we wait... 
if (this.ModelContainer.Exists()) this.CurrentModel.DeleteIfExists(); if (this.TrainerContainer.Exists()) { // don't delete the container as this will trigger a conflict unless we wait... await Task.WhenAll( this.TrainerContainer.ListBlobs(useFlatBlobListing: true) .Select(blob => new CloudBlob(blob.Uri, this.BlobClient.Credentials).DeleteIfExistsAsync())); } } internal async Task PollTrainerCheckpoint(SynchronizedCollection exceptions, Predicate predicate) { // wait for files to show up for (int i = 0; i < 30; i++) { await Task.Delay(TimeSpan.FromSeconds(1)); if (exceptions.Count > 0) Assert.Fail(string.Join(";", exceptions.Select(e => e.Message))); // mwt-models if (!this.ModelContainer.Exists()) continue; // mwt-models/current if (!this.CurrentModel.Exists()) continue; // onlinetrainer if (!this.TrainerContainer.Exists()) continue; // onlinetrainer/20161128/002828 var blobs = this.TrainerContainer.ListBlobs(useFlatBlobListing: true); this.ModelBlobs = blobs.Where(b => b.Uri.ToString().EndsWith("model")) .OrderBy(uri => DateTime.ParseExact(uri.Parent.Prefix, "yyyyMMdd/HHmmss/", CultureInfo.InvariantCulture)) .ToList(); this.ModelTrackbackBlobs = blobs.Where(b => b.Uri.ToString().EndsWith("model.trackback")).ToList(); this.StateJsonBlobs = blobs.Where(b => !string.IsNullOrEmpty(b.Parent.Prefix) && b.Uri.ToString().EndsWith("state.json")).ToList(); if (predicate(this)) return; } Assert.Fail("Trainer didn't produce checkpoints"); } internal void DownloadTrackbacksOrderedByTime() { this.Trackbacks = this.ModelTrackbackBlobs.Select(b => { var trackbackStr = this.DownloadNonEmptyBlob(b); var trackback = trackbackStr.Split('\n'); // modelid: faf5e313-46bb-4852-af05-576c3a1c2c67 var m = Regex.Match(trackback[0], "^modelid: (.+)$"); Assert.IsTrue(m.Success, $"Unable to extract model id from trackback file. 
Line '{trackback[0]}'"); return new Trackback { Blob = b, Timestamp = DateTime.ParseExact(b.Parent.Prefix, "yyyyMMdd/HHmmss/", CultureInfo.InvariantCulture), ModelId = m.Groups[1].Value, EventIds = trackback.Skip(1).ToList() }; }) .OrderBy(x => x.Timestamp).ToList(); } internal string DownloadNonEmptyBlob(IListBlobItem blob) { using (var memStream = new MemoryStream()) { new CloudBlob(blob.Uri, this.BlobClient.Credentials).DownloadToStream(memStream); var content = Encoding.UTF8.GetString(memStream.ToArray()); Assert.IsTrue(!string.IsNullOrWhiteSpace(content), $"File is empty: '{blob.Uri}'"); return content; } } } //[TestMethod] //public void TestQueueDictionary() //{ // var qd = new QueueDictionary(); // Assert.IsNull(qd.Remove(1)); // qd.Enqueue(1, "foo"); // Assert.AreEqual("foo", qd.Remove(1)); // Assert.AreEqual(0, qd.DequeueIf(_ => true).Count()); // qd.Enqueue(1, "foo"); // qd.Enqueue(2, "bar"); // Assert.AreEqual(1, qd.DequeueIf(key => key == "foo").Count()); // Assert.AreEqual("foo", qd.Remove(1)); //} } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestBase.cs000066400000000000000000000021511332666127000216410ustar00rootroot00000000000000using System.IO; using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Linq; using VW; using System.Text.RegularExpressions; using System.Runtime.InteropServices; using System.Diagnostics; namespace cs_unittest { [TestClass] public abstract class TestBase : IDisposable { public TestBase() { this.Init(); } [TestInitialize] public void Init() { var basePath = Path.GetDirectoryName(typeof(TestBase).Assembly.Location); Environment.CurrentDirectory = basePath + @"\..\..\..\test"; if (!Directory.Exists("models")) { Directory.CreateDirectory("models"); } } public void Dispose() { try { if (Directory.Exists("models")) { Directory.Delete("models", true); } } catch (Exception ex) { Console.WriteLine("Failed to delete model directory: "+ex.Message); } } } } 
vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestCbAdf.cs000066400000000000000000000474311332666127000217400ustar00rootroot00000000000000using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Threading.Tasks; using Microsoft.VisualStudio.TestTools.UnitTesting; using VW; using VW.Labels; using VW.Serializer.Attributes; using Newtonsoft.Json; namespace cs_unittest { [TestClass] public class TestCbAdfClass : TestBase { public void ProfilePerformanceWithStringData() { string outModelFile = "profile_cb_adf.model"; using (var vw = new VowpalWabbit("--cb_adf --rank_all")) { DataString[] sampleData = CreateStringCbAdfData(1000 * 1000); foreach (DataString example in sampleData) { vw.Learn(example, example.ActionDependentFeatures, example.SelectedActionIndex, example.Label); } vw.Native.SaveModel(outModelFile); } File.Delete(outModelFile); } public void ProfilePerformanceWithFloatData() { string outModelFile = "profile_cb_adf.model"; using (var vw = new VowpalWabbit("--cb_adf --rank_all")) { DataFloat[] sampleData = CreateFloatCbAdfData(1000 * 1000); foreach (DataFloat example in sampleData) { vw.Learn(example, example.ActionDependentFeatures, example.SelectedActionIndex, example.Label); } vw.Native.SaveModel(outModelFile); } File.Delete(outModelFile); } private void Validate(VowpalWabbitExampleValidator vwSharedValidation, VowpalWabbitExampleValidator vwADFValidation, DataString example) { vwSharedValidation.Validate(example.Line, example, SharedLabel.Instance); for (int i = 0; i < example.ActionDependentFeatures.Count; i++) { var adf = example.ActionDependentFeatures[i]; vwADFValidation.Validate(adf.Line, adf, i == example.SelectedActionIndex ? 
example.Label : null); } } public static void TestMemoryLeak() { string outModelFile = "cb_adf_mem_leak.model"; using (var vw = new VowpalWabbit("--cb_adf --rank_all")) { DataString[] sampleData = CreateStringCbAdfData(1000); foreach (DataString example in sampleData) { vw.Learn(example, example.ActionDependentFeatures, example.SelectedActionIndex, example.Label); } vw.Native.SaveModel(outModelFile); } var vwModel = new VowpalWabbitModel(new VowpalWabbitSettings(string.Format("--quiet -t -i {0}", outModelFile)) { MaxExampleCacheSize = 1024 }); var pool = new VowpalWabbitThreadedPrediction(vwModel); while (true) { vwModel = new VowpalWabbitModel(new VowpalWabbitSettings(string.Format("--quiet -t -i {0}", outModelFile)) { MaxExampleCacheSize = 1024 }); pool.UpdateModel(vwModel); } } [TestMethod] [TestCategory("Vowpal Wabbit/Command line through marshalling")] public void Test87() { using (var vw = new VowpalWabbit("--cb_adf --rank_all")) using (var vwSharedValidation = new VowpalWabbitExampleValidator("--cb_adf --rank_all")) using (var vwADFValidation = new VowpalWabbitExampleValidator("--cb_adf --rank_all")) { var sampleData = CreateSampleCbAdfData(); var example = sampleData[0]; Validate(vwSharedValidation, vwADFValidation, example); var result = vw.LearnAndPredict(example, example.ActionDependentFeatures, example.SelectedActionIndex, example.Label); ReferenceEquals(example.ActionDependentFeatures[0], result[0]); ReferenceEquals(example.ActionDependentFeatures[1], result[1]); ReferenceEquals(example.ActionDependentFeatures[2], result[2]); example = sampleData[1]; Validate(vwSharedValidation, vwADFValidation, example); result = vw.LearnAndPredict(example, example.ActionDependentFeatures, example.SelectedActionIndex, example.Label); ReferenceEquals(example.ActionDependentFeatures[0], result[1]); ReferenceEquals(example.ActionDependentFeatures[1], result[0]); example = sampleData[2]; Validate(vwSharedValidation, vwADFValidation, example); result = vw.Predict(example, 
example.ActionDependentFeatures); ReferenceEquals(example.ActionDependentFeatures[0], result[1]); ReferenceEquals(example.ActionDependentFeatures[1], result[0]); } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestSharedModel() { string cbadfModelFile = "models/cb_adf.model"; var sampleData = CreateSampleCbAdfData(); using (var vw = new VowpalWabbit("--cb_adf --rank_all")) using (var vwSharedValidation = new VowpalWabbitExampleValidator("--cb_adf --rank_all")) using (var vwADFValidation = new VowpalWabbitExampleValidator("--cb_adf --rank_all")) { foreach (DataString example in sampleData) { Validate(vwSharedValidation, vwADFValidation, example); vw.Learn(example, example.ActionDependentFeatures, example.SelectedActionIndex, example.Label); } vw.Native.SaveModel(cbadfModelFile); } // Get ground truth predictions var expectedPredictions = new List(); using (var vw = new VowpalWabbit(string.Format("-t -i {0}", cbadfModelFile))) { foreach (DataString example in sampleData) { var pred = vw.Predict(example, example.ActionDependentFeatures); if (pred == null) expectedPredictions.Add(null); else { expectedPredictions.Add(pred.Select(p => p.Feature).ToArray()); } } } // Test synchronous VW instances using shared model using (var vwModel = new VowpalWabbitModel(new VowpalWabbitSettings("-t") { ModelStream = File.OpenRead(cbadfModelFile) })) using (var vwShared1 = new VowpalWabbit(new VowpalWabbitSettings{ Model = vwModel })) using (var vwShared2 = new VowpalWabbit(new VowpalWabbitSettings{ Model = vwModel })) { for (int i = 0; i < sampleData.Length; i++) { var actualPrediction = vwShared1.Predict(sampleData[i], sampleData[i].ActionDependentFeatures); if (actualPrediction == null) ReferenceEquals(expectedPredictions[i], actualPrediction); else ReferenceEquals(expectedPredictions[i], actualPrediction.Select(p => p.Feature).ToArray()); } } // Test concurrent VW instances using shared model and model pool using (var vwModel = new VowpalWabbitModel(new 
VowpalWabbitSettings("-t") { ModelStream = File.OpenRead(cbadfModelFile) })) using (var vwPool = new VowpalWabbitThreadedPrediction(vwModel)) { Parallel.For ( fromInclusive: 0, toExclusive: 20, parallelOptions: new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount * 2 }, body: i => { using (var vwObject = vwPool.GetOrCreate()) { var actualPredictions = new List(); foreach (DataString example in sampleData) { actualPredictions.Add(vwObject.Value.Predict(example, example.ActionDependentFeatures).Select(p => p.Feature).ToArray()); } Assert.AreEqual(expectedPredictions.Count, actualPredictions.Count); for (int j = 0; j < expectedPredictions.Count; j++) { ReferenceEquals(expectedPredictions[j], actualPredictions[j]); } } } ); } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestCbAdfExplore() { var json = JsonConvert.SerializeObject(new { U = new { age = "18" }, _multi = new[] { new { G = new { _text = "this rocks" }, K = new { constant = 1, doc = "1" } }, new { G = new { _text = "something NYC" }, K = new { constant = 1, doc = "2" } }, }, _label_Action = 2, _label_Probability = 0.1, _label_Cost = -1, _labelIndex = 1 }); using (var vw = new VowpalWabbitJson("--cb_explore_adf --bag 4 --epsilon 0.0001 --cb_type mtr --marginal K -q UG -b 24 --power_t 0 --l1 1e-9 -l 4e-3")) { for (int i = 0; i < 50; i++) { var pred = vw.Learn(json, VowpalWabbitPredictionType.ActionProbabilities); Assert.AreEqual(2, pred.Length); if (i > 40) { Assert.AreEqual(1, (int)pred[0].Action); Assert.IsTrue(pred[0].Score > .9); Assert.AreEqual(0, (int)pred[1].Action); Assert.IsTrue(pred[1].Score < .1); } } vw.Native.SaveModel("cbadfexplore.model"); } using (var vw = new VowpalWabbitJson(new VowpalWabbitSettings { Arguments = "-t", ModelStream = File.Open("cbadfexplore.model", FileMode.Open) })) { var predObj = vw.Predict(json, VowpalWabbitPredictionType.Dynamic); Assert.IsInstanceOfType(predObj, typeof(ActionScore[])); var pred = (ActionScore[])predObj; 
Assert.AreEqual(1, (int)pred[0].Action); Assert.IsTrue(pred[0].Score > .9); Assert.AreEqual(0, (int)pred[1].Action); Assert.IsTrue(pred[1].Score < .1); } using (var vwModel = new VowpalWabbitModel(new VowpalWabbitSettings { ModelStream = File.Open("cbadfexplore.model", FileMode.Open) })) using (var vwSeeded = new VowpalWabbitJson(new VowpalWabbitSettings { Model = vwModel })) { var pred = vwSeeded.Predict(json, VowpalWabbitPredictionType.ActionProbabilities); Assert.AreEqual(1, (int)pred[0].Action); Assert.IsTrue(pred[0].Score > .9); Assert.AreEqual(0, (int)pred[1].Action); Assert.IsTrue(pred[1].Score < .1); } using (var vwModel = new VowpalWabbitModel(new VowpalWabbitSettings { ModelStream = File.Open("cbadfexplore.model", FileMode.Open) })) { using (var vwPool = new VowpalWabbitJsonThreadedPrediction(vwModel)) using (var vw = vwPool.GetOrCreate()) { var predObj = vw.Value.Predict(json, VowpalWabbitPredictionType.Dynamic); Assert.IsInstanceOfType(predObj, typeof(ActionScore[])); var pred = (ActionScore[])predObj; Assert.AreEqual(1, (int)pred[0].Action); Assert.IsTrue(pred[0].Score > .9); Assert.AreEqual(0, (int)pred[1].Action); Assert.IsTrue(pred[1].Score < .1); } } } private DataString[] CreateSampleCbAdfData() { var sampleData = new DataString[3]; //shared | s_1 s_2 //0:1.0:0.5 | a_1 b_1 c_1 //| a_2 b_2 c_2 //| a_3 b_3 c_3 //| b_1 c_1 d_1 //0:0.0:0.5 | b_2 c_2 d_2 //| a_1 b_1 c_1 //| a_3 b_3 c_3 sampleData[0] = new DataString { Line = "shared | s_1 s_2", Shared = new[] { "s_1", "s_2" }, ActionDependentFeatures = new[] { new DataStringADF { Line = "0:1.0:0.5 | a_1 b_1 c_1", Features = new[] { "a_1", "b_1", "c_1" }, }, new DataStringADF { Line = "| a_2 b_2 c_2", Features = new [] { "a_2","b_2","c_2" } }, new DataStringADF { Line = "| a_3 b_3 c_3", Features = new [] { "a_3","b_3","c_3" } }, }, SelectedActionIndex = 0, Label = new ContextualBanditLabel { Cost = 1f, Probability = .5f } }; sampleData[1] = new DataString { Line = string.Empty, ActionDependentFeatures = 
new[] { new DataStringADF { Line = "| b_1 c_1 d_1", Features = new [] { "b_1","c_1","d_1" } }, new DataStringADF { Line = "0:0.0:0.5 | b_2 c_2 d_2", Features = new [] { "b_2", "c_2", "d_2" } }, }, SelectedActionIndex = 1, Label = new ContextualBanditLabel { Cost = 0f, Probability = .5f } }; sampleData[2] = new DataString { Line = string.Empty, ActionDependentFeatures = new[] { new DataStringADF { Line = "| a_1 b_1 c_1 ", Features = new [] { "a_1","b_1","c_1" } }, new DataStringADF { Line = "| a_3 b_3 c_3", Features = new [] { "a_3","b_3","c_3" } } } }; return sampleData; } private static DataString[] CreateStringCbAdfData(int numSamples, int randomSeed = 0) { var random = new Random(randomSeed); var sampleData = new DataString[numSamples]; for (int i = 0; i < numSamples; i++) { int numActions = random.Next(2, 5); int[] fIndex = Enumerable.Range(1, numActions).OrderBy(ind => random.Next()).Take(numActions).ToArray(); var features = new string[numActions][]; for (int j = 0; j < numActions; j++) { features[j] = new string[] { "a_" + fIndex[j], "b_" + fIndex[j], "c_" + fIndex[j], "d_" + fIndex[j] }; } var adf = new DataStringADF[numActions]; for (int j = 0; j < numActions; j++) { adf[j] = new DataStringADF { Features = features[j] }; } sampleData[i] = new DataString { ActionDependentFeatures = adf, SelectedActionIndex = random.Next(-1, numActions), Label = new ContextualBanditLabel { Cost = (float)random.NextDouble(), Probability = (float)random.NextDouble() } }; } return sampleData; } private DataFloat[] CreateFloatCbAdfData(int numSamples, int randomSeed = 0) { var random = new Random(randomSeed); var sampleData = new DataFloat[numSamples]; for (int i = 0; i < numSamples; i++) { int numActions = random.Next(2, 5); int[] fIndex = Enumerable.Range(1, numActions).OrderBy(ind => random.Next()).Take(numActions).ToArray(); var features = new float[numActions][]; for (int j = 0; j < numActions; j++) { features[j] = new float[] { (fIndex[j] + 0) / (float)numActions, 
(fIndex[j] + 1) / (float)numActions, (fIndex[j] + 2) / (float)numActions, (fIndex[j] + 3) / (float)numActions }; } var adf = new DataFloatADF[numActions]; for (int j = 0; j < numActions; j++) { adf[j] = new DataFloatADF { Features = features[j] }; } sampleData[i] = new DataFloat { ActionDependentFeatures = adf, SelectedActionIndex = random.Next(-1, numActions), Label = new ContextualBanditLabel { Cost = (float)random.NextDouble(), Probability = (float)random.NextDouble() } }; } return sampleData; } public class DataString { public string Line { get; set; } [Feature] public string[] Shared { get; set; } public IReadOnlyList ActionDependentFeatures { get; set; } public int SelectedActionIndex { get; set; } public ILabel Label { get; set; } } public class DataFloat { [Feature] public string[] Shared { get; set; } public IReadOnlyList ActionDependentFeatures { get; set; } public int SelectedActionIndex { get; set; } public ILabel Label { get; set; } } public class DataStringADF { public string Line { get; set; } [Feature] public string[] Features { get; set; } public override string ToString() { return string.Join(" ", this.Features); } } public class DataFloatADF { [Feature] public float[] Features { get; set; } public override string ToString() { return string.Join(" ", this.Features); } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestConfidence.cs000066400000000000000000000047401332666127000230320ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; using System.Threading.Tasks; using VW; using VW.Labels; using VW.Serializer.Attributes; namespace cs_unittest { [TestClass] public class TestConfidenceClass { public class Data { [Feature] public double Value { get; set; } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestConfidence() { var rnd = new Random(42); using (var vw = new VowpalWabbit("--confidence -f model.conf --ngram 
1 --bit_precision 8 --random_seed 123 --passes 2 -k -c model.conf.cache")) { for (int i = 0; i < 100; i++) { if (i % 2 == 0) vw.Learn(new Data { Value = rnd.NextDouble() }, new SimpleLabel { Label = 1 }); else vw.Learn(new Data { Value = rnd.NextDouble() + 3 }, new SimpleLabel { Label = -1 }); } vw.Native.RunMultiPass(); var pred = vw.Predict(new Data { Value = 4.5 }, VowpalWabbitPredictionType.ScalarConfidence); Assert.AreEqual(-1f, pred.Value); Assert.IsTrue(pred.Confidence > 5); } Assert.IsTrue(File.Exists("model.conf.cache.cache")); File.Delete("model.conf.cache.cache"); Assert.IsFalse(File.Exists("model.conf.cache.cache")); } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestConfidenceWithStringLines() { var rnd = new Random(42); using (var vw = new VowpalWabbit("--confidence -f model.conf --ngram 1 --bit_precision 18 --random_seed 123 --passes 2 -k -c model.conf.cache")) { for (int i = 0; i < 100; i++) { if (i % 2 == 0) vw.Learn(string.Format("{0} | :{1}", 1, rnd.NextDouble())); else vw.Learn(string.Format("{0} | :{1}", -1, rnd.NextDouble() + 3)); } vw.Native.RunMultiPass(); var pred = vw.Predict(string.Format(" | :{0}", 4.5), VowpalWabbitPredictionType.ScalarConfidence); Assert.AreEqual(-1f, pred.Value); Assert.IsTrue(pred.Confidence > 5); } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestConfigInspector.cs000066400000000000000000000071061332666127000240700ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections.Generic; using System.Diagnostics; using System.IO; using System.Linq; using System.Text; using System.Threading.Tasks; using VW; using VW.Labels; using VW.Serializer; namespace cs_unittest { [TestClass] public class TestConfigInspector { [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestConfigParsing() { var str = @" f1 ns1.f2(FeatureGroup = 'a') ns1.f3(Namespace = ""a"",Enumerize=true, FeatureGroup = 'g' ) ns1.f4(Enumerize=true , FeatureGroup ='f', 
Order=3,AddAnchor=true) ns1.ns2.f5 "; var schema = ConfigInspector.CreateSchema(typeof(ConfigSample), str, msg => Assert.Fail(msg)); using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings { Schema = schema })) { vw.Validate("| f1:1 f5:5 |a abc |ga f33 |f f44 ", new ConfigSample { f1 = 1, ns1 = new ConfigSampleNamespace { f2 = "abc", f3 = 3, f4 = 4, ns2 = new ConfigSampleNamespaceSub { f5 = 5 } } }); } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestConfigADFParsing() { var schemaShared = ConfigInspector.CreateSchema(typeof(ConfigShared), "f1", msg => Assert.Fail(msg)); var schemaADF = ConfigInspector.CreateSchema(typeof(ConfigADF), "f2(Enumerize=true)", msg => Assert.Fail(msg)); using (var vw = new VowpalWabbit(new VowpalWabbitSettings("--cb_adf") { Schema = schemaShared, ActionDependentSchema = schemaADF })) using (var vwNative = new VowpalWabbit("--cb_adf")) { vw.Learn( new ConfigShared { f1 = 2, ignore_me = 3 }, new[] { new ConfigADF { f2 = 3 }, new ConfigADF { f2 = 4 }, }, 0, new ContextualBanditLabel { Action = 0, Cost = 1, Probability = .5f }); vwNative.Learn( new[] { "shared | f1:2", "0:1:.5 | f23", " | f24" }); vw.Native.SaveModel("config-actual.model"); vwNative.SaveModel("config-expected.model"); } var actual = File.ReadAllBytes("config-actual.model"); var expected = File.ReadAllBytes("config-expected.model"); CollectionAssert.AreEqual(expected, actual); } } public class ConfigSample { public int f1 { get; set; } public ConfigSampleNamespace ns1 { get; set; } } public class ConfigSampleNamespace { public string f2 { get; set; } public int f3 { get; set; } public int f4 { get; set; } public ConfigSampleNamespaceSub ns2 { get; set; } } public class ConfigSampleNamespaceSub { public int f5 { get; set; } } public class ConfigShared { public int f1 { get; set; } public int ignore_me { get; set; } } public class ConfigADF { public int f2 { get; set; } } } 
vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestConsole.cs000066400000000000000000000025651332666127000224020ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; using System.Threading.Tasks; using VW; namespace cs_unittest { [TestClass] public class TestConsoleClass : TestBase { [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestConsole() { var arrayModelPath = Path.GetTempFileName(); var newlineModelPath = Path.GetTempFileName(); var nativeModelPath = Path.GetTempFileName(); // Note: deployment item is not working on build server cs_vw.Program.Main(new[] { @"..\cs\unittest\json\test_array.json", "-f", arrayModelPath }); cs_vw.Program.Main(new[] { @"..\cs\unittest\json\test_newline.json", "-f", newlineModelPath }); // compare model using (var vw = new VowpalWabbit("-f " + nativeModelPath)) { vw.Learn("1 | f:1"); vw.Learn("0 | f:2"); } var arrayModel = File.ReadAllBytes(arrayModelPath); var newlineModel = File.ReadAllBytes(newlineModelPath); var nativeModel = File.ReadAllBytes(nativeModelPath); CollectionAssert.AreEqual(nativeModel, arrayModel); CollectionAssert.AreEqual(newlineModel, arrayModel); } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestDynamic.cs000066400000000000000000000066141332666127000223630ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; using System.Threading.Tasks; using VW; using VW.Labels; using VW.Serializer; namespace cs_unittest { [TestClass] public class TestDynamicClass { [TestMethod] [Ignore] [TestCategory("Vowpal Wabbit")] public void TestDynamic() { // TODO: look into friend assemblies and how to figure if one is a friend using (var vw = new VowpalWabbit("--cb_adf --rank_all")) using (var vwDynamic = new VowpalWabbitDynamic(new VowpalWabbitSettings("--cb_adf --rank_all") { 
TypeInspector = JsonTypeInspector.Default })) { var expected = vw.Learn(new[] { "| q:1", "2:-3:0.9 | q:2", "| q:3" }, VowpalWabbitPredictionType.ActionProbabilities); var actual = vwDynamic.Learn( new { _multi = new[] { new { q = 1 }, new { q = 2 }, new { q = 3 } } }, VowpalWabbitPredictionType.ActionScore, new ContextualBanditLabel(0, -3, 0.9f), 1); AssertAreEqual(expected, actual); expected = vw.Learn(new[] { "| q:1", "2:-5:0.9 | q:2", "| q:3" }, VowpalWabbitPredictionType.ActionProbabilities); actual = vwDynamic.Learn( new { _multi = new[] { new { q = 1 }, new { q = 2 }, new { q = 3 } } }, VowpalWabbitPredictionType.ActionScore, new ContextualBanditLabel(0, -5, 0.9f), 1); AssertAreEqual(expected, actual); expected = vw.Learn(new[] { "| q:1", "| q:2", "3:-2:0.8 | q:3" }, VowpalWabbitPredictionType.ActionProbabilities); actual = vwDynamic.Learn( new { _multi = new[] { new { q = 1 }, new { q = 2 }, new { q = 3 } }, _labelIndex = 2, _label_Action = 3, _label_Cost = -2, _label_Probability = 0.8 }, VowpalWabbitPredictionType.ActionScore); AssertAreEqual(expected, actual); } } private void AssertAreEqual(ActionScore[] expected, ActionScore[] actual) { Assert.AreEqual(expected.Length, actual.Length); for (int i = 0; i < expected.Length; i++) { Assert.AreEqual(expected[i].Action, actual[i].Action); Assert.AreEqual(expected[i].Score, actual[i].Score, 0.0001); } CollectionAssert.AreEqual( expected: Enumerable.Range(0, expected.Length).Select(i => (uint)i).ToList(), actual: actual.Select(a => a.Action).OrderBy(a => a).ToList()); } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestErrorListener.cs000066400000000000000000000010241332666127000235640ustar00rootroot00000000000000using Antlr4.Runtime; using Microsoft.VisualStudio.TestTools.UnitTesting; namespace cs_unittest { class TestErrorListener : IAntlrErrorListener { public void SyntaxError(IRecognizer recognizer, IToken offendingSymbol, int line, int charPositionInLine, string msg, RecognitionException e) { 
Assert.Fail("SyntaxError: {0} at line {1} character {2}: {3}", offendingSymbol, line, charPositionInLine, msg); } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestExampleCache.cs000066400000000000000000000103561332666127000233140ustar00rootroot00000000000000using System; using System.Collections.Generic; using System.IO; using Microsoft.VisualStudio.TestTools.UnitTesting; using VW; using VW.Labels; using VW.Serializer; using VW.Serializer.Attributes; namespace cs_unittest { [TestClass] public class TestExampleCacheCases : TestBase { #if DEBUG [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestExampleCacheForLearning() { try { using (var vw = new VowpalWabbit(new VowpalWabbitSettings { EnableExampleCaching = true })) { vw.Learn(new CachedData(), new SimpleLabel()); } Assert.Fail("Expect NotSupportedException"); } catch (NotSupportedException) { } } #else [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestExampleCacheForLearning() { try { using (var vw = new VowpalWabbit(new VowpalWabbitSettings { EnableExampleCaching = true })) { vw.Learn(new CachedData(), new SimpleLabel()); } Assert.Fail("Expect NullReferenceException"); } catch (NullReferenceException) { } } #endif [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestExampleCacheDisabledForLearning() { using (var vw = new VowpalWabbit(new VowpalWabbitSettings { EnableExampleCaching = false })) { vw.Learn(new CachedData(), new SimpleLabel()); } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestExampleCache() { var random = new Random(123); var examples = new List(); for (int i = 0; i < 1000; i++) { examples.Add(new CachedData { Label = new SimpleLabel { Label = 1 }, Feature = random.NextDouble() }); var cachedData = new CachedData { Label = new SimpleLabel { Label = 2 }, Feature = 10 + random.NextDouble() }; examples.Add(cachedData); examples.Add(cachedData); } using (var vw = new VowpalWabbit(new VowpalWabbitSettings("-k -c --passes 10") { EnableExampleCaching = false })) { 
foreach (var example in examples) { var pred = vw.Learn(example, example.Label, VowpalWabbitPredictionType.Scalar); //Console.WriteLine($"feature {example.Label.Label} <- {example.Feature}"); //Console.WriteLine($" pred {pred}"); } vw.Native.RunMultiPass(); vw.Native.SaveModel("models/model1"); } using (var vwModel = new VowpalWabbitModel(new VowpalWabbitSettings("-t") { ModelStream = File.OpenRead("models/model1") })) using (var vwCached = new VowpalWabbit(new VowpalWabbitSettings { Model = vwModel, EnableExampleCaching = true, MaxExampleCacheSize = 5 })) using (var vw = new VowpalWabbit(new VowpalWabbitSettings { Model = vwModel, EnableExampleCaching = false })) { foreach (var example in examples) { var cachedPrediction = vwCached.Predict(example, VowpalWabbitPredictionType.Scalar); var prediction = vw.Predict(example, VowpalWabbitPredictionType.Scalar); Assert.AreEqual(prediction, cachedPrediction); //Console.WriteLine($"{example.Label.Label} to {prediction} to {cachedPrediction} {example.Feature}"); Assert.AreEqual(example.Label.Label, Math.Round(prediction)); } } } } [Cacheable] public class CachedData { [Feature] public double Feature { get; set; } public SimpleLabel Label { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestExpansion.cs000066400000000000000000000020651332666127000227370ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using VW; using VW.Serializer.Attributes; namespace cs_unittest { [TestClass] public class TestExpansionClass { [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] [TestCategory("Vowpal Wabbit")] public void TestExpansion() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| 3:.1 4:.2 5:.3", new ExpansionContext() { Features = new[] { .1f, .2f, .3f }, Offset = 3 }); } } } public class ExpansionContext { public float[] Features { get; set; } 
public int Offset { get; set; } [Feature] public IEnumerable ExpandedFeatures { get { return Enumerable.Repeat(0f, this.Offset) .Concat(Features); } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestFeatureExtraction.cs000066400000000000000000000105071332666127000244270ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using VW; using VW.Serializer; using VW.Serializer.Attributes; namespace cs_unittest { [TestClass] public class TestFeatureExtractionClass { public class Features { [Feature] public float F1 { get; set; } [Feature(FeatureGroup = 'l')] public string Location { get; set; } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestFeatureExtraction() { using (var vw = new VowpalWabbit("--noconstant")) using (var serializer = vw.Serializer.Create(vw.Native)) using (var example = serializer.Serialize(new Features { F1 = 3.2f, Location = "New York" })) { var singleExample = example as VowpalWabbitSingleLineExampleCollection; Assert.IsNotNull(singleExample); foreach (var ns in singleExample.Example) { Console.WriteLine(ns.Index); foreach (var feature in ns) { Console.WriteLine("{0}:{1}", feature.FeatureIndex, feature.X); } } var namespaces = singleExample.Example.ToArray(); Assert.AreEqual((byte)' ', namespaces[0].Index); CollectionAssert.AreEqual( new[] { new VowpalWabbitFeature(singleExample.Example, 3.2f, 610696), }, namespaces[0].ToArray()); Assert.AreEqual((byte)'l', namespaces[1].Index); CollectionAssert.AreEqual( new[] { new VowpalWabbitFeature(singleExample.Example, 1, 414696), new VowpalWabbitFeature(singleExample.Example, 1, 380324), }, namespaces[1].ToArray()); } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestJsonFeatureExtraction() { string json = "{\"ns1\":{\"location\":\"New York\", \"f2\":3.4}}"; using (var vw = new VowpalWabbit("-b 3 --noconstant")) using (var serializer = new 
VowpalWabbitJsonSerializer(vw)) using (var result = serializer.ParseAndCreate(json)) { var singleExample = result as VowpalWabbitSingleLineExampleCollection; Assert.IsNotNull(singleExample); if (singleExample != null) { foreach (var ns in singleExample.Example) { Console.WriteLine(ns.Index); foreach (var feature in ns) { Console.WriteLine("{0}:{1}", feature.FeatureIndex, feature.X); } } var ns1 = singleExample.Example.ToArray(); Assert.AreEqual(1, ns1.Length); Assert.AreEqual((byte)'n', ns1[0].Index); CollectionAssert.AreEqual( new[] { new VowpalWabbitFeature(singleExample.Example, 1, 12), new VowpalWabbitFeature(singleExample.Example, 3.4f, 28) }, ns1[0].ToArray()); } // for documentation purpose only var multiExample = result as VowpalWabbitMultiLineExampleCollection; Assert.IsNull(multiExample); if (multiExample != null) { foreach (var example in multiExample.Examples) { foreach (var ns in example) { Console.WriteLine(ns.Index); foreach (var feature in ns) { Console.WriteLine("{0}:{1}", feature.FeatureIndex, feature.X); } } } } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestJson.cs000066400000000000000000002467051332666127000217170ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using Newtonsoft.Json; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; using System.Threading.Tasks; using VW; using VW.Labels; using VW.Serializer; using VW.Serializer.Attributes; using VW.Serializer.Intermediate; namespace cs_unittest { [TestClass] public class TestJsonClass { [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestJson() { using (var validator = new VowpalWabbitExampleJsonValidator()) { validator.Validate("|a foo:1", "{\"a\":{\"foo\":1}}"); validator.Validate("|a foo:2.3", "{\"a\":{\"foo\":2.3}}"); validator.Validate("|a foo:2.3 bar", "{\"a\":{\"foo\":2.3, \"bar\":true}}"); validator.Validate("|a foo:1 |bcd Age25_old", "{\"a\":{\"foo\":1},\"bcd\":{\"Age\":\"25 
old\"}}"); validator.Validate("|a x{abc}", "{\"a\":{\"x\":\"{abc}\"}}"); validator.Validate("|a x{abc}", "{\"a\":{\"x\":\"{abc}\",\"y\":null}}"); } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestJsonAux() { using (var validator = new VowpalWabbitExampleJsonValidator()) { validator.Validate("|a foo:1", "{\"a\":{\"foo\":1},\"_aux\":5}"); validator.Validate("|a foo:1", "{\"a\":{\"foo\":1},\"_aux\":\"\"}"); validator.Validate("|a foo:1", "{\"a\":{\"foo\":1},\"_aux\":{\"abc\":{\"def\":3}}}"); validator.Validate("|a foo:1", "{\"a\":{\"foo\":1},\"_aux\":[1,2,[3,4],2]}"); validator.Validate("|a foo:1", "{\"a\":{\"foo\":1},\"_aux\":[1,2,[3,[1],{\"ab,\":3}],2]}"); validator.Validate("|a foo:1 | b:1", "{\"a\":{\"foo\":1},\"_aux\":{\"a\":\"{\\\"} \"}, \"b\":1}"); } } private void AssertThrow(Action action, Type expectedException = null) { if (expectedException == null) expectedException = typeof(VowpalWabbitException); try { action(); Assert.Fail("Expected exception " + expectedException); } catch (Exception e) { Assert.IsInstanceOfType(e, expectedException); } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestJsonInvalid() { using (var vw = new VowpalWabbit("--json")) { AssertThrow(() => vw.ParseJson("{\"_label\":true,\"a\":{\"foo\":1}}")); AssertThrow(() => vw.ParseJson("{\"_labelfoo\":1,\"a\":{\"foo\":1}}")); AssertThrow(() => vw.ParseJson("{\"_label_foo\":1,\"a\":{\"foo\":1}}")); AssertThrow(() => vw.ParseJson("{\"_label\":{\"label\":{\"a\":1}},\"a\":{\"foo\":1}}")); } using (var vw = new VowpalWabbit("--cb_adf --json")) { AssertThrow(() => vw.ParseJson("{\"_label_Action\":1,\"_label_Cost\":-2,\"_label_Probability\":0.3,\"_multi\":[{\"foo\":1}],\"foo\":2,\"_labelIndex\":1}")); } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestJsonArray() { using (var validator = new VowpalWabbitExampleJsonValidator()) { validator.Validate("|a :1 :2.3 :4", "{\"a\":[1,2.3,4]}"); validator.Validate("|b :1 :2.3 :4", 
"{\"a\":{\"b\":[1,2.3,4]}}"); } using (var vw = new VowpalWabbit("--json")) { AssertThrow(() => vw.ParseJson("{\"a\":{\"b\":[1,[1,2],4]}}")); } }

        // Validates simple labels supplied through "_label", both as an object
        // with a "Label" property and as a bare number.
        [TestMethod]
        [TestCategory("Vowpal Wabbit/JSON")]
        public void TestJsonSimpleLabel()
        {
            using (var validator = new VowpalWabbitExampleJsonValidator())
            {
                validator.Validate(
                    "1 |a foo:1",
                    "{\"_label\":{\"Label\":1},\"a\":{\"foo\":1}}",
                    VowpalWabbitLabelComparator.Simple);

                validator.Validate(
                    "1.2 |a foo:1",
                    "{\"_label\":1.2,\"a\":{\"foo\":1}}",
                    VowpalWabbitLabelComparator.Simple);

                // NOTE(review): this call repeats the previous one verbatim; confirm
                // whether a different input was intended before removing it.
                validator.Validate(
                    "1.2 |a foo:1",
                    "{\"_label\":1.2,\"a\":{\"foo\":1}}",
                    VowpalWabbitLabelComparator.Simple);
            }
        }

        // Validates "_label" given as a JSON number and as a JSON string.
        [TestMethod]
        [TestCategory("Vowpal Wabbit/JSON")]
        public void TestJsonVWLabel()
        {
            using (var validator = new VowpalWabbitExampleJsonValidator())
            {
                validator.Validate(
                    "1 |a foo:1",
                    "{\"_label\":1,\"a\":{\"foo\":1}}",
                    VowpalWabbitLabelComparator.Simple);

                validator.Validate(
                    "1 |a foo:1",
                    "{\"_label\":\"1\",\"a\":{\"foo\":1}}",
                    VowpalWabbitLabelComparator.Simple);
            }
        }

        // Validates that an explicitly passed SimpleLabel (2) takes precedence over
        // the label found in the JSON (1).
        [TestMethod]
        [TestCategory("Vowpal Wabbit/JSON")]
        public void TestJsonSimpleLabelOverride()
        {
            using (var validator = new VowpalWabbitExampleJsonValidator())
            {
                validator.Validate(
                    "2 |a foo:1",
                    "{\"_label\":{\"Label\":1},\"a\":{\"foo\":1}}",
                    VowpalWabbitLabelComparator.Simple,
                    new SimpleLabel { Label = 2 },
                    enableNativeJsonValidation: false /* vw.Parse(json) doesn't support label overwrite */);
            }
        }

        // Validates three spellings of a contextual bandit label: a VW label string,
        // a "_label" object, and flattened "_label_*" properties.
        [TestMethod]
        [TestCategory("Vowpal Wabbit/JSON")]
        public void TestJsonContextualBanditLabel()
        {
            using (var validator = new VowpalWabbitExampleJsonValidator("--cb 2 --cb_type dr"))
            {
                validator.Validate(
                    "1:2:.5 |a foo:1",
                    "{\"_label\":\"1:2:.5\",\"a\":{\"foo\":1}}",
                    VowpalWabbitLabelComparator.ContextualBandit);

                validator.Validate(
                    "1:-2:.3 |a foo:1",
                    "{\"_label\":{\"Action\":1,\"Cost\":-2,\"Probability\":0.3},\"a\":{\"foo\":1}}",
                    VowpalWabbitLabelComparator.ContextualBandit);

                validator.Validate(
                    "1:-2:.3 |a foo:1",
                    "{\"_label_Action\":1,\"_label_Cost\":-2,\"_label_Probability\":0.3,\"a\":{\"foo\":1}}",
                    VowpalWabbitLabelComparator.ContextualBandit);
            }
        }

        // Validates a "--cb_adf" document with one shared example, one "_multi"
        // entry, and a label addressed through "_labelIndex".
        [TestMethod]
        [TestCategory("Vowpal Wabbit/JSON")]
        public void TestJsonADF()
        {
            using (var validator = new VowpalWabbitExampleJsonValidator("--cb_adf"))
            {
                validator.Validate(
                    new[]
                    {
                        "shared | foo:2",
                        "1:-2:.3 | foo:1"
                    },
                    "{\"_label_Action\":1,\"_label_Cost\":-2,\"_label_Probability\":0.3,\"_multi\":[{\"foo\":1}],\"foo\":2,\"_labelIndex\":0}",
                    VowpalWabbitLabelComparator.ContextualBandit,
                    index: 1);
            }
        }

        // Serializes a JsonContext object with Json.NET and validates the resulting
        // JSON against the expected VW string representation.
        [TestMethod]
        [TestCategory("Vowpal Wabbit/JSON")]
        public void TestJsonToVWString()
        {
            var jsonContext = new JsonContext()
            {
                Label = new SimpleLabel { Label = 25 },
                Ns1 = new Namespace1
                {
                    Foo = 1,
                    Age = "25",
                    DontConsider = "XXX",
                    EscapeCharacterString = "a: a | a\ta",
                    EscapeCharactersText = "b: b | b\tb"
                },
                Ns2 = new Namespace2
                {
                    FeatureA = true
                },
                Clicks = 5
            };

            var jsonContextString = JsonConvert.SerializeObject(jsonContext);

            var settings = new VowpalWabbitSettings
            {
                Arguments = "--json",
                EnableStringExampleGeneration = true,
                EnableStringFloatCompact = true,
                EnableThreadSafeExamplePooling = true
            };

            using (var validator = new VowpalWabbitExampleJsonValidator(settings))
            {
                validator.Validate(
                    "25 | Clicks:5 MoreClicks:0 |a Bar:1 Age25 EscapeCharacterStringa__a___a_a b_ b _ b b |b Marker",
                    jsonContextString,
                    VowpalWabbitLabelComparator.Simple);
            }
        }

        // Validates multi-line examples, first with the default special property
        // names and then with names remapped through PropertyConfiguration.
        [TestMethod]
        [TestCategory("Vowpal Wabbit/JSON")]
        public void TestJsonMultiline()
        {
            using (var validator = new VowpalWabbitExampleJsonValidator("--cb 2 --cb_type dr"))
            {
                validator.Validate(
                    new[]
                    {
                        "shared | Age:25",
                        " | w1 w2 |a x:1",
                        " | w2 w3"
                    },
                    "{\"Age\":25,\"_multi\":[{\"_text\":\"w1 w2\", \"a\":{\"x\":1}}, {\"_text\":\"w2 w3\"}]}");

                validator.Validate(
                    new[]
                    {
                        "shared | Age:25",
                        " | w1 w2 |a x:1",
                        "2:-1:.3 | w2 w3"
                    },
                    "{\"Age\":25,\"_multi\":[{\"_text\":\"w1 w2\", \"a\":{\"x\":1}}, {\"_text\":\"w2 w3\",\"_label\":\"2:-1:.3\"}]}",
                    VowpalWabbitLabelComparator.ContextualBandit);
            }

            var remappedSettings = new VowpalWabbitSettings
            {
                Arguments = "--cb 2 --cb_type dr",
                PropertyConfiguration = new PropertyConfiguration
                {
                    MultiProperty = "adf",
                    TextProperty = "someText",
                    LabelProperty = "theLabel",
                    FeatureIgnorePrefix = "xxx"
                }
            };

            using (var validator = new VowpalWabbitExampleJsonValidator(remappedSettings))
            {
                validator.Validate(
                    new[]
                    {
                        "shared | Age:25",
                        " | w1 w2 |a x:1",
                        "2:-1:.3 | w2 w3"
                    },
                    "{\"Age\":25,\"adf\":[{\"someText\":\"w1 w2\", \"a\":{\"x\":1}, \"xxxxIgnoreMe\":2}, {\"someText\":\"w2 w3\",\"theLabel\":\"2:-1:.3\"}]}",
                    VowpalWabbitLabelComparator.ContextualBandit,
                    enableNativeJsonValidation: false /* remapping of special properties is not supported in native JSON */);
            }
        }

        // Validates "_text" properties at the top level and inside a namespace.
        [TestMethod]
        [TestCategory("Vowpal Wabbit/JSON")]
        public void TestJsonText()
        {
            using (var validator = new VowpalWabbitExampleJsonValidator(""))
            {
                validator.Validate(
                    "| a b c |a d e f",
                    "{\"_text\":\"a b c\",\"a\":{\"_text\":\"d e f\"}}");
            }
        }

        // Checks VowpalWabbitJsonSerializer.GetNumberOfActionDependentExamples for a
        // populated "_multi" array, an empty one, and a document without "_multi".
        [TestMethod]
        [TestCategory("Vowpal Wabbit/JSON")]
        public void TestJsonNumADFs()
        {
            // NOTE(review): the validator is never used inside this scope; it is kept
            // because constructing it may have side effects the test relies on - confirm.
            using (var validator = new VowpalWabbitExampleJsonValidator(""))
            {
                Assert.AreEqual(
                    2,
                    VowpalWabbitJsonSerializer.GetNumberOfActionDependentExamples(
                        "{\"_text\":\"a b c\",\"a\":{\"_text\":\"d e f\"},_multi:[{\"a\":1},{\"b\":2,\"c\":3}]}"));

                Assert.AreEqual(
                    0,
                    VowpalWabbitJsonSerializer.GetNumberOfActionDependentExamples(
                        "{\"_text\":\"a b c\",\"a\":{\"_text\":\"d e f\"},_multi:[]}"));

                Assert.AreEqual(
                    0,
                    VowpalWabbitJsonSerializer.GetNumberOfActionDependentExamples(
                        "{\"_text\":\"a b c\",\"a\":{\"_text\":\"d e f\"}}"));
            }
        }

        // Validates flattened "_label_*" properties for simple labels and, with
        // "--cb_adf", "_labelIndex"-addressed labels in several spellings.
        [TestMethod]
        [TestCategory("Vowpal Wabbit/JSON")]
        public void TestJsonLabel()
        {
            using (var validator = new VowpalWabbitExampleJsonValidator(""))
            {
                validator.Validate(
                    "1 | a:2 ",
                    "{\"a\":2,\"_label_Label\":1}");

                validator.Validate(
                    "1:2:3 | a:2 ",
                    "{\"a\":2,\"_label_Label\":1,\"_label_Initial\":2,\"_label_weight\":3}");
            }

            var adfSettings = new VowpalWabbitSettings
            {
                Arguments = "--cb_adf",
                PropertyConfiguration = new PropertyConfiguration
                {
                    MultiProperty = "adf",
                    TextProperty = "someText",
                    FeatureIgnorePrefix = "xxx"
                }
            };

            using (var validator = new VowpalWabbitExampleJsonValidator(adfSettings))
            {
                validator.Validate(
                    new[]
                    {
                        "shared | Age:25",
                        " | w1 w2 |a x:1",
                        "0:-1:.3 | w2 w3"
                    },
                    "{\"Age\":25,\"adf\":[{\"someText\":\"w1 w2\", \"a\":{\"x\":1}, \"xxxxIgnoreMe\":2}, {\"someText\":\"w2 w3\"}], \"_labelIndex\":1, \"_label_Cost\":-1, \"_label_Probability\":0.3}",
                    VowpalWabbitLabelComparator.ContextualBandit,
                    enableNativeJsonValidation: false);

                // all lower case (ASA issue)
                validator.Validate(
                    new[]
                    {
                        " | w1 w2 |a x:1",
                        "0:-1:.3 | w2 w3"
                    },
                    "{\"adf\":[{\"someText\":\"w1 w2\", \"a\":{\"x\":1}, \"xxxxIgnoreMe\":2}, {\"someText\":\"w2 w3\"}], \"_labelindex\":1, \"_label_cost\":-1, \"_label_probability\":0.3}",
                    VowpalWabbitLabelComparator.ContextualBandit,
                    enableNativeJsonValidation: false);

                validator.Validate(
                    new[]
                    {
                        "shared | Age:25",
                        " | w1 w2 |a x:1",
                        " | w2 w3"
                    },
                    "{\"Age\":25,\"adf\":[{\"someText\":\"w1 w2\", \"a\":{\"x\":1}, \"xxxxIgnoreMe\":2}, {\"someText\":\"w2 w3\"}], \"_labelindex\":null}",
                    VowpalWabbitLabelComparator.ContextualBandit,
                    enableNativeJsonValidation: false);
            }
        }

        // Parses JSON through VowpalWabbitJsonSerializer and inspects the labels on
        // the created examples, for a single-line and a multi-line document.
        // The first serializer also registers an extension that captures "_eventid".
        [TestMethod]
        [TestCategory("Vowpal Wabbit/JSON")]
        public void TestJsonLabelExtraction()
        {
            using (var vw = new VowpalWabbit("--cb_adf --rank_all"))
            {
                using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
                {
                    string eventId = null;

                    jsonSerializer.RegisterExtension((state, property) =>
                    {
                        // expected value first, so a failure message reads correctly
                        Assert.AreEqual("_eventid", property);
                        Assert.IsTrue(state.Reader.Read());

                        eventId = (string)state.Reader.Value;
                        return true;
                    });

                    jsonSerializer.Parse("{\"_eventid\":\"abc123\",\"a\":1,\"_label_cost\":-1,\"_label_probability\":0.3}");

                    Assert.AreEqual("abc123", eventId);

                    using (var examples = jsonSerializer.CreateExamples())
                    {
                        var single = examples as VowpalWabbitSingleLineExampleCollection;
                        Assert.IsNotNull(single);

                        var label = single.Example.Label as ContextualBanditLabel;
                        Assert.IsNotNull(label);

                        Assert.AreEqual(-1, label.Cost);
                        Assert.AreEqual(0.3, label.Probability, 0.0001);
                    }
                }

                using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
                {
                    jsonSerializer.Parse("{\"_multi\":[{\"_text\":\"w1 w2\", \"a\":{\"x\":1}}, {\"_text\":\"w2 w3\"}], \"_labelindex\":1, \"_label_cost\":-1, \"_label_probability\":0.3}");

                    using (var examples = jsonSerializer.CreateExamples())
                    {
                        var multi = examples as VowpalWabbitMultiLineExampleCollection;
                        Assert.IsNotNull(multi);

                        Assert.AreEqual(2, multi.Examples.Length);

                        // "_labelindex" is 1, so the first example is expected to carry zeroed values.
                        var label = multi.Examples[0].Label as ContextualBanditLabel;
                        // fail with an assertion rather than a NullReferenceException if the cast yields null
                        Assert.IsNotNull(label);
                        Assert.AreEqual(0, label.Cost);
                        Assert.AreEqual(0, label.Probability);

                        label = multi.Examples[1].Label as ContextualBanditLabel;
                        Assert.IsNotNull(label);
                        Assert.AreEqual(-1, label.Cost);
                        Assert.AreEqual(0.3, label.Probability, 0.0001);
                    }
                }
            }
        }

        // Validates parsing with a caller-supplied extension callback: one that
        // redirects parsing into the "_sub" object, and one that consumes "_tag".
        [TestMethod]
        [TestCategory("Vowpal Wabbit/JSON")]
        public void TestJsonRedirection()
        {
            using (var validator = new VowpalWabbitExampleJsonValidator(new VowpalWabbitSettings("--cb_adf")))
            {
                validator.Validate(
                    new[]
                    {
                        "shared | Age:25",
                        " | w1 w2 |a x:1",
                        "0:-1:.3 | w2 w3"
                    },
                    "{\"_ignoreMe\":5,\"_sub\":{\"Age\":25,\"_multi\":[{\"_text\":\"w1 w2\", \"a\":{\"x\":1}}, {\"_text\":\"w2 w3\"}]}, \"_labelIndex\":1, \"_label_Cost\":-1, \"_label_Probability\":0.3}",
                    VowpalWabbitLabelComparator.ContextualBandit,
                    extension: (state, property) =>
                    {
                        if (!property.Equals("_sub"))
                            return false;

                        // expected value first, matching the assertion in the "_tag" callback below
                        Assert.AreEqual(-1, state.MultiIndex);

                        state.Parse();

                        return true;
                    });

                validator.Validate(
                    new[]
                    {
                        "shared | Age:25",
                        " | w1 w2 |a x:1",
                        "0:-1:.3 | w2 w3"
                    },
                    "{\"Age\":25,\"_multi\":[{\"_text\":\"w1 w2\", \"a\":{\"x\":1}}, {\"_text\":\"w2 w3\", \"_tag\":\"2\"}], \"_labelIndex\":1, \"_label_Cost\":-1, \"_label_Probability\":0.3}",
                    VowpalWabbitLabelComparator.ContextualBandit,
                    extension: (state, property) =>
                    {
                        if (!property.Equals("_tag"))
                            return false;

                        var tag = state.Reader.ReadAsString();

                        Assert.AreEqual(1, state.MultiIndex);
                        Assert.AreEqual("2", tag);

                        return true;
                    });
            }
        }

        // Parses a small Decision Service document from UTF-8 bytes, checks the
        // interaction header, and validates the produced examples.
        [TestMethod]
        [TestCategory("Vowpal Wabbit/JSON")]
        public void TestDecisionServiceJson()
        {
            using (var vw = new VowpalWabbit("--cb_adf"))
            {
                var json = @"{""EventId"":""abc"",""a"":[1,2,3],""Version"":""1"",""c"":{""u"":{""loc"":""New York""},""_multi"":[{""x"":[{""x"":{""cat"":""1""}},null,{""y"":{""cat"":""3""}}]},{""x"":{""cat"":""2""}}]},""p"":[0.8,0.1,0.1]}";

                // Result is unused; presumably a sanity check that the literal is parseable JSON.
                JsonConvert.DeserializeObject(json);

                var bytes = Encoding.UTF8.GetBytes(json);

                VowpalWabbitDecisionServiceInteractionHeader header;
                // NOTE(review): element type restored from usage (Dispose(), validator.Validate) - confirm.
                List<VowpalWabbitExample> examples = null;

                try
                {
                    examples = vw.ParseDecisionServiceJson(bytes, 0, bytes.Length, copyJson: false, header: out header);

                    Assert.AreEqual("abc", header.EventId);
                    CollectionAssert.AreEqual(new[] { 1, 2, 3 }, header.Actions, "Actions mismatch");
                    CollectionAssert.AreEqual(new[] { .8f, .1f, .1f }, header.Probabilities, "Probabilities mismatch");
                    Assert.AreEqual(0, header.ProbabilityOfDrop);

                    using (var validator = new VowpalWabbitExampleJsonValidator(new VowpalWabbitSettings("--cb_adf")))
                    {
                        var expected = new[]
                        {
                            "shared |u locNew_York",
                            " |x cat1 |y cat3",
                            " |x cat2"
                        };

                        validator.Validate(expected, examples);
                    }
                }
                finally
                {
                    // Dispose every example that was created, even when an assertion failed.
                    if (examples != null)
                    {
                        foreach (var ex in examples)
                        {
                            if (ex != null)
                                ex.Dispose();
                        }
                    }
                }
            }
        }

        [TestMethod]
        [TestCategory("Vowpal Wabbit/JSON")]
        public void TestDecisionServiceJson2()
        {
            var json = "{\"Version\":\"2\",\"EventId\":\"73369b13ec98433096a1496d27da0bfd\",\"a\":[9,11,13,6,4,5,12,1,2,10,8,3,7],\"c\":{\"_synthetic\":false,\"User\":{\"_age\":0},\"Geo\":{\"country\":\"United States\",\"_countrycf\":\"8\",\"state\":\"New Jersey\",\"city\":\"Somerdale\",\"_citycf\":\"5\",\"dma\":\"504\"},\"MRefer\":{\"referer\":\"http://www.complex.com/\"},\"OUserAgent\":{\"_ua\":\"Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_2 like Mac OS X) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.0 Mobile/14F89
Safari/602.1\",\"_DeviceBrand\":\"Apple\",\"_DeviceFamily\":\"iPhone\",\"_DeviceIsSpider\":false,\"_DeviceModel\":\"iPhone\",\"_OSFamily\":\"iOS\",\"_OSMajor\":\"10\",\"_OSPatch\":\"2\",\"DeviceType\":\"Mobile\"},\"_multi\":[{\"_tag\":\"cmplx$http://www.complex.com/sports/2017/06/floyd-mayweater-conor-mcgregor-may-be-set\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/sports/2017/06/floyd-mayweater-conor-mcgregor-may-be-set\"},\"j\":[{\"_title\":\"The Floyd Mayweather vs. Conor McGregor Fight Date Has Finally Been Announced\"},{\"RVisionTags\":{\"person\":0.999368966,\"man\":0.998108864,\"wearing\":0.9368642,\"hat\":0.928866565,\"indoor\":0.893332958,\"close\":0.201101109},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0148094837,\"racyScore\":0.0144806523},\"_expires\":\"2017-06-17T21:42:30.3186957Z\"},{\"Emotion0\":{\"anger\":1.81591489E-07,\"contempt\":9.946987E-06,\"disgust\":6.11135547E-05,\"fear\":4.565633E-12,\"happiness\":0.999928534,\"neutral\":2.28114558E-07,\"sadness\":3.07409E-09,\"surprise\":1.46155665E-08},\"_expires\":\"2017-06-17T21:42:28.8496462Z\"},{\"Tags\":{\"Floyd Mayweather Jr.\":0.982,\"Conor McGregor\":0.938,\"Complex\":0.334,\"Twitter Inc.\":0.997,\"Dan Mullane\":0.006,\"Mixed martial arts\":1,\"Net Controls\":0.281,\"Boxing\":1,\"Ontario\":1,\"Dana White\":0.972,\"Las Vegas Valley\":0.995,\"Nevada Athletic Commission\":0.024,\"Mayweather Promotions\":0.118,\"MGM Grand Garden Arena\":0.997,\"Fighting game\":0.641,\"Nevada\":1,\"Coming out\":0.076},\"_expires\":\"2017-06-17T21:42:29.8271823Z\"},{\"XSentiment\":2.93618323E-05,\"_expires\":\"2017-06-17T21:42:29.1777863Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/music/2017/06/young-thug-beautiful-thugger-girls-violent-trailer-has-people-upset\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/music/2017/06/young-thug-beautiful-thugger-girls-violent-trailer-has-people-upset\"},\"j\":[{\"_title\":\"Why Young Thug's Violent Trailer 
for 'Beautiful Thugger Girls' Has People Upset\"},{\"RVisionTags\":{\"person\":0.9621564,\"indoor\":0.93759197},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0590519942,\"racyScore\":0.0740057454},\"_expires\":\"2017-06-17T17:27:41.0750729Z\"},{\"Emotion0\":{\"anger\":0.000195411936,\"contempt\":0.0007970728,\"disgust\":7.29157255E-05,\"fear\":0.000106336483,\"happiness\":0.000127831052,\"neutral\":0.9209777,\"sadness\":0.0775285438,\"surprise\":0.000194183245},\"_expires\":\"2017-06-17T17:27:33.4982953Z\"},{\"Tags\":{\"Young Thug\":0.445,\"Beautiful\":0.005,\"Twitter Inc.\":1,\"Drake\":0.968,\"Instagram\":0.995,\"Breezy\":0.014,\"June 13\":0.008,\"Cover art\":0.003,\"Album\":0.99,\"Surface\":0.012,\"Prince Michael Jackson II\":1},\"_expires\":\"2017-06-17T17:27:33.4670277Z\"},{\"XSentiment\":2.46002031E-07,\"_expires\":\"2017-06-17T17:27:34.0139318Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/sports/2017/06/did-we-just-witness-peak-lebron\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/sports/2017/06/did-we-just-witness-peak-lebron\"},\"j\":[{\"_title\":\"Did We Just Witness Peak LeBron?\"},{\"RVisionTags\":{\"person\":0.9887383,\"indoor\":0.8961688},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0181511845,\"racyScore\":0.0380923077},\"_expires\":\"2017-06-17T14:46:55.4499648Z\"},{\"Emotion0\":{\"anger\":0.0007875018,\"contempt\":0.000600368367,\"disgust\":0.00288117747,\"fear\":0.00168624625,\"happiness\":4.59727671E-05,\"neutral\":0.377137423,\"sadness\":0.00264490047,\"surprise\":0.6142164},\"_expires\":\"2017-06-17T14:46:49.7072998Z\"},{\"Tags\":{\"LeBron James\":1,\"Complex\":0.08,\"Broadcasting of sports events\":0.12,\"Twitter Inc.\":0.999,\"USA Today\":0.994,\"Kyle Broflovski\":0.023,\"Superman\":0.694,\"Cleveland Cavaliers\":1,\"UNK NBA\":1,\"Bill Russell NBA Finals Most Valuable Player Award\":1,\"Golden State Warriors\":1,\"Michael Jordan\":1,\"Kobe 
Bryant\":1,\"Time\":0.865,\"Magic Johnson\":0.999},\"_expires\":\"2017-06-17T14:46:49.4416286Z\"},{\"XSentiment\":0.9999974,\"_expires\":\"2017-06-17T14:46:49.7541575Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/sports/2017/06/kevin-durant-made-the-difference\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/sports/2017/06/kevin-durant-made-the-difference\"},\"j\":[{\"_title\":\"Golden State (Probably) Would Have Blown Another Lead Without KD\"},{\"RVisionTags\":{\"person\":0.999950767,\"player\":0.984833837,\"sport\":0.9816471,\"athletic game\":0.9691899,\"basketball\":0.7260069,\"hand\":0.463383943,\"crowd\":0.3354485},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0127091147,\"racyScore\":0.01585682},\"_expires\":\"2017-06-17T14:44:47.7452593Z\"},{\"Emotion0\":{\"anger\":0.9830929,\"contempt\":6.38814454E-05,\"disgust\":0.0142428661,\"fear\":4.3443215E-05,\"happiness\":0.00127731345,\"neutral\":0.0003242454,\"sadness\":0.0006526729,\"surprise\":0.0003026811},\"_expires\":\"2017-06-17T14:44:47.1857099Z\"},{\"Tags\":{\"Golden State Warriors\":1,\"Lead guitar\":0.046,\"Kevin Durant\":1,\"Twitter Inc.\":0.985,\"Martinez\":0.015,\"Splash Brothers\":1,\"Cleveland Cavaliers\":1,\"Monday Night Football\":0.398,\"Richard Jefferson\":0.95,\"Kevin Love\":0.997,\"McDonald's All-American Game\":0.859,\"University of Texas at Austin College of Fine Arts\":0.999,\"Naismith College Player of the Year\":0.117,\"UNK NBA\":1,\"NBA Most Valuable Player Award\":1,\"Olympic Games\":0.858,\"NBA All-Star Game\":1,\"Champion\":0.12,\"Bill Russell NBA Finals Most Valuable Player Award\":1,\"Stephen Curry\":0.989,\"Draymond Green\":0.674,\"Seat 
Pleasant\":0.122},\"_expires\":\"2017-06-17T14:44:46.9264793Z\"},{\"XSentiment\":1,\"_expires\":\"2017-06-17T14:44:47.4639789Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/music/2017/06/2-chainz-dna-freestyle-kendrick-lamar\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/music/2017/06/2-chainz-dna-freestyle-kendrick-lamar\"},\"j\":[{\"_title\":\"Watch 2 Chainz Flex on Kendrick Lamar's \\\"DNA\\\" Beat in New Freestyle\"},{\"RVisionTags\":{\"person\":0.9840661,\"dressed\":0.304036647},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0285988934,\"racyScore\":0.0257010516},\"_expires\":\"2017-06-17T19:30:46.5524171Z\"},{\"_expires\":\"2017-06-17T19:30:46.2086571Z\"},{\"Tags\":{\"2 Chainz\":1,\"Kendrick Lamar\":1,\"DNA\":0.053,\"Hip hop production\":0.015,\"Freestyle rap\":0.934,\"Philadelphia\":0.193,\"Twitter Inc.\":1,\"Subscription business model\":0.011,\"Complex\":1,\"Los Angeles\":1,\"Georgia\":0.894,\"Trap\":0.579,\"Top Dawg Entertainment\":0.872,\"Virtual reality\":0.011,\"Travis Scott\":0.011,\"Collaboration\":0.004,\"Nicki Minaj\":0.999,\"Remy Ma\":0.99,\"Papoose\":0.892,\"Sampling\":0.017,\"Hip hop music\":1,\"Tha Carter V\":1,\"Everyday (ASAP Rocky song)\":0.003},\"_expires\":\"2017-06-17T19:30:46.1314041Z\"},{\"XSentiment\":1,\"_expires\":\"2017-06-17T19:30:46.7944713Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/sports/2017/06/floyd-mayweather-viral-challenge-backfires-reacts\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/sports/2017/06/floyd-mayweather-viral-challenge-backfires-reacts\"},\"j\":[{\"_title\":\"Floyd Mayweather Attempted to Start His Own Viral Challenge and It Hilariously 
Backfired\"},{\"RVisionTags\":{\"person\":0.999924064,\"man\":0.9552084,\"crowd\":0.01707872},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0114686647,\"racyScore\":0.0159121435},\"_expires\":\"2017-06-17T21:13:01.1925796Z\"},{\"Emotion0\":{\"anger\":0.00127403683,\"contempt\":0.0222781487,\"disgust\":0.0002176114,\"fear\":9.893078E-06,\"happiness\":0.07823167,\"neutral\":0.897131145,\"sadness\":0.00048493495,\"surprise\":0.0003725856},\"_expires\":\"2017-06-17T21:12:59.3669891Z\"},{\"Tags\":{\"Floyd Mayweather Jr.\":0.192,\"Philadelphia\":0.65,\"Twitter Inc.\":1,\"USA Today\":0.979,\"Sport\":0.85,\"Conor McGregor\":0.016,\"June 14\":0.009,\"Troy\":0.005,\"Honda Civic\":0.004,\"Bank account\":0.025,\"NASCAR on TNT\":0.099,\"Boxing\":1},\"_expires\":\"2017-06-17T21:12:59.3201526Z\"},{\"XSentiment\":0.999998,\"_expires\":\"2017-06-17T21:13:01.7647699Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/sports/2017/06/kevin-durant-discusses-random-text-he-received-from-obama-after-winning-nba-finals\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/sports/2017/06/kevin-durant-discusses-random-text-he-received-from-obama-after-winning-nba-finals\"},\"j\":[{\"_title\":\"Kevin Durant Discusses 'Random' Text He Received From Obama After Winning NBA 
Finals\"},{\"RVisionTags\":{\"person\":0.9939494,\"outdoor\":0.9162231,\"male\":0.242890328},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0208446719,\"racyScore\":0.0304868072},\"_expires\":\"2017-06-17T18:28:04.5675692Z\"},{\"Emotion0\":{\"anger\":2.35385942E-05,\"contempt\":3.294205E-06,\"disgust\":1.38025935E-05,\"fear\":7.8729E-06,\"happiness\":0.9997165,\"neutral\":0.0001607666,\"sadness\":1.25915176E-05,\"surprise\":6.161377E-05},\"Emotion1\":{\"anger\":0.00371822272,\"contempt\":0.000460597221,\"disgust\":0.000157746123,\"fear\":0.000275517668,\"happiness\":0.0403934456,\"neutral\":0.86978966,\"sadness\":0.08499769,\"surprise\":0.000207107121},\"_expires\":\"2017-06-17T18:28:03.3944386Z\"},{\"Tags\":{\"Kevin Durant\":1,\"Random House\":0.069,\"Barack Obama\":1,\"UNK NBA\":1,\"Twitter Inc.\":1,\"USA Today\":0.999,\"Sports journalism\":0.01,\"Cary\":0.025,\"The NBA Finals\":0.017,\"Monday Night Football\":0.943,\"Golden State Warriors\":1,\"Bill Simmons\":0.866,\"Podcast\":0.918,\"Oracle Arena\":0.028,\"Bill Russell NBA Finals Most Valuable Player Award\":1,\"June 13\":0.034,\"Cleveland Cavaliers\":1,\"LeBron James\":1,\"Kyrie Irving\":1,\"Allen Iverson\":0.999,\"Rihanna\":0.63,\"The League\":0.166,\"Stay\":0.858,\"Singing\":0.024,\"President of the United States\":1},\"_expires\":\"2017-06-17T18:28:03.8094333Z\"},{\"XSentiment\":0.9999663,\"_expires\":\"2017-06-17T18:28:03.9031587Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/pop-culture/2017/06/tj-miller-hbo-special-interview\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/pop-culture/2017/06/tj-miller-hbo-special-interview\"},\"j\":[{\"_title\":\"T.J. 
Miller's Done With 'Silicon Valley,' But His Career's Just Getting Started\"},{\"RVisionTags\":{\"person\":0.999560535,\"man\":0.9939658,\"suit\":0.950625,\"outdoor\":0.9169477,\"wearing\":0.7708228,\"jacket\":0.528340757,\"coat\":0.490623325,\"dark\":0.32463637,\"male\":0.212570518,\"microphone\":0.148984566,\"crowd\":0.0156043554},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0102083739,\"racyScore\":0.0126593616},\"_expires\":\"2017-06-17T18:59:23.2248419Z\"},{\"Emotion0\":{\"anger\":1.0740634E-05,\"contempt\":1.42603E-05,\"disgust\":5.26589356E-05,\"fear\":1.20671484E-06,\"happiness\":0.996862,\"neutral\":0.00302408962,\"sadness\":5.683868E-06,\"surprise\":2.93453577E-05},\"_expires\":\"2017-06-17T18:59:22.2148365Z\"},{\"Tags\":{\"T. J. Miller\":0.999,\"Silicon Valley\":0.999,\"Whitney\":0.264,\"Twitter Inc.\":1,\"HBO\":1,\"Complex\":0.032,\"The Gorburger Show\":0.062,\"Funny or Die\":0.914,\"Comedy Central\":1,\"Japan\":1,\"Ridiculousness\":0.004,\"Deadpool\":0.117,\"Cloverfield\":0.921,\"Cannes Film Festival\":0.987,\"Energizer Bunny\":0.007,\"Amy Schumer\":0.021,\"Pete Holmes\":0.153,\"Peter Boyle\":0.003,\"Downtown Los Angeles\":0.843,\"Supervillain\":0.038,\"San Francisco\":0.809,\"Jesus Christ\":0.902,\"Kong: Skull Island\":0.751,\"Jordan Vogt-Roberts\":0.979,\"Usher\":0.325,\"Flea\":0.536,\"Henry Rollins\":0.096,\"Federal government of the United States\":0.485,\"Mike Judge\":0.974,\"Uber\":0.404,\"Chelsea Handler\":0.364},\"_expires\":\"2017-06-17T18:59:22.7021704Z\"},{\"_expires\":\"2017-06-17T18:59:22.7489821Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/life/2017/06/iphone-8-edge-to-edge-screen\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/life/2017/06/iphone-8-edge-to-edge-screen\"},\"j\":[{\"_title\":\"Newly-Leaked Pictures Show You What iPhone 8 Screen Might Look 
Like\"},{\"RVisionTags\":{\"iPod\":0.802692235,\"electronics\":0.7313406},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0145561891,\"racyScore\":0.0151244327},\"_expires\":\"2017-06-17T23:14:19.5133399Z\"},{\"_expires\":\"2017-06-17T23:14:18.3277963Z\"},{\"Tags\":{\"iOS\":0.492,\"Complex\":0.102,\"Twitter Inc.\":1,\"Imgur\":0.996,\"Apple Inc.\":1,\"Reddit\":0.57,\"Check It Out\":0.003,\"China\":0.813,\"iPhone\":1},\"_expires\":\"2017-06-17T23:14:18.0245382Z\"},{\"XSentiment\":0.9994883,\"_expires\":\"2017-06-17T23:14:18.7364306Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/sports/2017/06/ryan-destiny-get-sweaty\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/sports/2017/06/ryan-destiny-get-sweaty\"},\"j\":[{\"_title\":\"Ryan Destiny Talks About Starring in Hit TV Series 'Star' on Get Sweaty With Emily Oberg\"},{\"RVisionTags\":{\"person\":0.998844266,\"boxing\":0.573501348},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0180808976,\"racyScore\":0.07170816},\"_expires\":\"2017-06-17T19:55:45.6659091Z\"},{\"Emotion0\":{\"anger\":0.0428811572,\"contempt\":0.0120455148,\"disgust\":0.00588818826,\"fear\":0.0031699785,\"happiness\":0.0325962044,\"neutral\":0.839067638,\"sadness\":0.0560076348,\"surprise\":0.00834368449},\"Emotion1\":{\"anger\":9.522394E-08,\"contempt\":3.42922242E-08,\"disgust\":3.1531672E-06,\"fear\":4.24115054E-09,\"happiness\":0.999994,\"neutral\":2.3093894E-06,\"sadness\":1.65964408E-07,\"surprise\":2.82413E-07},\"_expires\":\"2017-06-17T19:55:44.6508371Z\"},{\"Tags\":{\"Destiny\":0.299,\"HiT TV\":0.008,\"Complex\":0.953,\"Twitter Inc.\":1,\"New York City\":1,\"Robert E. 
Lee\":0.004,\"Fox Broadcasting Company\":1,\"Naomi Campbell\":0.091,\"Sy Kravitz\":0.731,\"Queen Latifah\":0.876},\"_expires\":\"2017-06-17T19:55:44.2710467Z\"},{\"XSentiment\":0.113136955,\"_expires\":\"2017-06-17T19:55:45.0269667Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/music/2017/06/2-chainz-dna-freestyle-kendrick-lamar\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/music/2017/06/2-chainz-dna-freestyle-kendrick-lamar\"},\"j\":[{\"_title\":\"Watch 2 Chainz Flex on Kendrick Lamar's \\\"DNA\\\" Beat in New Freestyle\"}]},{\"_tag\":\"cmplx$http://www.complex.com/music/2017/06/everyday-struggle-ep39-kehlani-tinashe\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/music/2017/06/everyday-struggle-ep39-kehlani-tinashe\"},\"j\":[{\"_title\":\"Joe Budden and DJ Akademiks Discuss Tinashe Controversy and Kehlani Cussing Out Heckler on 'Everyday Struggle'\"},{\"RVisionTags\":{\"abstract\":0.5319324},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.09698458,\"racyScore\":0.08712957},\"_expires\":\"2017-06-17T17:56:47.5354099Z\"},{\"Emotion0\":{\"anger\":0.0121765705,\"contempt\":0.0152475182,\"disgust\":0.0186378732,\"fear\":0.00241010683,\"happiness\":0.202662408,\"neutral\":0.6867056,\"sadness\":0.0549196824,\"surprise\":0.00724024652},\"Emotion1\":{\"anger\":0.005693211,\"contempt\":0.0003093396,\"disgust\":0.000264122762,\"fear\":0.000122387908,\"happiness\":0.000162528377,\"neutral\":0.9901545,\"sadness\":0.00110488839,\"surprise\":0.00218904181},\"_expires\":\"2017-06-17T17:56:46.5115225Z\"},{\"Tags\":{\"Joe Budden\":0.972,\"Disc jockey\":0.998,\"Tinashe\":0.485,\"Kehlani\":0.011,\"Profanity\":0.006,\"Heckler\":0.003,\"Complex\":0.992,\"Twitter Inc.\":0.959,\"XXL\":0.906,\"Kyrie Irving\":0.277,\"LeBron James\":0.998,\"Michael Jordan\":0.989,\"Floyd Mayweather Jr.\":0.031,\"Conor 
McGregor\":0.276},\"_expires\":\"2017-06-17T17:56:46.0896119Z\"},{\"XSentiment\":0.005011654,\"_expires\":\"2017-06-17T17:56:46.6990201Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/sports/2017/06/lonzo-ball-interview\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/sports/2017/06/lonzo-ball-interview\"},\"j\":[{\"_title\":\"Lonzo Ball Finally Told Us How He Really Feels About LaVar's Media Antics\"},{\"RVisionTags\":{\"person\":0.9992661,\"sport\":0.9894762,\"athletic game\":0.9814596,\"basketball\":0.9540427,\"player\":0.897939742,\"crowd\":0.529209554,\"watching\":0.450753957},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0182933789,\"racyScore\":0.0168850645},\"_expires\":\"2017-06-17T14:11:20.108671Z\"},{\"Emotion0\":{\"anger\":6.044781E-05,\"contempt\":2.85142833E-05,\"disgust\":4.95703644E-05,\"fear\":0.008861069,\"happiness\":2.874653E-05,\"neutral\":0.8961255,\"sadness\":0.00528799742,\"surprise\":0.08955817},\"_expires\":\"2017-06-17T14:11:19.6899531Z\"},{\"Tags\":{\"Los Angeles Angels of Anaheim\":0.515,\"Songwriter\":0.33,\"Twitter Inc.\":1,\"USA Today\":1,\"Broadcasting of sports events\":0.038,\"Chino Hills\":0.002,\"UCLA Bruins men's basketball\":0.6,\"NCAA Men's Division I Basketball Championship\":0.897,\"Sweet\":0.019,\"Todd Marinovich\":0.938,\"Marv Albert\":0.055,\"UNK NBA\":1,\"Fox Broadcasting Company\":0.933,\"Jayson Tatum\":0.004,\"ZO2\":0.008,\"Los Angeles Lakers\":1,\"Lamar Odom\":0.986,\"Lamar Cardinals Men's Basketball\":0.004,\"Magic Johnson\":0.995,\"Jason Kidd\":1,\"LeBron James\":1,\"James Harden\":0.99,\"Adidas\":0.811,\"Puerto Rico\":0.012,\"Stephen Curry\":0.981,\"Michael Jordan\":1,\"Shaquille 
O'Neal\":1},\"_expires\":\"2017-06-17T14:11:19.6587216Z\"},{\"XSentiment\":1,\"_expires\":\"2017-06-17T14:11:20.0892306Z\"}]}]},\"p\":[0.8153846,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154],\"VWState\":{\"m\":\"decc63fa2c284ec9887ee0572ea16d17/7860031216114d8bb718c40abc801bf4\"}}";

            // Parses the large Decision Service document above with copyJson: false,
            // checks the interaction header and the example count, and verifies that
            // the input buffer no longer equals a copy taken before parsing.
            using (var vw = new VowpalWabbit("--cb_adf"))
            {
                // Result is unused; presumably a sanity check that the literal is parseable JSON.
                JsonConvert.DeserializeObject(json);

                // One extra byte beyond the worst-case UTF-8 size leaves room for the trailing \0.
                var bufferSize = Encoding.UTF8.GetMaxByteCount(json.Length) + 1;
                var bytes = new byte[bufferSize];
                var bytes2 = new byte[bufferSize];

                var byteLen = Encoding.UTF8.GetBytes(json, 0, json.Length, bytes, 0) + 1; // trailing \0

                // Snapshot of the buffer before parsing, compared against `bytes` further down.
                Array.Copy(bytes, bytes2, bytes.Length);

                VowpalWabbitDecisionServiceInteractionHeader header;
                // NOTE(review): element type restored from usage (Dispose() on each element) - confirm.
                List<VowpalWabbitExample> examples = null;

                try
                {
                    examples = vw.ParseDecisionServiceJson(bytes, 0, byteLen, copyJson: false, header: out header);

                    Assert.AreEqual("73369b13ec98433096a1496d27da0bfd", header.EventId);
                    CollectionAssert.AreEqual(
                        new[] { 9, 11, 13, 6, 4, 5, 12, 1, 2, 10, 8, 3, 7 },
                        header.Actions,
                        "Actions mismatch");
                    CollectionAssert.AreEqual(
                        new[] { 0.8153846f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f },
                        header.Probabilities,
                        "Probabilities mismatch");
                    Assert.AreEqual(0, header.ProbabilityOfDrop);

                    Assert.AreEqual(14, examples.Count);

                    // check if copyJson: false was actually used
                    CollectionAssert.AreNotEqual(bytes2, bytes);
                }
                finally
                {
                    // Dispose every example that was created, even when an assertion failed.
                    if (examples != null)
                    {
                        foreach (var ex in examples)
                        {
                            if (ex != null)
                                ex.Dispose();
                        }
                    }
                }
            }
        }

        [TestMethod]
        [TestCategory("Vowpal Wabbit/JSON")]
        public void TestDecisionServiceJson_CopyJson()
        {
            var json = "{\"Version\":\"2\",\"EventId\":\"73369b13ec98433096a1496d27da0bfd\",\"a\":[9,11,13,6,4,5,12,1,2,10,8,3,7],\"c\":{\"_synthetic\":false,\"User\":{\"_age\":0},\"Geo\":{\"country\":\"United
States\",\"_countrycf\":\"8\",\"state\":\"New Jersey\",\"city\":\"Somerdale\",\"_citycf\":\"5\",\"dma\":\"504\"},\"MRefer\":{\"referer\":\"http://www.complex.com/\"},\"OUserAgent\":{\"_ua\":\"Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_2 like Mac OS X) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.0 Mobile/14F89 Safari/602.1\",\"_DeviceBrand\":\"Apple\",\"_DeviceFamily\":\"iPhone\",\"_DeviceIsSpider\":false,\"_DeviceModel\":\"iPhone\",\"_OSFamily\":\"iOS\",\"_OSMajor\":\"10\",\"_OSPatch\":\"2\",\"DeviceType\":\"Mobile\"},\"_multi\":[{\"_tag\":\"cmplx$http://www.complex.com/sports/2017/06/floyd-mayweater-conor-mcgregor-may-be-set\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/sports/2017/06/floyd-mayweater-conor-mcgregor-may-be-set\"},\"j\":[{\"_title\":\"The Floyd Mayweather vs. Conor McGregor Fight Date Has Finally Been Announced\"},{\"RVisionTags\":{\"person\":0.999368966,\"man\":0.998108864,\"wearing\":0.9368642,\"hat\":0.928866565,\"indoor\":0.893332958,\"close\":0.201101109},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0148094837,\"racyScore\":0.0144806523},\"_expires\":\"2017-06-17T21:42:30.3186957Z\"},{\"Emotion0\":{\"anger\":1.81591489E-07,\"contempt\":9.946987E-06,\"disgust\":6.11135547E-05,\"fear\":4.565633E-12,\"happiness\":0.999928534,\"neutral\":2.28114558E-07,\"sadness\":3.07409E-09,\"surprise\":1.46155665E-08},\"_expires\":\"2017-06-17T21:42:28.8496462Z\"},{\"Tags\":{\"Floyd Mayweather Jr.\":0.982,\"Conor McGregor\":0.938,\"Complex\":0.334,\"Twitter Inc.\":0.997,\"Dan Mullane\":0.006,\"Mixed martial arts\":1,\"Net Controls\":0.281,\"Boxing\":1,\"Ontario\":1,\"Dana White\":0.972,\"Las Vegas Valley\":0.995,\"Nevada Athletic Commission\":0.024,\"Mayweather Promotions\":0.118,\"MGM Grand Garden Arena\":0.997,\"Fighting game\":0.641,\"Nevada\":1,\"Coming 
out\":0.076},\"_expires\":\"2017-06-17T21:42:29.8271823Z\"},{\"XSentiment\":2.93618323E-05,\"_expires\":\"2017-06-17T21:42:29.1777863Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/music/2017/06/young-thug-beautiful-thugger-girls-violent-trailer-has-people-upset\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/music/2017/06/young-thug-beautiful-thugger-girls-violent-trailer-has-people-upset\"},\"j\":[{\"_title\":\"Why Young Thug's Violent Trailer for 'Beautiful Thugger Girls' Has People Upset\"},{\"RVisionTags\":{\"person\":0.9621564,\"indoor\":0.93759197},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0590519942,\"racyScore\":0.0740057454},\"_expires\":\"2017-06-17T17:27:41.0750729Z\"},{\"Emotion0\":{\"anger\":0.000195411936,\"contempt\":0.0007970728,\"disgust\":7.29157255E-05,\"fear\":0.000106336483,\"happiness\":0.000127831052,\"neutral\":0.9209777,\"sadness\":0.0775285438,\"surprise\":0.000194183245},\"_expires\":\"2017-06-17T17:27:33.4982953Z\"},{\"Tags\":{\"Young Thug\":0.445,\"Beautiful\":0.005,\"Twitter Inc.\":1,\"Drake\":0.968,\"Instagram\":0.995,\"Breezy\":0.014,\"June 13\":0.008,\"Cover art\":0.003,\"Album\":0.99,\"Surface\":0.012,\"Prince Michael Jackson II\":1},\"_expires\":\"2017-06-17T17:27:33.4670277Z\"},{\"XSentiment\":2.46002031E-07,\"_expires\":\"2017-06-17T17:27:34.0139318Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/sports/2017/06/did-we-just-witness-peak-lebron\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/sports/2017/06/did-we-just-witness-peak-lebron\"},\"j\":[{\"_title\":\"Did We Just Witness Peak 
LeBron?\"},{\"RVisionTags\":{\"person\":0.9887383,\"indoor\":0.8961688},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0181511845,\"racyScore\":0.0380923077},\"_expires\":\"2017-06-17T14:46:55.4499648Z\"},{\"Emotion0\":{\"anger\":0.0007875018,\"contempt\":0.000600368367,\"disgust\":0.00288117747,\"fear\":0.00168624625,\"happiness\":4.59727671E-05,\"neutral\":0.377137423,\"sadness\":0.00264490047,\"surprise\":0.6142164},\"_expires\":\"2017-06-17T14:46:49.7072998Z\"},{\"Tags\":{\"LeBron James\":1,\"Complex\":0.08,\"Broadcasting of sports events\":0.12,\"Twitter Inc.\":0.999,\"USA Today\":0.994,\"Kyle Broflovski\":0.023,\"Superman\":0.694,\"Cleveland Cavaliers\":1,\"UNK NBA\":1,\"Bill Russell NBA Finals Most Valuable Player Award\":1,\"Golden State Warriors\":1,\"Michael Jordan\":1,\"Kobe Bryant\":1,\"Time\":0.865,\"Magic Johnson\":0.999},\"_expires\":\"2017-06-17T14:46:49.4416286Z\"},{\"XSentiment\":0.9999974,\"_expires\":\"2017-06-17T14:46:49.7541575Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/sports/2017/06/kevin-durant-made-the-difference\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/sports/2017/06/kevin-durant-made-the-difference\"},\"j\":[{\"_title\":\"Golden State (Probably) Would Have Blown Another Lead Without KD\"},{\"RVisionTags\":{\"person\":0.999950767,\"player\":0.984833837,\"sport\":0.9816471,\"athletic game\":0.9691899,\"basketball\":0.7260069,\"hand\":0.463383943,\"crowd\":0.3354485},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0127091147,\"racyScore\":0.01585682},\"_expires\":\"2017-06-17T14:44:47.7452593Z\"},{\"Emotion0\":{\"anger\":0.9830929,\"contempt\":6.38814454E-05,\"disgust\":0.0142428661,\"fear\":4.3443215E-05,\"happiness\":0.00127731345,\"neutral\":0.0003242454,\"sadness\":0.0006526729,\"surprise\":0.0003026811},\"_expires\":\"2017-06-17T14:44:47.1857099Z\"},{\"Tags\":{\"Golden State Warriors\":1,\"Lead guitar\":0.046,\"Kevin Durant\":1,\"Twitter 
Inc.\":0.985,\"Martinez\":0.015,\"Splash Brothers\":1,\"Cleveland Cavaliers\":1,\"Monday Night Football\":0.398,\"Richard Jefferson\":0.95,\"Kevin Love\":0.997,\"McDonald's All-American Game\":0.859,\"University of Texas at Austin College of Fine Arts\":0.999,\"Naismith College Player of the Year\":0.117,\"UNK NBA\":1,\"NBA Most Valuable Player Award\":1,\"Olympic Games\":0.858,\"NBA All-Star Game\":1,\"Champion\":0.12,\"Bill Russell NBA Finals Most Valuable Player Award\":1,\"Stephen Curry\":0.989,\"Draymond Green\":0.674,\"Seat Pleasant\":0.122},\"_expires\":\"2017-06-17T14:44:46.9264793Z\"},{\"XSentiment\":1,\"_expires\":\"2017-06-17T14:44:47.4639789Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/music/2017/06/2-chainz-dna-freestyle-kendrick-lamar\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/music/2017/06/2-chainz-dna-freestyle-kendrick-lamar\"},\"j\":[{\"_title\":\"Watch 2 Chainz Flex on Kendrick Lamar's \\\"DNA\\\" Beat in New Freestyle\"},{\"RVisionTags\":{\"person\":0.9840661,\"dressed\":0.304036647},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0285988934,\"racyScore\":0.0257010516},\"_expires\":\"2017-06-17T19:30:46.5524171Z\"},{\"_expires\":\"2017-06-17T19:30:46.2086571Z\"},{\"Tags\":{\"2 Chainz\":1,\"Kendrick Lamar\":1,\"DNA\":0.053,\"Hip hop production\":0.015,\"Freestyle rap\":0.934,\"Philadelphia\":0.193,\"Twitter Inc.\":1,\"Subscription business model\":0.011,\"Complex\":1,\"Los Angeles\":1,\"Georgia\":0.894,\"Trap\":0.579,\"Top Dawg Entertainment\":0.872,\"Virtual reality\":0.011,\"Travis Scott\":0.011,\"Collaboration\":0.004,\"Nicki Minaj\":0.999,\"Remy Ma\":0.99,\"Papoose\":0.892,\"Sampling\":0.017,\"Hip hop music\":1,\"Tha Carter V\":1,\"Everyday (ASAP Rocky 
song)\":0.003},\"_expires\":\"2017-06-17T19:30:46.1314041Z\"},{\"XSentiment\":1,\"_expires\":\"2017-06-17T19:30:46.7944713Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/sports/2017/06/floyd-mayweather-viral-challenge-backfires-reacts\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/sports/2017/06/floyd-mayweather-viral-challenge-backfires-reacts\"},\"j\":[{\"_title\":\"Floyd Mayweather Attempted to Start His Own Viral Challenge and It Hilariously Backfired\"},{\"RVisionTags\":{\"person\":0.999924064,\"man\":0.9552084,\"crowd\":0.01707872},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0114686647,\"racyScore\":0.0159121435},\"_expires\":\"2017-06-17T21:13:01.1925796Z\"},{\"Emotion0\":{\"anger\":0.00127403683,\"contempt\":0.0222781487,\"disgust\":0.0002176114,\"fear\":9.893078E-06,\"happiness\":0.07823167,\"neutral\":0.897131145,\"sadness\":0.00048493495,\"surprise\":0.0003725856},\"_expires\":\"2017-06-17T21:12:59.3669891Z\"},{\"Tags\":{\"Floyd Mayweather Jr.\":0.192,\"Philadelphia\":0.65,\"Twitter Inc.\":1,\"USA Today\":0.979,\"Sport\":0.85,\"Conor McGregor\":0.016,\"June 14\":0.009,\"Troy\":0.005,\"Honda Civic\":0.004,\"Bank account\":0.025,\"NASCAR on TNT\":0.099,\"Boxing\":1},\"_expires\":\"2017-06-17T21:12:59.3201526Z\"},{\"XSentiment\":0.999998,\"_expires\":\"2017-06-17T21:13:01.7647699Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/sports/2017/06/kevin-durant-discusses-random-text-he-received-from-obama-after-winning-nba-finals\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/sports/2017/06/kevin-durant-discusses-random-text-he-received-from-obama-after-winning-nba-finals\"},\"j\":[{\"_title\":\"Kevin Durant Discusses 'Random' Text He Received From Obama After Winning NBA 
Finals\"},{\"RVisionTags\":{\"person\":0.9939494,\"outdoor\":0.9162231,\"male\":0.242890328},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0208446719,\"racyScore\":0.0304868072},\"_expires\":\"2017-06-17T18:28:04.5675692Z\"},{\"Emotion0\":{\"anger\":2.35385942E-05,\"contempt\":3.294205E-06,\"disgust\":1.38025935E-05,\"fear\":7.8729E-06,\"happiness\":0.9997165,\"neutral\":0.0001607666,\"sadness\":1.25915176E-05,\"surprise\":6.161377E-05},\"Emotion1\":{\"anger\":0.00371822272,\"contempt\":0.000460597221,\"disgust\":0.000157746123,\"fear\":0.000275517668,\"happiness\":0.0403934456,\"neutral\":0.86978966,\"sadness\":0.08499769,\"surprise\":0.000207107121},\"_expires\":\"2017-06-17T18:28:03.3944386Z\"},{\"Tags\":{\"Kevin Durant\":1,\"Random House\":0.069,\"Barack Obama\":1,\"UNK NBA\":1,\"Twitter Inc.\":1,\"USA Today\":0.999,\"Sports journalism\":0.01,\"Cary\":0.025,\"The NBA Finals\":0.017,\"Monday Night Football\":0.943,\"Golden State Warriors\":1,\"Bill Simmons\":0.866,\"Podcast\":0.918,\"Oracle Arena\":0.028,\"Bill Russell NBA Finals Most Valuable Player Award\":1,\"June 13\":0.034,\"Cleveland Cavaliers\":1,\"LeBron James\":1,\"Kyrie Irving\":1,\"Allen Iverson\":0.999,\"Rihanna\":0.63,\"The League\":0.166,\"Stay\":0.858,\"Singing\":0.024,\"President of the United States\":1},\"_expires\":\"2017-06-17T18:28:03.8094333Z\"},{\"XSentiment\":0.9999663,\"_expires\":\"2017-06-17T18:28:03.9031587Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/pop-culture/2017/06/tj-miller-hbo-special-interview\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/pop-culture/2017/06/tj-miller-hbo-special-interview\"},\"j\":[{\"_title\":\"T.J. 
Miller's Done With 'Silicon Valley,' But His Career's Just Getting Started\"},{\"RVisionTags\":{\"person\":0.999560535,\"man\":0.9939658,\"suit\":0.950625,\"outdoor\":0.9169477,\"wearing\":0.7708228,\"jacket\":0.528340757,\"coat\":0.490623325,\"dark\":0.32463637,\"male\":0.212570518,\"microphone\":0.148984566,\"crowd\":0.0156043554},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0102083739,\"racyScore\":0.0126593616},\"_expires\":\"2017-06-17T18:59:23.2248419Z\"},{\"Emotion0\":{\"anger\":1.0740634E-05,\"contempt\":1.42603E-05,\"disgust\":5.26589356E-05,\"fear\":1.20671484E-06,\"happiness\":0.996862,\"neutral\":0.00302408962,\"sadness\":5.683868E-06,\"surprise\":2.93453577E-05},\"_expires\":\"2017-06-17T18:59:22.2148365Z\"},{\"Tags\":{\"T. J. Miller\":0.999,\"Silicon Valley\":0.999,\"Whitney\":0.264,\"Twitter Inc.\":1,\"HBO\":1,\"Complex\":0.032,\"The Gorburger Show\":0.062,\"Funny or Die\":0.914,\"Comedy Central\":1,\"Japan\":1,\"Ridiculousness\":0.004,\"Deadpool\":0.117,\"Cloverfield\":0.921,\"Cannes Film Festival\":0.987,\"Energizer Bunny\":0.007,\"Amy Schumer\":0.021,\"Pete Holmes\":0.153,\"Peter Boyle\":0.003,\"Downtown Los Angeles\":0.843,\"Supervillain\":0.038,\"San Francisco\":0.809,\"Jesus Christ\":0.902,\"Kong: Skull Island\":0.751,\"Jordan Vogt-Roberts\":0.979,\"Usher\":0.325,\"Flea\":0.536,\"Henry Rollins\":0.096,\"Federal government of the United States\":0.485,\"Mike Judge\":0.974,\"Uber\":0.404,\"Chelsea Handler\":0.364},\"_expires\":\"2017-06-17T18:59:22.7021704Z\"},{\"_expires\":\"2017-06-17T18:59:22.7489821Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/life/2017/06/iphone-8-edge-to-edge-screen\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/life/2017/06/iphone-8-edge-to-edge-screen\"},\"j\":[{\"_title\":\"Newly-Leaked Pictures Show You What iPhone 8 Screen Might Look 
Like\"},{\"RVisionTags\":{\"iPod\":0.802692235,\"electronics\":0.7313406},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0145561891,\"racyScore\":0.0151244327},\"_expires\":\"2017-06-17T23:14:19.5133399Z\"},{\"_expires\":\"2017-06-17T23:14:18.3277963Z\"},{\"Tags\":{\"iOS\":0.492,\"Complex\":0.102,\"Twitter Inc.\":1,\"Imgur\":0.996,\"Apple Inc.\":1,\"Reddit\":0.57,\"Check It Out\":0.003,\"China\":0.813,\"iPhone\":1},\"_expires\":\"2017-06-17T23:14:18.0245382Z\"},{\"XSentiment\":0.9994883,\"_expires\":\"2017-06-17T23:14:18.7364306Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/sports/2017/06/ryan-destiny-get-sweaty\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/sports/2017/06/ryan-destiny-get-sweaty\"},\"j\":[{\"_title\":\"Ryan Destiny Talks About Starring in Hit TV Series 'Star' on Get Sweaty With Emily Oberg\"},{\"RVisionTags\":{\"person\":0.998844266,\"boxing\":0.573501348},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0180808976,\"racyScore\":0.07170816},\"_expires\":\"2017-06-17T19:55:45.6659091Z\"},{\"Emotion0\":{\"anger\":0.0428811572,\"contempt\":0.0120455148,\"disgust\":0.00588818826,\"fear\":0.0031699785,\"happiness\":0.0325962044,\"neutral\":0.839067638,\"sadness\":0.0560076348,\"surprise\":0.00834368449},\"Emotion1\":{\"anger\":9.522394E-08,\"contempt\":3.42922242E-08,\"disgust\":3.1531672E-06,\"fear\":4.24115054E-09,\"happiness\":0.999994,\"neutral\":2.3093894E-06,\"sadness\":1.65964408E-07,\"surprise\":2.82413E-07},\"_expires\":\"2017-06-17T19:55:44.6508371Z\"},{\"Tags\":{\"Destiny\":0.299,\"HiT TV\":0.008,\"Complex\":0.953,\"Twitter Inc.\":1,\"New York City\":1,\"Robert E. 
Lee\":0.004,\"Fox Broadcasting Company\":1,\"Naomi Campbell\":0.091,\"Sy Kravitz\":0.731,\"Queen Latifah\":0.876},\"_expires\":\"2017-06-17T19:55:44.2710467Z\"},{\"XSentiment\":0.113136955,\"_expires\":\"2017-06-17T19:55:45.0269667Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/music/2017/06/2-chainz-dna-freestyle-kendrick-lamar\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/music/2017/06/2-chainz-dna-freestyle-kendrick-lamar\"},\"j\":[{\"_title\":\"Watch 2 Chainz Flex on Kendrick Lamar's \\\"DNA\\\" Beat in New Freestyle\"}]},{\"_tag\":\"cmplx$http://www.complex.com/music/2017/06/everyday-struggle-ep39-kehlani-tinashe\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/music/2017/06/everyday-struggle-ep39-kehlani-tinashe\"},\"j\":[{\"_title\":\"Joe Budden and DJ Akademiks Discuss Tinashe Controversy and Kehlani Cussing Out Heckler on 'Everyday Struggle'\"},{\"RVisionTags\":{\"abstract\":0.5319324},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.09698458,\"racyScore\":0.08712957},\"_expires\":\"2017-06-17T17:56:47.5354099Z\"},{\"Emotion0\":{\"anger\":0.0121765705,\"contempt\":0.0152475182,\"disgust\":0.0186378732,\"fear\":0.00241010683,\"happiness\":0.202662408,\"neutral\":0.6867056,\"sadness\":0.0549196824,\"surprise\":0.00724024652},\"Emotion1\":{\"anger\":0.005693211,\"contempt\":0.0003093396,\"disgust\":0.000264122762,\"fear\":0.000122387908,\"happiness\":0.000162528377,\"neutral\":0.9901545,\"sadness\":0.00110488839,\"surprise\":0.00218904181},\"_expires\":\"2017-06-17T17:56:46.5115225Z\"},{\"Tags\":{\"Joe Budden\":0.972,\"Disc jockey\":0.998,\"Tinashe\":0.485,\"Kehlani\":0.011,\"Profanity\":0.006,\"Heckler\":0.003,\"Complex\":0.992,\"Twitter Inc.\":0.959,\"XXL\":0.906,\"Kyrie Irving\":0.277,\"LeBron James\":0.998,\"Michael Jordan\":0.989,\"Floyd Mayweather Jr.\":0.031,\"Conor 
McGregor\":0.276},\"_expires\":\"2017-06-17T17:56:46.0896119Z\"},{\"XSentiment\":0.005011654,\"_expires\":\"2017-06-17T17:56:46.6990201Z\"}]},{\"_tag\":\"cmplx$http://www.complex.com/sports/2017/06/lonzo-ball-interview\",\"i\":{\"constant\":1,\"id\":\"cmplx$http://www.complex.com/sports/2017/06/lonzo-ball-interview\"},\"j\":[{\"_title\":\"Lonzo Ball Finally Told Us How He Really Feels About LaVar's Media Antics\"},{\"RVisionTags\":{\"person\":0.9992661,\"sport\":0.9894762,\"athletic game\":0.9814596,\"basketball\":0.9540427,\"player\":0.897939742,\"crowd\":0.529209554,\"watching\":0.450753957},\"SVisionAdult\":{\"isAdultContent\":false,\"isRacyContent\":false,\"adultScore\":0.0182933789,\"racyScore\":0.0168850645},\"_expires\":\"2017-06-17T14:11:20.108671Z\"},{\"Emotion0\":{\"anger\":6.044781E-05,\"contempt\":2.85142833E-05,\"disgust\":4.95703644E-05,\"fear\":0.008861069,\"happiness\":2.874653E-05,\"neutral\":0.8961255,\"sadness\":0.00528799742,\"surprise\":0.08955817},\"_expires\":\"2017-06-17T14:11:19.6899531Z\"},{\"Tags\":{\"Los Angeles Angels of Anaheim\":0.515,\"Songwriter\":0.33,\"Twitter Inc.\":1,\"USA Today\":1,\"Broadcasting of sports events\":0.038,\"Chino Hills\":0.002,\"UCLA Bruins men's basketball\":0.6,\"NCAA Men's Division I Basketball Championship\":0.897,\"Sweet\":0.019,\"Todd Marinovich\":0.938,\"Marv Albert\":0.055,\"UNK NBA\":1,\"Fox Broadcasting Company\":0.933,\"Jayson Tatum\":0.004,\"ZO2\":0.008,\"Los Angeles Lakers\":1,\"Lamar Odom\":0.986,\"Lamar Cardinals Men's Basketball\":0.004,\"Magic Johnson\":0.995,\"Jason Kidd\":1,\"LeBron James\":1,\"James Harden\":0.99,\"Adidas\":0.811,\"Puerto Rico\":0.012,\"Stephen Curry\":0.981,\"Michael Jordan\":1,\"Shaquille 
O'Neal\":1},\"_expires\":\"2017-06-17T14:11:19.6587216Z\"},{\"XSentiment\":1,\"_expires\":\"2017-06-17T14:11:20.0892306Z\"}]}]},\"p\":[0.8153846,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154],\"VWState\":{\"m\":\"decc63fa2c284ec9887ee0572ea16d17/7860031216114d8bb718c40abc801bf4\"}}"; using (var vw = new VowpalWabbit("--cb_adf")) { var obj = JsonConvert.DeserializeObject(json); var bytes = new byte[Encoding.UTF8.GetMaxByteCount(json.Length) + 1]; var bytes2 = new byte[Encoding.UTF8.GetMaxByteCount(json.Length) + 1]; var byteLen = Encoding.UTF8.GetBytes(json, 0, json.Length, bytes, 0) + 1;// trailing \0 Array.Copy(bytes, bytes2, bytes.Length); VowpalWabbitDecisionServiceInteractionHeader header; List examples = null; try { examples = vw.ParseDecisionServiceJson(bytes, 0, byteLen, copyJson: true, header: out header); Assert.AreEqual("73369b13ec98433096a1496d27da0bfd", header.EventId); CollectionAssert.AreEqual(new[] { 9, 11, 13, 6, 4, 5, 12, 1, 2, 10, 8, 3, 7 }, header.Actions, "Actions mismatch"); CollectionAssert.AreEqual(new[] { 0.8153846f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f }, header.Probabilities, "Probabilities mismatch"); Assert.AreEqual(0, header.ProbabilityOfDrop); Assert.AreEqual(14, examples.Count); // check if copyJson: true was actually used CollectionAssert.AreEqual(bytes2, bytes); } finally { if (examples != null) foreach (var ex in examples) if (ex != null) ex.Dispose(); } } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestDecisionServiceJsonNull() { var json = @"{""Version"":""1"",""EventId"":""7cacacea2c6e49b5b922f6f517a325ed"",""a"":[9,4,13,10,8,5,2,3,12,11,7,6,1],""c"":{""_synthetic"":false,""User"":{""_age"":0},""Geo"":{""country"":""United 
States"",""_countrycf"":""8"",""state"":""Georgia"",""city"":""Stone Mountain"",""_citycf"":""5"",""dma"":""524""},""MRefer"":{""referer"":""http://www.complex.com/""},""OUserAgent"":{""_ua"":""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4"",""_DeviceBrand"":"""",""_DeviceFamily"":""Other"",""_DeviceIsSpider"":false,""_DeviceModel"":"""",""_OSFamily"":""Mac OS X"",""_OSMajor"":""10"",""_OSPatch"":""5"",""DeviceType"":""Desktop""},""_multi"":[{""_tag"":""cmplx$http://www.complex.com/music/2017/06/prodigy-mobb-deep-once-in-a-generation-rapper"",""i"":{""constant"":1,""id"":""cmplx$http://www.complex.com/music/2017/06/prodigy-mobb-deep-once-in-a-generation-rapper""},""j"":[{""_title"":""Why Prodigy Was A Once-In-A-Generation Rapper""},{""RVisionTags"":{""person"":0.9913805,""hat"":0.6433856,""male"":0.153918922},""SVisionAdult"":{""isAdultContent"":false,""isRacyContent"":false,""adultScore"":0.0162594952,""racyScore"":0.0152094},""TVisionCelebrities"":{""Prodigy"":0.9999119},""_expires"":""2017-06-24T22:43:00.9241929Z""},{""Emotion0"":{""anger"":0.005261584,""contempt"":0.01940289,""disgust"":0.00146069494,""fear"":7.486289E-05,""happiness"":0.0102698216,""neutral"":0.9544214,""sadness"":0.00859182,""surprise"":0.00051694276},""_expires"":""2017-06-24T22:43:00.0008415Z""},{""Tags"":{""Roc Marciano"":0.015,""Mobb Deep"":1,""Prodigy"":1,""Havoc"":1,""A Tribe Called Quest"":0.969,""Q-Tip"":0.992,""The Infamous"":1,""The Crystals"":0.01,""Fifth Beatle"":0.099},""_expires"":""2017-06-24T22:43:00.1727355Z""},{""XSentiment"":3.01036973E-13,""_expires"":""2017-06-24T22:43:00.9398236Z""}]},{""_tag"":""cmplx$http://www.complex.com/pop-culture/2017/06/best-movies-2017"",""i"":{""constant"":1,""id"":""cmplx$http://www.complex.com/pop-culture/2017/06/best-movies-2017""},""j"":[{""_title"":""The Best Movies of 2017 (So 
Far)""},{""RVisionTags"":{""text"":0.9992092,""book"":0.997986436},""SVisionAdult"":{""isAdultContent"":false,""isRacyContent"":false,""adultScore"":0.0431842171,""racyScore"":0.0587232448},""_expires"":""2017-06-25T03:08:01.2111373Z""},{""Emotion0"":{""anger"":0.005728849,""contempt"":0.00117533945,""disgust"":8.215821E-05,""fear"":1.32827354E-05,""happiness"":0.000487715733,""neutral"":0.9911194,""sadness"":0.000120451317,""surprise"":0.00127282448},""_expires"":""2017-06-25T03:08:00.8636383Z""},null,{""XSentiment"":1,""_expires"":""2017-06-25T03:08:01.5112947Z""}]},{""_tag"":""cmplx$http://www.complex.com/music/2017/06/rihanna-responds-to-dm-from-fan-seeking-advice-on-getting-over-his-first-heartbreak"",""i"":{""constant"":1,""id"":""cmplx$http://www.complex.com/music/2017/06/rihanna-responds-to-dm-from-fan-seeking-advice-on-getting-over-his-first-heartbreak""},""j"":[{""_title"":""Rihanna Responds to DM From Fan Seeking Advice on Getting Over His First Heartbreak""},{""RVisionTags"":{""person"":0.99633044},""SVisionAdult"":{""isAdultContent"":false,""isRacyContent"":false,""adultScore"":0.013516671,""racyScore"":0.048148632},""_expires"":""2017-06-25T02:01:20.2363494Z""},{""Emotion0"":{""anger"":3.085194E-06,""contempt"":0.000411317043,""disgust"":1.026042E-05,""fear"":3.766979E-07,""happiness"":0.9783716,""neutral"":0.0211082119,""sadness"":5.66137569E-05,""surprise"":3.850023E-05},""_expires"":""2017-06-25T02:01:19.7050726Z""},{""Tags"":{""Rihanna"":1,""Martinez"":0.021,""Complex"":0.834,""Twitter Inc."":1,""If You"":0.003,""Grammy Awards"":1,""Mathematics"":0.999,""Malawi"":0.298,""Christine Teigen"":0.009,""Dave Chappelle"":0.047,""Presenter"":0.892,""Kendrick 
Lamar"":0.984,""Diamonds"":0.998},""_expires"":""2017-06-25T02:01:19.4155039Z""},{""XSentiment"":1,""_expires"":""2017-06-25T02:01:24.9467431Z""}]},{""_tag"":""cmplx$http://www.complex.com/music/2017/06/rihanna-responds-to-dm-from-fan-seeking-advice-on-getting-over-his-first-heartbreak"",""i"":{""constant"":1,""id"":""cmplx$http://www.complex.com/music/2017/06/rihanna-responds-to-dm-from-fan-seeking-advice-on-getting-over-his-first-heartbreak""},""j"":[{""_title"":""Rihanna Responds to DM From Fan Seeking Advice on Getting Over His First Heartbreak""}]},{""_tag"":""cmplx$http://www.complex.com/life/2017/06/guy-changes-from-shorts-to-dress-after-getting-sent-home-from-work-on-hot-day"",""i"":{""constant"":1,""id"":""cmplx$http://www.complex.com/life/2017/06/guy-changes-from-shorts-to-dress-after-getting-sent-home-from-work-on-hot-day""},""j"":[{""_title"":""Guy Sent Home by Boss for Wearing Shorts on a Hot Day, Returns to Work in Mom's Dress""},{""RVisionTags"":{""person"":0.999545038,""indoor"":0.998509467,""wall"":0.997952163},""SVisionAdult"":{""isAdultContent"":false,""isRacyContent"":false,""adultScore"":0.034187492,""racyScore"":0.0335518233},""_expires"":""2017-06-24T21:27:46.7135793Z""},{""Emotion0"":{""anger"":7.7880286E-05,""contempt"":0.00116215891,""disgust"":4.65850326E-06,""fear"":3.797253E-06,""happiness"":2.82076635E-05,""neutral"":0.986993253,""sadness"":0.0116343917,""surprise"":9.564323E-05},""_expires"":""2017-06-24T21:27:46.1197986Z""},{""Tags"":{""Boss Corporation"":0.007,""Shorts"":0.139,""Complex"":0.169,""Twitter Inc."":1,""English"":0.603,""The Daily Mirror"":0.008,""Oklahoma"":0.948,""High school"":0.999,""Lists of National Basketball Association players"":0.072,""UNK NBA"":0.529,""NBA dress code"":0.031,""Dress 
code"":0.113},""_expires"":""2017-06-24T21:27:46.0729078Z""},{""XSentiment"":0.9995655,""_expires"":""2017-06-24T21:27:46.7448359Z""}]},{""_tag"":""cmplx$http://www.complex.com/pop-culture/2017/06/tom-cruise-allegedly-balanced-bible-study-and-blow-jobs-away-from-risky-business-set"",""i"":{""constant"":1,""id"":""cmplx$http://www.complex.com/pop-culture/2017/06/tom-cruise-allegedly-balanced-bible-study-and-blow-jobs-away-from-risky-business-set""},""j"":[{""_title"":""Tom Cruise Was Allegedly Balancing Bible Study and Blow Jobs Away From the 'Risky Business' Set""},{""RVisionTags"":{""person"":0.9992724,""man"":0.9368908},""SVisionAdult"":{""isAdultContent"":false,""isRacyContent"":false,""adultScore"":0.009141326,""racyScore"":0.00951602},""TVisionCelebrities"":{""TOM CRUISE"":0.999997854},""_expires"":""2017-06-24T22:43:00.578981Z""},{""Emotion0"":{""anger"":1.77455458E-05,""contempt"":0.000173967157,""disgust"":0.000229260724,""fear"":3.15066657E-08,""happiness"":0.9786317,""neutral"":0.0209334176,""sadness"":5.861598E-07,""surprise"":1.33144977E-05},""_expires"":""2017-06-24T22:43:00.0793561Z""},{""Tags"":{""Connor Cruise"":1,""The Bible"":1,""Risky Business"":0.999,""Martinez"":0.006,""Complex"":0.01,""Twitter Inc."":1,""Sean Penn"":0.989,""Curtis Armstrong"":0.625,""The Hollywood Reporter"":0.203,""Louis Armstrong"":0.348,""Cruise ship"":1,""Chicago"":0.997,""Rebecca De Mornay"":0.434,""Christianity"":0.75,""James Corden"":0.069,""Hollywood"":1,""Stunt"":0.45},""_expires"":""2017-06-24T22:43:00.0008415Z""},{""XSentiment"":0.178008512,""_expires"":""2017-06-24T22:43:01.2750814Z""}]},{""_tag"":""cmplx$http://www.complex.com/music/2017/06/goldlink-releases-crew-remix-featuring-gucci-mane"",""i"":{""constant"":1,""id"":""cmplx$http://www.complex.com/music/2017/06/goldlink-releases-crew-remix-featuring-gucci-mane""},""j"":[{""_title"":""Gucci Mane Jumps on Remix of GoldLink's 
\""Crew\""""},{""RVisionTags"":{""text"":0.9904163},""SVisionAdult"":{""isAdultContent"":false,""isRacyContent"":false,""adultScore"":0.140676036,""racyScore"":0.08041213},""_expires"":""2017-06-25T03:29:03.5393778Z""},{""Emotion0"":{""anger"":0.09716808,""contempt"":0.00540762255,""disgust"":0.0220363177,""fear"":0.0168951228,""happiness"":0.00362249953,""neutral"":0.772761941,""sadness"":0.028037779,""surprise"":0.05407063},""Emotion1"":{""anger"":0.419406265,""contempt"":0.032055337,""disgust"":0.03494472,""fear"":0.004095346,""happiness"":0.218424827,""neutral"":0.2713901,""sadness"":0.0143981427,""surprise"":0.005285231},""_expires"":""2017-06-25T03:29:01.7568819Z""},{""Tags"":{""Gucci Mane"":0.993,""Goldlink"":0.246,""Joshua"":0.003,""Twitter Inc."":0.987,""Public Relations"":0.054,""Washington, D.C."":1,""Shy Glizzy"":0.149,""Turntablism"":0.004,""Shazam"":0.915,""Apple Music"":0.473,""You Can"":0.003,""iTunes"":0.956,""United States"":1,""Country"":0.996,""Miami Heat"":0.888,""Houston Rockets"":0.81,""Los Angeles"":1,""Portland Trail Blazers"":0.972,""Brooklyn Nets"":0.74,""Philadelphia"":1,""Monument Records"":0.173,""Go-go"":0.558,""Down"":0.126,""Album"":0.984,""Xavier Musketeers men's basketball"":0.011,""TEAM*"":0.006,""No Way Out"":0.007},""_expires"":""2017-06-25T03:29:01.4924568Z""},{""XSentiment"":0.7643385,""_expires"":""2017-06-25T03:29:02.1167452Z""}]},{""_tag"":""cmplx$http://www.complex.com/music/2017/06/snoop-dogg-mock-young-thug-and-lil-uzi-vert-moment-i-feared-video"",""i"":{""constant"":1,""id"":""cmplx$http://www.complex.com/music/2017/06/snoop-dogg-mock-young-thug-and-lil-uzi-vert-moment-i-feared-video""},""j"":[{""_title"":""It Looks Like Snoop Dogg Is Mocking Young Thug and Lil Uzi Vert in New Video for \""Moment I 
Feared\""""},{""RVisionTags"":{""person"":0.998487234},""SVisionAdult"":{""isAdultContent"":false,""isRacyContent"":false,""adultScore"":0.0242747813,""racyScore"":0.02104937},""_expires"":""2017-06-24T20:58:27.8900749Z""},{""Emotion0"":{""anger"":0.000228447025,""contempt"":0.0007477614,""disgust"":0.000145930273,""fear"":1.614125E-07,""happiness"":0.00138306723,""neutral"":0.997234,""sadness"":0.000220693677,""surprise"":3.99426281E-05},""_expires"":""2017-06-24T20:58:27.5944467Z""},{""Tags"":{""Snoop Dogg"":1,""Young Thug"":0.959,""Uzi"":0.67,""Vert (music producer)"":0.003,""New Video"":0.003,""Kyle Broflovski"":0.032,""Philadelphia"":0.006,""Twitter Inc."":1,""Subscription business model"":0.008,""Complex"":0.995,""WorldStarHipHop"":0.237,""Rick Rock"":0.449,""Fonzie"":0.026,""Hyphy"":0.228,""YouTube"":0.999,""300 Entertainment"":0.009,""I Do"":0.006,""Billboard Music Award for Woman of the Year"":0.007,""You Know What It Is"":0.007,""Beautiful"":1,""Hip hop music"":1},""_expires"":""2017-06-24T20:58:27.1518034Z""},{""XSentiment"":0.0003323581,""_expires"":""2017-06-24T20:58:28.2026657Z""}]},{""_tag"":""cmplx$http://www.complex.com/sports/2017/06/will-lebron-james-ever-be-the-goat-square-up"",""i"":{""constant"":1,""id"":""cmplx$http://www.complex.com/sports/2017/06/will-lebron-james-ever-be-the-goat-square-up""},""j"":[{""_title"":""Square Up: Will LeBron James Ever Be the G.O.A.T.?""},{""RVisionTags"":{""person"":0.996526659,""outdoor"":0.8773945,""player"":0.8584581,""athletic 
game"":0.8081653,""sport"":0.7153003,""green"":0.640901864},""SVisionAdult"":{""isAdultContent"":false,""isRacyContent"":false,""adultScore"":0.0165802147,""racyScore"":0.0130834375},""_expires"":""2017-06-24T17:04:57.3297536Z""},{""Emotion0"":{""anger"":0.03943785,""contempt"":0.05141066,""disgust"":0.0008398856,""fear"":3.766839E-05,""happiness"":0.00017841009,""neutral"":0.9013937,""sadness"":0.00586816063,""surprise"":0.0008336294},""_expires"":""2017-06-24T17:04:55.650471Z""},{""Tags"":{""Square, Inc."":0.005,""LeBron James"":1,""Complex"":0.926,""Twitter Inc."":0.991,""UNK NBA"":1,""Lil B"":0.415,""Cleveland Cavaliers"":1,""Golden State Warriors"":1},""_expires"":""2017-06-24T17:04:54.617989Z""},{""XSentiment"":0.00127977342,""_expires"":""2017-06-24T17:04:55.400445Z""}]},{""_tag"":""cmplx$http://www.complex.com/life/2017/06/cop-who-killed-philando-castile-says-smell-of-weed-made-him-fear-for-life"",""i"":{""constant"":1,""id"":""cmplx$http://www.complex.com/life/2017/06/cop-who-killed-philando-castile-says-smell-of-weed-made-him-fear-for-life""},""j"":[{""_title"":""Cop Who Killed Philando Castile Says Smell of Weed Made Him Fear for His Life""},{""RVisionTags"":{""sky"":0.998623848,""outdoor"":0.980991364},""SVisionAdult"":{""isAdultContent"":false,""isRacyContent"":false,""adultScore"":0.01796771,""racyScore"":0.0286844},""_expires"":""2017-06-24T16:33:36.9062355Z""},{""_expires"":""2017-06-24T16:33:36.2030935Z""},{""Tags"":{""Scared"":0.011,""Philadelphia"":0.693,""Twitter Inc."":1,""Ramsey County"":0.995,""County attorney"":0.084,""Minnesota"":1,""The Life of the Party"":0.009,""And 
I"":0.003,""Police"":0.238,""Dashcam"":0.092,""Murder"":0.673,""Complex"":0.508},""_expires"":""2017-06-24T16:33:35.8124592Z""},{""XSentiment"":4.440892E-16,""_expires"":""2017-06-24T16:33:36.4218429Z""}]},{""_tag"":""cmplx$http://www.complex.com/music/2017/06/a-history-of-bow-wow-fails"",""i"":{""constant"":1,""id"":""cmplx$http://www.complex.com/music/2017/06/a-history-of-bow-wow-fails""},""j"":[{""_title"":""A History of Bow Wow Taking L's""},{""RVisionTags"":{""person"":0.999946833,""crowd"":0.236136854},""SVisionAdult"":{""isAdultContent"":false,""isRacyContent"":false,""adultScore"":0.02204849,""racyScore"":0.0223767031},""_expires"":""2017-06-24T15:56:22.5001293Z""},{""Emotion0"":{""anger"":1.22613847E-05,""contempt"":0.0200665444,""disgust"":0.000140431745,""fear"":8.001536E-09,""happiness"":0.883786,""neutral"":0.0959461853,""sadness"":1.50497419E-06,""surprise"":4.703166E-05},""_expires"":""2017-06-24T15:56:21.7295682Z""},{""Tags"":{""Bow Wow"":1,""He Is"":0.003,""Forbes"":0.995,""Hip hop music"":1,""iTunes"":0.974,""Atlantic Ocean"":0.051,""Vibe"":0.982,""The Source"":0.991,""GQ"":1,""Esquire"":0.998,""Stephen Sondheim"":0.606,""Twitter Inc."":1,""Snoop Dogg"":1,""The Arsenio Hall Show"":0.043,""Kurtis Blow"":0.325,""Michael Jordan"":0.785,""Vine"":0.585,""Oh, hell"":0.005,""Rent"":0.155,""Los Angeles"":1,""Grammy Awards"":1,""Scuderia Ferrari"":0.099,""Instagram"":0.996,""Migos"":0.01,""December 8"":0.003,""Live television"":0.401,""Grammy Awards Ceremony"":0.102,""Timothy Sykes"":0.173,""President of the United States"":0.998,""Donald Trump"":0.069,""Pacific Time Zone"":0.278,""Funkmaster Flex"":0.375,""Jermaine 
Dupri"":0.996,""Complex"":0.959},""_expires"":""2017-06-24T15:56:22.1957887Z""},{""XSentiment"":1,""_expires"":""2017-06-24T15:56:22.336416Z""}]},{""_tag"":""cmplx$http://www.complex.com/style/2017/06/how-rhude-one-of-best-los-angeles-brands-started-with-a-single-t-shirt"",""i"":{""constant"":1,""id"":""cmplx$http://www.complex.com/style/2017/06/how-rhude-one-of-best-los-angeles-brands-started-with-a-single-t-shirt""},""j"":[{""_title"":""How Rhude, One of the Best L.A. Brands, Started With a Single T-Shirt ""},{""RVisionTags"":{""person"":0.993622959,""floor"":0.989562333,""indoor"":0.98370254,""standing"":0.810940444,""curtain"":0.7955723,""suit"":0.335544765},""SVisionAdult"":{""isAdultContent"":false,""isRacyContent"":false,""adultScore"":0.08148283,""racyScore"":0.0423882678},""_expires"":""2017-06-24T12:04:37.9596756Z""},{""Emotion0"":{""anger"":0.0006152864,""contempt"":0.00599214761,""disgust"":0.0002703283,""fear"":1.61605785E-05,""happiness"":0.00174113235,""neutral"":0.984110057,""sadness"":0.006996317,""surprise"":0.0002585811},""Emotion1"":{""anger"":3.47017163E-07,""contempt"":0.00013010086,""disgust"":2.59245485E-06,""fear"":1.34914092E-06,""happiness"":0.000740572,""neutral"":0.997228861,""sadness"":0.00182852661,""surprise"":6.765087E-05},""_expires"":""2017-06-24T12:04:37.3269255Z""},{""Tags"":{""The Best"":0.013,""Starting pitcher"":0.489,""T-shirt"":0.041,""Twitter Inc."":0.955,""Mixmaster Spade"":0.977,""Kendrick Lamar"":1,""Snoop Dogg"":1,""BET Awards"":0.014,""ASAP Rocky"":0.979,""Kevin Durant"":0.023,""Jimmy Butler"":0.052,""Barneys New York"":0.011,""Patron saint"":0.034,""United States"":1,""Big Sean"":0.986,""Manila"":0.676,""Culture of the Philippines"":0.938,""Trade in Services Agreement"":0.006,""Kanye West"":1,""Arnold Schwarzenegger"":0.004,""Comme des Garçons"":0.987,""Rei Kawakubo"":0.911,""Sugar Land"":0.671,""Texas"":1,""Posttraumatic stress disorder"":0.02,""Earth, Wind & Fire"":0.003,""Mike Jones"":0.007,""White 
American"":0.012,""Prada Marfa"":1,""Elmgreen and Dragset"":0.992,""Complex"":0.992},""_expires"":""2017-06-24T12:04:36.8672077Z""},{""XSentiment"":1,""_expires"":""2017-06-24T12:04:37.5488193Z""}]},{""_tag"":""cmplx$http://www.complex.com/music/2017/06/bow-wow-gets-backlash-over-sexist-instagram-caption"",""i"":{""constant"":1,""id"":""cmplx$http://www.complex.com/music/2017/06/bow-wow-gets-backlash-over-sexist-instagram-caption""},""j"":[{""_title"":""The Internet Blasts Bow Wow for His Message About Women's Behavior""},{""RVisionTags"":{""person"":0.962055564,""man"":0.9547968},""SVisionAdult"":{""isAdultContent"":false,""isRacyContent"":false,""adultScore"":0.0112607479,""racyScore"":0.0106668333},""TVisionCelebrities"":{""Bow Wow"":0.9601661},""_expires"":""2017-06-24T01:43:32.5645689Z""},{""Emotion0"":{""anger"":0.0011651055,""contempt"":0.005131879,""disgust"":0.000264819653,""fear"":2.01475978E-05,""happiness"":0.000101240934,""neutral"":0.98257935,""sadness"":0.0105332062,""surprise"":0.000204226963},""_expires"":""2017-06-24T01:43:31.8143804Z""},{""Tags"":{""The Internet"":0.661,""Bow Wow"":0.398,""Twitter Inc."":1,""Instagram"":0.998,""I Wonder Why"":0.004,""Sexism"":0.362},""_expires"":""2017-06-24T01:43:31.5472109Z""},{""XSentiment"":0.99999994,""_expires"":""2017-06-24T01:43:32.2439044Z""}]}]},""p"":[0.8153846,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154,0.0153846154],""VWState"":{""m"":""4c5bcf8eb1ef4d3ba327dacba2f336a1/4c5bcf8eb1ef4d3ba327dacba2f336a1""}}"; using (var vw = new VowpalWabbit("--cb_adf")) { var obj = JsonConvert.DeserializeObject(json); var bytes = new byte[Encoding.UTF8.GetMaxByteCount(json.Length) + 1]; var byteLen = Encoding.UTF8.GetBytes(json, 0, json.Length, bytes, 0) + 1;// trailing \0 VowpalWabbitDecisionServiceInteractionHeader header; List examples = null; try { examples = vw.ParseDecisionServiceJson(bytes, 0, byteLen, copyJson: 
false, header: out header); Assert.AreEqual("7cacacea2c6e49b5b922f6f517a325ed", header.EventId); CollectionAssert.AreEqual(new[] { 9, 4, 13, 10, 8, 5, 2, 3, 12, 11, 7, 6, 1 }, header.Actions, "Actions mismatch"); CollectionAssert.AreEqual(new[] { 0.8153846f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f, 0.0153846154f }, header.Probabilities, "Probabilities mismatch"); Assert.AreEqual(0, header.ProbabilityOfDrop); Assert.AreEqual(14, examples.Count); } finally { if (examples != null) foreach (var ex in examples) if (ex != null) ex.Dispose(); } } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestPartialJson() { using (var validator = new VowpalWabbitExampleJsonValidator(new VowpalWabbitSettings { Arguments = "--cb_adf", PropertyConfiguration = new PropertyConfiguration { MultiProperty = "adf", TextProperty = "someText", FeatureIgnorePrefix = "xxx" } })) { var json = "{\"eventId\":123,\"c\":{\"Age\":25,\"adf\":[{\"someText\":\"w1 w2\", \"a\":{\"x\":1}, \"xxxxIgnoreMe\":2}, {\"someText\":\"w2 w3\"}], \"_labelIndex\":1, \"_label_Cost\":-1, \"_label_Probability\":0.3},\"post\":456}"; using (var textReader = new JsonTextReader(new StringReader(json))) { textReader.Read(); Assert.AreEqual(JsonToken.StartObject, textReader.TokenType); textReader.Read(); Assert.AreEqual(JsonToken.PropertyName, textReader.TokenType); Assert.AreEqual("eventId", textReader.Value); textReader.Read(); Assert.AreEqual(JsonToken.Integer, textReader.TokenType); Assert.AreEqual((Int64)123, textReader.Value); textReader.Read(); Assert.AreEqual(JsonToken.PropertyName, textReader.TokenType); Assert.AreEqual("c", textReader.Value); textReader.Read(); validator.Validate(new[] { "shared | Age:25", " | w1 w2 |a x:1", "0:-1:.3 | w2 w3" }, textReader, VowpalWabbitLabelComparator.ContextualBandit); textReader.Read(); Assert.AreEqual(JsonToken.PropertyName, 
textReader.TokenType); Assert.AreEqual("post", textReader.Value); textReader.Read(); Assert.AreEqual((Int64)456, textReader.Value); } } } private void TestDecisionServiceJson(string json, bool expectException = true) { using (var vw = new VowpalWabbit("--cb_adf")) { var bytes = Encoding.UTF8.GetBytes(json); VowpalWabbitDecisionServiceInteractionHeader header; List examples = null; try { examples = vw.ParseDecisionServiceJson(bytes, 0, bytes.Length, copyJson: false, header: out header); if (expectException) Assert.Fail("Excepted exception"); } catch (Exception) { if (!expectException) throw; } finally { if (examples != null) foreach (var ex in examples) if (ex != null) ex.Dispose(); } } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestDecisionServiceJsonOutOfBounds() { var json = @"{""EventId"":""abc"",""a"":[1,2,3],""Version"":""1"",""c"":{""u"":{""loc"":""New York""},""_multi"":[]},""p"":[0.8,0.1,0.1]}"; TestDecisionServiceJson(json); for (int i = json.Length; i >= 0; i--) { var jsonSub = json.Substring(0, i); Console.WriteLine(jsonSub); Console.Out.Flush(); TestDecisionServiceJson(jsonSub, true); } for (int i = 1; i < json.Length; i++) { var jsonSub = json.Substring(i, json.Length - i); Console.WriteLine(jsonSub); Console.Out.Flush(); TestDecisionServiceJson(jsonSub, true); } } public class MyContext { [Feature] public int Feature { get; set; } [JsonProperty("_multi")] public IEnumerable Multi { get; set; } } public class MyADF { [Feature] public int Foo { get; set; } } [TestMethod] public void TestNumADFs() { var jsonDirectSerializer = VowpalWabbitSerializerFactory.CreateSerializer(new VowpalWabbitSettings { TypeInspector = JsonTypeInspector.Default }) as IVowpalWabbitMultiExampleSerializerCompiler; Assert.IsNotNull(jsonDirectSerializer); Assert.AreEqual(3, jsonDirectSerializer.GetNumberOfActionDependentExamples(new MyContext { Multi = new MyADF[3] })); } } } 
vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestJsonDict.cs000066400000000000000000000233261332666127000225130ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using Newtonsoft.Json; using Newtonsoft.Json.Serialization; using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Globalization; using System.Linq; using System.Text; using System.Threading; using System.Threading.Tasks; using VW; using VW.Serializer; namespace cs_unittest { [TestClass] public class TestJsonDictClass { public class Context { public Context(float[] vector, int id, JsonSerializerSettings settings) { this.Vector = vector; this.Id = id; this.JSON = JsonConvert.SerializeObject(this, settings); var sb = new StringBuilder(); sb.AppendFormat("| Id:{0}", this.Id); foreach (var v in vector) sb.AppendFormat(CultureInfo.InvariantCulture, " :{0}", v); this.VW = sb.ToString(); } [JsonProperty(IsReference = true)] public float[] Vector { get; private set; } [JsonProperty] public int Id { get; private set; } [JsonIgnore] public string VW { get; set; } [JsonIgnore] public string JSON { get; set; } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestJsonDict() { var vec = new float[] { 1, 2, 3 }; var jsonResolver = new RefResolve(); var settings = new JsonSerializerSettings { ReferenceResolverProvider = () => jsonResolver }; var ctx1 = new Context(vec, 1, settings); var ctx2 = new Context(vec, 2, settings); using (var vw = new VowpalWabbit(new VowpalWabbitSettings { EnableStringExampleGeneration = true })) using (var resolver = new VowpalWabbitJsonReferenceResolver(serializer => Assert.Fail())) using (var serializer1 = new VowpalWabbitJsonSerializer(vw, resolver)) using (var example1 = serializer1.ParseAndCreate(ctx1.JSON)) using (var serializer2 = new VowpalWabbitJsonSerializer(vw, resolver)) using (var example2 = serializer2.ParseAndCreate(ctx2.JSON)) using (var validator = new VowpalWabbitExampleJsonValidator()) { 
validator.Validate("| Id:1 :1 :2 :3", example1); validator.Validate(ctx1.VW, example1); validator.Validate("| Id:2 :1 :2 :3", example2); validator.Validate(ctx2.VW, example2); } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestJsonDictReverse() { var vec = new float[] { 1, 2, 3 }; var jsonResolver = new RefResolve(); var settings = new JsonSerializerSettings { ReferenceResolverProvider = () => jsonResolver }; var ctx1 = new Context(vec, 1, settings); var ctx2 = new Context(vec, 2, settings); VowpalWabbitJsonSerializer delayedSerializer = null; using (var validator = new VowpalWabbitExampleJsonValidator()) using (var vw = new VowpalWabbit(new VowpalWabbitSettings { EnableStringExampleGeneration = true })) using (var resolver = new VowpalWabbitJsonReferenceResolver(serializer => delayedSerializer = serializer)) { var serializer2 = new VowpalWabbitJsonSerializer(vw, resolver); var example2 = serializer2.ParseAndCreate(ctx2.JSON); // incomplete data Assert.IsNull(example2); // triggers example2 completion using (var serializer1 = new VowpalWabbitJsonSerializer(vw, resolver)) using (var example1 = serializer1.ParseAndCreate(ctx1.JSON)) { validator.Validate("| Id:1 :1 :2 :3", example1); } Assert.IsNotNull(delayedSerializer); using (var delayedExample2 = delayedSerializer.CreateExamples()) { validator.Validate("| Id:2 :1 :2 :3", delayedExample2); } delayedSerializer.Dispose(); } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestJsonDictThreading() { var jsonResolver = new RefResolve(); var settings = new JsonSerializerSettings { ReferenceResolverProvider = () => jsonResolver }; var rnd = new Random(123); var examples = new List(); var id = 0; // different reference objects for (int i = 0; i < 10; i++) { var data = Enumerable.Range(1, 5).Select(_ => (float)rnd.Next(10)).ToArray(); // referencing the same data for (int j = 0; j < 5; j++) examples.Add(new Context(data, id++, settings)); } for (int i = 0; i < 4; i++) { Permute(examples, rnd); for 
(int maxDegreeOfParallelism = 1; maxDegreeOfParallelism < 4; maxDegreeOfParallelism++) { var examplesFound = 0; using (var vw = new VowpalWabbit(new VowpalWabbitSettings { EnableStringExampleGeneration = true, EnableThreadSafeExamplePooling = true })) using (var resolver = new VowpalWabbitJsonReferenceResolver(serializer => { using (var example = serializer.CreateExamples()) { ValidateExample(example, (Context)serializer.UserContext); } serializer.Dispose(); Interlocked.Increment(ref examplesFound); })) { Parallel.ForEach( Partitioner.Create(0, examples.Count), new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }, range => { for (int j = range.Item1; j < range.Item2; j++) { var ctx = examples[j]; var serializer = new VowpalWabbitJsonSerializer(vw, resolver) { UserContext = ctx }; var example = serializer.ParseAndCreate(ctx.JSON); // example not ready yet if (example == null) continue; ValidateExample(example, ctx); example.Dispose(); serializer.Dispose(); Interlocked.Increment(ref examplesFound); } }); } Assert.AreEqual(examples.Count, examplesFound); } } } public void ValidateExample(VowpalWabbitExampleCollection example, Context ctx) { using (var vw = new VowpalWabbit(new VowpalWabbitSettings { EnableStringExampleGeneration = true })) using (var validator = new VowpalWabbitExampleJsonValidator()) { var singleExample = (VowpalWabbitSingleLineExampleCollection)example; validator.Validate(ctx.VW, example); } } public static void Permute(List arr, Random rnd) { for (int i = 0; i < arr.Count - 1; i++) { int swapIndex = rnd.Next(i, arr.Count); T temp = arr[swapIndex]; arr[swapIndex] = arr[i]; arr[i] = temp; } } public class RefResolve : IReferenceResolver { private readonly IDictionary data; private readonly IDictionary otherData; private class ReferenceEqualityComparer : IEqualityComparer { bool IEqualityComparer.Equals(object x, object y) { return object.ReferenceEquals(x, y); } public int GetHashCode(object obj) { return obj.GetHashCode(); } } 
public RefResolve() { this.data = new Dictionary(new ReferenceEqualityComparer()); this.otherData = new Dictionary(); } public object ResolveReference(object context, string reference) { return this.otherData[reference]; } public string GetReference(object context, object value) { foreach (var kv in this.otherData) { if (object.ReferenceEquals(kv.Value, value)) return kv.Key; } var id = Guid.NewGuid().ToString(); this.AddReference(null, id, value); return id; } public bool IsReferenced(object context, object value) { return this.otherData.Any(kv => object.ReferenceEquals(kv.Value, value)) || this.data.ContainsKey(value); } public void AddReference(object context, string reference, object value) { this.otherData.Add(reference, value); this.data.Add(value, value); } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestJsonDirect.cs000066400000000000000000000371201332666127000230370ustar00rootroot00000000000000// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. 
// // -------------------------------------------------------------------------------------------------------------------- using Microsoft.VisualStudio.TestTools.UnitTesting; using Newtonsoft.Json; using System.Collections.Generic; using System.Linq; using VW; using VW.Labels; using VW.Serializer; namespace cs_unittest { [TestClass] public class TestJsonDirectClass { [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestJsonDirect() { using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings { TypeInspector = JsonTypeInspector.Default })) { vw.Validate("| Clicks:5 |a Bar:1 Age25_old |b Marker", new JsonContext() { Ns1 = new Namespace1 { Foo = 1, Age = "25 old", DontConsider = "XXX" }, Ns2 = new Namespace2 { FeatureA = true }, Clicks = 5 }); vw.Validate("| Clicks:5 |a Bar:1", new JsonContext() { Ns1 = new Namespace1 { Foo = 1, DontConsider = "XXX" }, Clicks = 5, IgnoreMe = "true" }); } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestJsonDirectWithLabel() { using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings { TypeInspector = JsonTypeInspector.Default })) { vw.Validate("13 | Clicks:5 MoreClicks:3", new JsonContext() { Label = new SimpleLabel { Label = 13 }, Clicks = 5, MoreClicks = 3, IgnoreMe2 = "YYY" }); } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestJsonOptIn() { using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings { TypeInspector = JsonTypeInspector.Default })) { vw.Validate("| Clicked |Ns2 Marker", new JsonContextOptIn() { Clicked = true, IgnoredNamespace = new Namespace1 { Foo = 3 }, Ns2 = new Namespace2 { FeatureA = true } }); } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestLabelJsonSerialization() { Assert.AreEqual( "{\"_label\":{\"Label\":25.0},\"Clicks\":0,\"MoreClicks\":3}", JsonConvert.SerializeObject(new JsonContext() { Label = new SimpleLabel { Label = 25 }, MoreClicks = 3, IgnoreMe = "XXX" })); } [TestMethod] 
[TestCategory("Vowpal Wabbit/JSON")] public void TestJsonArray() { using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings { TypeInspector = JsonTypeInspector.Default })) { vw.Validate("1:2:.5 |Data :.1 :.2 :.3", new JsonContextArray() { Label = new ContextualBanditLabel { Action = 1, Cost = 2, Probability = .5f }, Data = new[] { .1f, .2f, .3f } }); } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] [TestCategory("Vowpal Wabbit/Marshal")] public void TestJsonDictionaryStringFloat() { using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings { TypeInspector = JsonTypeInspector.Default })) { vw.Validate("|Features Feature1:2.1 Feature2:3.2", new POCODict { Features = new Dictionary { { "Feature1", 2.1f }, { "Feature2", 3.2f } } }); } // test serialzier caching too using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings { TypeInspector = TypeInspector.All })) { vw.Validate("| Abc:2.1 def:3.2", new POCODict { Features = new Dictionary { { "Abc", 2.1f }, { "def", 3.2f } } }); } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestJsonByte() { using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings { TypeInspector = JsonTypeInspector.Default })) { vw.Validate("| Feature:25", new JsonContextByte { Feature = 25 }); } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestJsonDirectText() { using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings { TypeInspector = JsonTypeInspector.Default })) { vw.Validate("| a b c |a d e f", new JsonText { Text = "a b c", AuxInfo = "Foo", A = new JsonText { Text = "d e f", AuxInfo = "Bar" } }); } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestJsonDirectMulti() { using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings("--cb_adf") { TypeInspector = JsonTypeInspector.Default })) { vw.Validate(new[] { "shared | Ageteen", " | Id:1", " | Id:2" }, new JsonShared { Age = "teen", Documents = 
new[] { new JsonADF { Id = 1 }, new JsonADF { Id = 2 } } }); } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestJsonDirectMultiList() { using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings("--cb_adf") { TypeInspector = JsonTypeInspector.Default })) { vw.Validate(new[] { " | Id:1", " | Id:2" }, new JsonSharedList { _multi = new List { new JsonADF { Id = 1 }, new JsonADF { Id = 2 } } }); vw.Validate(new[] { "shared | Ageteen", " | Id:1" }, new JsonSharedList { Age = "teen", _multi = new List { new JsonADF { Id = 1 } } }); } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestJsonDirectMultiEmpty() { using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings { TypeInspector = JsonTypeInspector.Default })) { vw.Validate(new[] { " | Id:1", " | Id:2" }, new JsonSharedEmpty { Age = "ignored", _multi = new[] { new JsonADF { Id = 1 }, new JsonADF { Id = 2 } } }); } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestJsonConvertibleMulti() { using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings("--cb_adf") { TypeInspector = JsonTypeInspector.Default })) { vw.Validate(new[] { "shared | Bar:5", " | Foo:1 |Value test:1.2", " | Foo:2 |Value test:2.3", " | Foo:3 |Value titleabc\"def", }, new JsonRawAdfString { Bar = 5, _multi = new[] { new JsonRawString { Foo = 1, Value = JsonConvert.SerializeObject(new { test = 1.2 }) }, new JsonRawString { Foo = 2, Value = JsonConvert.SerializeObject(new { test = 2.3 }) }, new JsonRawString { Foo = 3, Value = JsonConvert.SerializeObject(new { title = "abc\"def", _ignoreMe = 1 }) }, } }); var adf = new JsonRawString { Foo = 1, Value = JsonConvert.SerializeObject(new { A = new { test = 1.2 }, B = new { bar = 2 } }), Values = new[] { JsonConvert.SerializeObject(new { D = new { d = 1.2 } }), JsonConvert.SerializeObject(new { E = new { e = true } }), }.ToList() }; var ctx = new JsonRawAdfString { Bar = 5, _multi = new[] { adf } }; 
vw.Validate(new[] { "shared | Bar:5", " | Foo:1 |A test:1.2 |B bar:2 |D d:1.2 |E e" }, ctx); } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestJsonConvertible() { using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings("") { TypeInspector = JsonTypeInspector.Default })) { var adf = new JsonRawString { Foo = 1, Value = JsonConvert.SerializeObject(new { A = new { test = 1.2 }, B = new { bar = 2 } }), Values = new[] { JsonConvert.SerializeObject(new { D = new { d = 1.2 } }), JsonConvert.SerializeObject(new { E = new { e = true } }), JsonConvert.SerializeObject(new { F = new { title = "abc\"def" } }), }.ToList() }; vw.Validate(" | Foo:1 |A test:1.2 |B bar:2 |D d:1.2 |E e |F titleabc\"def", adf); } } [TestMethod] [TestCategory("Vowpal Wabbit/JSON")] public void TestJsonDictArray() { using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings(string.Empty) { TypeInspector = JsonTypeInspector.Default })) { var ex = new JsonDictArray { Features = new Dictionary { { "A", new float[] { 1, 2, 3.1f} }, { "B", new float[] { 2, 3, 4.1f} } } }; vw.Validate(" |A :1 :2 :3.1 |B :2 :3 :4.1", ex); } } } public class JsonRawString { public int Foo { get; set; } [JsonConverter(typeof(JsonRawStringConverter))] public string Value { get; set; } [JsonConverter(typeof(JsonRawStringListConverter))] public List Values { get; set; } } public class JsonRawAdfString { public int Bar { get; set; } public JsonRawString[] _multi { get; set; } } public class JsonText { [JsonProperty("_text")] public string Text { get; set; } [JsonProperty("_auxInfo")] public string AuxInfo { get; set; } [JsonProperty("a")] public JsonText A { get; set; } } [JsonObject(MemberSerialization = MemberSerialization.OptOut)] public class JsonContextArray { [JsonIgnore] public ILabel Label { get; set; } public float[] Data { get; set; } } [JsonObject(MemberSerialization = MemberSerialization.OptIn)] public class JsonContextOptIn { public int IgnoreMe { get; set; } 
[JsonProperty] public bool Clicked { get; set; } public Namespace1 IgnoredNamespace { get; set; } [JsonProperty] public Namespace2 Ns2 { get; set; } } public class JsonContext { [JsonProperty(PropertyName = "_label")] public SimpleLabel Label { get; set; } [JsonProperty(PropertyName = "a", NullValueHandling = NullValueHandling.Ignore)] public Namespace1 Ns1 { get; set; } [JsonProperty(PropertyName = "b", NullValueHandling = NullValueHandling.Ignore)] public Namespace2 Ns2 { get; set; } [JsonProperty] public int Clicks { get; set; } public int MoreClicks { get; set; } [JsonIgnore] public object IgnoreMe { get; set; } [JsonProperty(PropertyName = "_aux", NullValueHandling = NullValueHandling.Ignore)] public object IgnoreMe2 { get; set; } } public class JsonContextByte { public byte Feature { get; set; } } public class Namespace1 { [JsonProperty(PropertyName = "Bar", NullValueHandling = NullValueHandling.Ignore)] public int Foo { get; set; } [JsonProperty] public string Age { get; set; } [JsonIgnore] public string DontConsider { get; set; } [JsonProperty] public string EscapeCharacterString { get; set; } [JsonProperty("_text")] public string EscapeCharactersText { get; set; } } public class Namespace2 { [JsonProperty("Marker")] public bool FeatureA { get; set; } } public class JsonShared { public string Age { get; set; } [JsonProperty("_multi")] public JsonADF[] Documents { get; set; } } public class JsonSharedList { public string Age { get; set; } public List _multi { get; set; } } public class JsonSharedEmpty { [JsonProperty("_ignoreMe")] public string Age { get; set; } public IEnumerable _multi { get; set; } } public class JsonADF { public int Id { get; set; } } public class JsonDictArray { public Dictionary Features { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestLabels.cs000066400000000000000000000056311332666127000221770ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections.Generic; 
using System.Linq; using System.Text; using System.Threading.Tasks; using VW; using VW.Labels; using VW.Serializer; using VW.Serializer.Attributes; namespace cs_unittest { [TestClass] public class TestLabelsClass { [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] [TestCategory("Vowpal Wabbit")] public void TestLabels() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("3.2 | Feature:25", new SimpleContext { Feature = 25, Label = new SimpleLabel { Label = 3.2f } }); vw.Validate("| Feature:25", new SimpleContext { Feature = 25, }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] [TestCategory("Vowpal Wabbit")] public void TestLabelsNoAnnotation() { using (var vw = new VowpalWabbitExampleValidator( new VowpalWabbitSettings { TypeInspector = TypeInspector.All })) { vw.Validate("3.2 | Feature:25", new SimpleContextNoAnnotation { Feature = 25, Label = new SimpleLabel { Label = 3.2f } }); vw.Validate("| Feature:25", new SimpleContextNoAnnotation { Feature = 25, }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] [TestCategory("Vowpal Wabbit")] public void TestStringLabels() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("3.2 | Feature:25", new SimpleStringContext { Feature = 25, Label = "3.2" }); vw.Validate("| Feature:25", new SimpleStringContext { Feature = 25, }); } } } public class SimpleContext { [Feature] public int Feature { get; set; } [Label] public ILabel Label { get; set; } } public class SimpleContextNoAnnotation { public int Feature { get; set; } public ILabel Label { get; set; } } public class SimpleStringContext { [Feature] public int Feature { get; set; } [Label] public string Label { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestLeak.cs000066400000000000000000000004521332666127000216450ustar00rootroot00000000000000using VW; namespace cs_unittest { public class TestLeakClass { public void Leak() { #if DEBUG VowpalWabbitLeakTest.Leak(); #endif } public void 
NoLeak() { #if DEBUG VowpalWabbitLeakTest.NoLeak(); #endif } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestManagedHash.cs000066400000000000000000000125551332666127000231400ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Diagnostics; using System.Text; using TrainSet0002Dat; using VW; namespace cs_unittest { [TestClass] public class TestManagedHash { static string[] data = { "wLceNLHVtpuZYtPwPaQ", "949007 ", "Kvq7Hj6RSImhZUhcZuL", "KDqmcZO2h6CIM1j ", "656656 ", "X8bRcLpb8yrIkA2A ", "k5USpack9N ", "Okv90y9lOamog3qXRIk", "860628 ", "903251 ", "289149 ", "727293 ", "660473 ", "ciajGNL930GlKi5b ", "744202 ", "458807 ", "466125 ", "101824 ", "0RVP8HWyKGFjkJG8RA ", "mm3XQ0ZKJQ4rPmtNN ", "ZL7Z6bzFVsL0VQGe5ss", "690592 ", "Apbr4WNUDHmL7OWxm ", "342052 ", "286245 ", "JbIIXVVbS3Y79uj4iI ", "S9E90IvFAVt ", "z2QWOpzi63 ", "gnCClcujq79 ", "hOZPaw9s4922I3S ", "mFWZjVtCOiymM2 ", "m6a93w7IRLNaadJbL ", "758870 ", "164290 ", "971935 ", "MVObGSH9iWxiyvp ", "135400 ", "T2b9WalhX9c ", "CQaS6KtGArRLtM5v ", "B0lNkkeP57ZLJjZAwfP", "695049 ", "BSbUX2YPm1daHvo6 ", "ReVgoh7mtQpghPDl ", "I0RnHRdk5IRFHJZaZST", "489901 ", "a9IZGkY6WLtX0X37D ", "061731 ", "402102 ", "IgRGpl2Z0OdgNzr6AH ", "tNlzNvlPQ0hXFlzjpe8", "m2JmhQ8L6DEnauuvSst", "141010 ", "534087 ", "599686 ", "000093 ", "707313 ", "563622 ", "HlcM6fNDjW ", "4qEn6lfmhd2b6Fo ", "ph5x9nJTFV ", "783062 ", "403127 ", "fGbvUKatET3SAf0rfA ", "IduDv41Z1z7Opirz ", "625285 ", "HbsPUqTZvWHI4ylB ", "554240 ", "849636 ", "1ElP3So1fCS ", "539836 ", "jELB4FrYkqwpmecr ", "Ko4EWBb3gFqN0PR7pvf", "VMX4dVyfAZ0V9VwK ", "K0BYm86Zg8PogMNSo ", "ajfcoff0sqt ", "373791 ", "220160 ", "dxZoyNeZZMiO ", "286375 ", "DEy4nNiHHd9nN ", "3gverMSb6ANY3wLj ", "ATGPA40OShUer ", "548754 ", "7NdgIl223apO ", "aaas hu as 撒 asfasd 阿萨", "oof Ồ hử hị ộ ở ỗ õ ẽ uyễn \r \t \n \\ ", "934625 ", "123" }; [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestHash() { InternalTestHash(""); InternalTestHash("--hash all"); 
InternalTestHash("--hash strings"); } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestHashSpace() { using (var vw = new VowpalWabbit("")) { Assert.AreEqual(0ul, vw.HashSpace(" ")); Assert.AreEqual(0ul, vw.HashSpace("0")); } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestHashUnicodeSpace() { using (var vw = new VowpalWabbit("")) { var value = "ArticleTitleThe_25_Biggest_Art_Moments_of_2012" + (char)160; // Encoding.UTF8.GetMaxByteCount var nativeHash = vw.HashSpaceNative(value); var managedHash = vw.HashSpace(value); Assert.AreEqual(nativeHash, managedHash); } } private void InternalTestHash(string args) { var stopWatchNative = new Stopwatch(); var stopWatchManaged = new Stopwatch(); using (var vw = new VowpalWabbit(args)) { for (int i = 0; i < 10000; i++) { foreach (var item in data) { stopWatchNative.Start(); var nativeHash = vw.HashSpaceNative(item); stopWatchNative.Stop(); stopWatchManaged.Start(); var managedHash = vw.HashSpace(item); stopWatchManaged.Stop(); Assert.AreEqual(nativeHash, managedHash, item); } } } Console.WriteLine("Args: " + args); Console.WriteLine("native: {0}", stopWatchNative.Elapsed); Console.WriteLine("managed: {0}", stopWatchManaged.Elapsed); } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestMarshalNumeric.cs000066400000000000000000001614101332666127000237050ustar00rootroot00000000000000 using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using VW.Serializer; using VW.Serializer.Attributes; namespace cs_unittest { [TestClass] public class TestMarshalNumeric { [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNumericByte() { using(var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| Value:25", new NumericExampleByte() { Value = 25 }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNumericByteArray() { 
using(var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| :4 :2 :3", new NumericExampleByteArray() { Value = new System.Byte[] { 4, 2, 3 } }); vw.Validate("", new NumericExampleByteArray()); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNumericByteArrayAnchor() { using(var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| :1 :4 :2 :3", new NumericExampleByteArrayAnchor() { Value = new System.Byte[] { 4, 2, 3 } }); vw.Validate("", new NumericExampleByteArrayAnchor()); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNumericSByte() { using(var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| Value:25", new NumericExampleSByte() { Value = 25 }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNumericSByteArray() { using(var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| :4 :2 :3", new NumericExampleSByteArray() { Value = new System.SByte[] { 4, 2, 3 } }); vw.Validate("", new NumericExampleSByteArray()); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNumericSByteArrayAnchor() { using(var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| :1 :4 :2 :3", new NumericExampleSByteArrayAnchor() { Value = new System.SByte[] { 4, 2, 3 } }); vw.Validate("", new NumericExampleSByteArrayAnchor()); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNumericInt16() { using(var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| Value:25", new NumericExampleInt16() { Value = 25 }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNumericInt16Array() { using(var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| :4 :2 :3", new NumericExampleInt16Array() { Value = new System.Int16[] { 4, 2, 3 } }); vw.Validate("", new NumericExampleInt16Array()); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] 
// Validates an anchored Int16[] Value of {4, 2, 3} against "| :1 :4 :2 :3", and an instance with Value unset against "".
    public void TestNumericInt16ArrayAnchor()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var populated = new NumericExampleInt16ArrayAnchor { Value = new System.Int16[] { 4, 2, 3 } };
            validator.Validate("| :1 :4 :2 :3", populated);

            var unset = new NumericExampleInt16ArrayAnchor();
            validator.Validate("", unset);
        }
    }

    /// <summary>Validates an Int32 Value of 25 against "| Value:25".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericInt32()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var sample = new NumericExampleInt32 { Value = 25 };
            validator.Validate("| Value:25", sample);
        }
    }

    /// <summary>Validates an Int32[] Value of {4, 2, 3} against "| :4 :2 :3", and an instance with Value unset against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericInt32Array()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var populated = new NumericExampleInt32Array { Value = new System.Int32[] { 4, 2, 3 } };
            validator.Validate("| :4 :2 :3", populated);

            var unset = new NumericExampleInt32Array();
            validator.Validate("", unset);
        }
    }

    /// <summary>Validates an anchored Int32[] Value of {4, 2, 3} against "| :1 :4 :2 :3", and an instance with Value unset against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericInt32ArrayAnchor()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var populated = new NumericExampleInt32ArrayAnchor { Value = new System.Int32[] { 4, 2, 3 } };
            validator.Validate("| :1 :4 :2 :3", populated);

            var unset = new NumericExampleInt32ArrayAnchor();
            validator.Validate("", unset);
        }
    }

    /// <summary>Validates a UInt16 Value of 25 against "| Value:25".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericUInt16()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var sample = new NumericExampleUInt16 { Value = 25 };
            validator.Validate("| Value:25", sample);
        }
    }

    /// <summary>Validates a UInt16[] Value of {4, 2, 3} against "| :4 :2 :3", and an instance with Value unset against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericUInt16Array()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var populated = new NumericExampleUInt16Array { Value = new System.UInt16[] { 4, 2, 3 } };
            validator.Validate("| :4 :2 :3", populated);

            var unset = new NumericExampleUInt16Array();
            validator.Validate("", unset);
        }
    }

    /// <summary>Validates an anchored UInt16[] Value of {4, 2, 3} against "| :1 :4 :2 :3", and an instance with Value unset against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericUInt16ArrayAnchor()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var populated = new NumericExampleUInt16ArrayAnchor { Value = new System.UInt16[] { 4, 2, 3 } };
            validator.Validate("| :1 :4 :2 :3", populated);

            validator.Validate("", new
NumericExampleUInt16ArrayAnchor());
        }
    }

    /// <summary>Validates a UInt32 Value of 25 against "| Value:25".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericUInt32()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var sample = new NumericExampleUInt32 { Value = 25 };
            validator.Validate("| Value:25", sample);
        }
    }

    /// <summary>Validates a UInt32[] Value of {4, 2, 3} against "| :4 :2 :3", and an instance with Value unset against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericUInt32Array()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var populated = new NumericExampleUInt32Array { Value = new System.UInt32[] { 4, 2, 3 } };
            validator.Validate("| :4 :2 :3", populated);

            var unset = new NumericExampleUInt32Array();
            validator.Validate("", unset);
        }
    }

    /// <summary>Validates an anchored UInt32[] Value of {4, 2, 3} against "| :1 :4 :2 :3", and an instance with Value unset against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericUInt32ArrayAnchor()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var populated = new NumericExampleUInt32ArrayAnchor { Value = new System.UInt32[] { 4, 2, 3 } };
            validator.Validate("| :1 :4 :2 :3", populated);

            var unset = new NumericExampleUInt32ArrayAnchor();
            validator.Validate("", unset);
        }
    }

    /// <summary>Validates a Single Value of 25 against "| Value:25".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericSingle()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var sample = new NumericExampleSingle { Value = 25 };
            validator.Validate("| Value:25", sample);
        }
    }

    /// <summary>Validates a Single[] Value of {4, 2, 3} against "| :4 :2 :3", and an instance with Value unset against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericSingleArray()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var populated = new NumericExampleSingleArray { Value = new System.Single[] { 4, 2, 3 } };
            validator.Validate("| :4 :2 :3", populated);

            var unset = new NumericExampleSingleArray();
            validator.Validate("", unset);
        }
    }

    /// <summary>Validates an anchored Single[] Value of {4, 2, 3} against "| :1 :4 :2 :3", and an instance with Value unset against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericSingleArrayAnchor()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var populated = new NumericExampleSingleArrayAnchor { Value = new System.Single[] { 4, 2, 3 } };
            validator.Validate("| :1 :4 :2 :3", populated);

            var unset = new NumericExampleSingleArrayAnchor();
            validator.Validate("", unset);
        }
    }

    /// <summary>Validates an Int64 Value of 25 against "| Value:25".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericInt64()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            validator.Validate("| Value:25", new NumericExampleInt64() {
Value = 25 });
        }
    }

    /// <summary>Validates an Int64[] Value of {4, 2, 3} against "| :4 :2 :3", and an instance with Value unset against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericInt64Array()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var populated = new NumericExampleInt64Array { Value = new System.Int64[] { 4, 2, 3 } };
            validator.Validate("| :4 :2 :3", populated);

            var unset = new NumericExampleInt64Array();
            validator.Validate("", unset);
        }
    }

    /// <summary>Validates an anchored Int64[] Value of {4, 2, 3} against "| :1 :4 :2 :3", and an instance with Value unset against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericInt64ArrayAnchor()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var populated = new NumericExampleInt64ArrayAnchor { Value = new System.Int64[] { 4, 2, 3 } };
            validator.Validate("| :1 :4 :2 :3", populated);

            var unset = new NumericExampleInt64ArrayAnchor();
            validator.Validate("", unset);
        }
    }

    /// <summary>Validates a UInt64 Value of 25 against "| Value:25".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericUInt64()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var sample = new NumericExampleUInt64 { Value = 25 };
            validator.Validate("| Value:25", sample);
        }
    }

    /// <summary>Validates a UInt64[] Value of {4, 2, 3} against "| :4 :2 :3", and an instance with Value unset against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericUInt64Array()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var populated = new NumericExampleUInt64Array { Value = new System.UInt64[] { 4, 2, 3 } };
            validator.Validate("| :4 :2 :3", populated);

            var unset = new NumericExampleUInt64Array();
            validator.Validate("", unset);
        }
    }

    /// <summary>Validates an anchored UInt64[] Value of {4, 2, 3} against "| :1 :4 :2 :3", and an instance with Value unset against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericUInt64ArrayAnchor()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var populated = new NumericExampleUInt64ArrayAnchor { Value = new System.UInt64[] { 4, 2, 3 } };
            validator.Validate("| :1 :4 :2 :3", populated);

            var unset = new NumericExampleUInt64ArrayAnchor();
            validator.Validate("", unset);
        }
    }

    /// <summary>Validates a Double Value of 25 against "| Value:25".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericDouble()
    {
        using (var validator = new VowpalWabbitExampleValidator(string.Empty))
        {
            var sample = new NumericExampleDouble { Value = 25 };
            validator.Validate("| Value:25", sample);
        }
    }

    /// <summary>Validates a Double[] Value of {4, 2, 3} against "| :4 :2 :3", and an instance with Value unset against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericDoubleArray()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            vw.Validate("| :4 :2 :3", new NumericExampleDoubleArray() { Value = new
System.Double[] { 4, 2, 3 } });
            vw.Validate("", new NumericExampleDoubleArray());
        }
    }

    /// <summary>Validates an anchored Double[] Value of {4, 2, 3} against "| :1 :4 :2 :3", and an instance with Value unset against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestNumericDoubleArrayAnchor()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            vw.Validate("| :1 :4 :2 :3", new NumericExampleDoubleArrayAnchor() { Value = new System.Double[] { 4, 2, 3 } });
            vw.Validate("", new NumericExampleDoubleArrayAnchor());
        }
    }

    /// <summary>Validates Byte keys with float values against "| 15:0.5 5:0.3 20:123.2", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryByte()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleByte() { Dict = new Dictionary<Byte, float>() };
            example.Dict.Add(15, .5f);
            example.Dict.Add(5, .3f);
            example.Dict.Add(20, 123.2f);
            vw.Validate("| 15:0.5 5:0.3 20:123.2", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Byte keys with string values against "| 15:0.5 5:0.3 20:123.2", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryByteString()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleByteString() { Dict = new Dictionary<Byte, string>() };
            example.Dict.Add(15, "0.5");
            example.Dict.Add(5, "0.3");
            example.Dict.Add(20, "123.2");
            vw.Validate("| 15:0.5 5:0.3 20:123.2", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Byte keys with Byte values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryByteByte()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleByteByte() { Dict = new Dictionary<Byte, Byte>() };
            example.Dict.Add(15, (Byte)3);
            example.Dict.Add(5, (Byte)4);
            example.Dict.Add(20, (Byte)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Byte keys with SByte values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryByteSByte()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleByteSByte() { Dict = new Dictionary<Byte, SByte>() };
            example.Dict.Add(15, (SByte)3);
            example.Dict.Add(5, (SByte)4);
            example.Dict.Add(20, (SByte)5);
vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Byte keys with Int16 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryByteInt16()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleByteInt16() { Dict = new Dictionary<Byte, Int16>() };
            example.Dict.Add(15, (Int16)3);
            example.Dict.Add(5, (Int16)4);
            example.Dict.Add(20, (Int16)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Byte keys with Int32 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryByteInt32()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleByteInt32() { Dict = new Dictionary<Byte, Int32>() };
            example.Dict.Add(15, (Int32)3);
            example.Dict.Add(5, (Int32)4);
            example.Dict.Add(20, (Int32)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Byte keys with UInt16 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryByteUInt16()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleByteUInt16() { Dict = new Dictionary<Byte, UInt16>() };
            example.Dict.Add(15, (UInt16)3);
            example.Dict.Add(5, (UInt16)4);
            example.Dict.Add(20, (UInt16)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Byte keys with UInt32 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryByteUInt32()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleByteUInt32() { Dict = new Dictionary<Byte, UInt32>() };
            example.Dict.Add(15, (UInt32)3);
            example.Dict.Add(5, (UInt32)4);
            example.Dict.Add(20, (UInt32)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Byte keys with Single values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryByteSingle()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleByteSingle() { Dict
= new Dictionary<Byte, Single>() };
            example.Dict.Add(15, (Single)3);
            example.Dict.Add(5, (Single)4);
            example.Dict.Add(20, (Single)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Byte keys with Int64 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryByteInt64()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleByteInt64() { Dict = new Dictionary<Byte, Int64>() };
            example.Dict.Add(15, (Int64)3);
            example.Dict.Add(5, (Int64)4);
            example.Dict.Add(20, (Int64)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Byte keys with UInt64 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryByteUInt64()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleByteUInt64() { Dict = new Dictionary<Byte, UInt64>() };
            example.Dict.Add(15, (UInt64)3);
            example.Dict.Add(5, (UInt64)4);
            example.Dict.Add(20, (UInt64)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Byte keys with Double values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryByteDouble()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleByteDouble() { Dict = new Dictionary<Byte, Double>() };
            example.Dict.Add(15, (Double)3);
            example.Dict.Add(5, (Double)4);
            example.Dict.Add(20, (Double)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates SByte keys with float values against "| 15:0.5 5:0.3 20:123.2", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionarySByte()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleSByte() { Dict = new Dictionary<SByte, float>() };
            example.Dict.Add(15, .5f);
            example.Dict.Add(5, .3f);
            example.Dict.Add(20, 123.2f);
            vw.Validate("| 15:0.5 5:0.3 20:123.2", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates SByte keys with string values against "| 15:0.5 5:0.3 20:123.2", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionarySByteString()
    {
        using (var
vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleSByteString() { Dict = new Dictionary<SByte, string>() };
            example.Dict.Add(15, "0.5");
            example.Dict.Add(5, "0.3");
            example.Dict.Add(20, "123.2");
            vw.Validate("| 15:0.5 5:0.3 20:123.2", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates SByte keys with Byte values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionarySByteByte()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleSByteByte() { Dict = new Dictionary<SByte, Byte>() };
            example.Dict.Add(15, (Byte)3);
            example.Dict.Add(5, (Byte)4);
            example.Dict.Add(20, (Byte)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates SByte keys with SByte values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionarySByteSByte()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleSByteSByte() { Dict = new Dictionary<SByte, SByte>() };
            example.Dict.Add(15, (SByte)3);
            example.Dict.Add(5, (SByte)4);
            example.Dict.Add(20, (SByte)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates SByte keys with Int16 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionarySByteInt16()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleSByteInt16() { Dict = new Dictionary<SByte, Int16>() };
            example.Dict.Add(15, (Int16)3);
            example.Dict.Add(5, (Int16)4);
            example.Dict.Add(20, (Int16)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates SByte keys with Int32 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionarySByteInt32()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleSByteInt32() { Dict = new Dictionary<SByte, Int32>() };
            example.Dict.Add(15, (Int32)3);
            example.Dict.Add(5, (Int32)4);
            example.Dict.Add(20, (Int32)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("",
example);
        }
    }

    /// <summary>Validates SByte keys with UInt16 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionarySByteUInt16()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleSByteUInt16() { Dict = new Dictionary<SByte, UInt16>() };
            example.Dict.Add(15, (UInt16)3);
            example.Dict.Add(5, (UInt16)4);
            example.Dict.Add(20, (UInt16)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates SByte keys with UInt32 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionarySByteUInt32()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleSByteUInt32() { Dict = new Dictionary<SByte, UInt32>() };
            example.Dict.Add(15, (UInt32)3);
            example.Dict.Add(5, (UInt32)4);
            example.Dict.Add(20, (UInt32)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates SByte keys with Single values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionarySByteSingle()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleSByteSingle() { Dict = new Dictionary<SByte, Single>() };
            example.Dict.Add(15, (Single)3);
            example.Dict.Add(5, (Single)4);
            example.Dict.Add(20, (Single)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates SByte keys with Int64 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionarySByteInt64()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleSByteInt64() { Dict = new Dictionary<SByte, Int64>() };
            example.Dict.Add(15, (Int64)3);
            example.Dict.Add(5, (Int64)4);
            example.Dict.Add(20, (Int64)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates SByte keys with UInt64 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionarySByteUInt64()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleSByteUInt64() { Dict = new Dictionary<SByte, UInt64>() };
            example.Dict.Add(15, (UInt64)3);
example.Dict.Add(5, (UInt64)4);
            example.Dict.Add(20, (UInt64)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates SByte keys with Double values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionarySByteDouble()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleSByteDouble() { Dict = new Dictionary<SByte, Double>() };
            example.Dict.Add(15, (Double)3);
            example.Dict.Add(5, (Double)4);
            example.Dict.Add(20, (Double)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int16 keys with float values against "| 15:0.5 5:0.3 20:123.2", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt16()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt16() { Dict = new Dictionary<Int16, float>() };
            example.Dict.Add(15, .5f);
            example.Dict.Add(5, .3f);
            example.Dict.Add(20, 123.2f);
            vw.Validate("| 15:0.5 5:0.3 20:123.2", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int16 keys with string values against "| 15:0.5 5:0.3 20:123.2", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt16String()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt16String() { Dict = new Dictionary<Int16, string>() };
            example.Dict.Add(15, "0.5");
            example.Dict.Add(5, "0.3");
            example.Dict.Add(20, "123.2");
            vw.Validate("| 15:0.5 5:0.3 20:123.2", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int16 keys with Byte values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt16Byte()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt16Byte() { Dict = new Dictionary<Int16, Byte>() };
            example.Dict.Add(15, (Byte)3);
            example.Dict.Add(5, (Byte)4);
            example.Dict.Add(20, (Byte)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int16 keys with SByte values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt16SByte()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
var example = new DictionaryExampleInt16SByte() { Dict = new Dictionary<Int16, SByte>() };
            example.Dict.Add(15, (SByte)3);
            example.Dict.Add(5, (SByte)4);
            example.Dict.Add(20, (SByte)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int16 keys with Int16 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt16Int16()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt16Int16() { Dict = new Dictionary<Int16, Int16>() };
            example.Dict.Add(15, (Int16)3);
            example.Dict.Add(5, (Int16)4);
            example.Dict.Add(20, (Int16)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int16 keys with Int32 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt16Int32()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt16Int32() { Dict = new Dictionary<Int16, Int32>() };
            example.Dict.Add(15, (Int32)3);
            example.Dict.Add(5, (Int32)4);
            example.Dict.Add(20, (Int32)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int16 keys with UInt16 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt16UInt16()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt16UInt16() { Dict = new Dictionary<Int16, UInt16>() };
            example.Dict.Add(15, (UInt16)3);
            example.Dict.Add(5, (UInt16)4);
            example.Dict.Add(20, (UInt16)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int16 keys with UInt32 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt16UInt32()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt16UInt32() { Dict = new Dictionary<Int16, UInt32>() };
            example.Dict.Add(15, (UInt32)3);
            example.Dict.Add(5, (UInt32)4);
            example.Dict.Add(20, (UInt32)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    [TestMethod]
[TestCategory("Vowpal Wabbit/Marshal")]
    // Validates Int16 keys with Single values against "| 15:3 5:4 20:5", then a null Dict against "".
    public void TestDictionaryInt16Single()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt16Single() { Dict = new Dictionary<Int16, Single>() };
            example.Dict.Add(15, (Single)3);
            example.Dict.Add(5, (Single)4);
            example.Dict.Add(20, (Single)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int16 keys with Int64 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt16Int64()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt16Int64() { Dict = new Dictionary<Int16, Int64>() };
            example.Dict.Add(15, (Int64)3);
            example.Dict.Add(5, (Int64)4);
            example.Dict.Add(20, (Int64)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int16 keys with UInt64 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt16UInt64()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt16UInt64() { Dict = new Dictionary<Int16, UInt64>() };
            example.Dict.Add(15, (UInt64)3);
            example.Dict.Add(5, (UInt64)4);
            example.Dict.Add(20, (UInt64)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int16 keys with Double values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt16Double()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt16Double() { Dict = new Dictionary<Int16, Double>() };
            example.Dict.Add(15, (Double)3);
            example.Dict.Add(5, (Double)4);
            example.Dict.Add(20, (Double)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int32 keys with float values against "| 15:0.5 5:0.3 20:123.2", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt32()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt32() { Dict = new Dictionary<Int32, float>() };
            example.Dict.Add(15, .5f);
            example.Dict.Add(5, .3f);
            example.Dict.Add(20,
123.2f);
            vw.Validate("| 15:0.5 5:0.3 20:123.2", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int32 keys with string values against "| 15:0.5 5:0.3 20:123.2", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt32String()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt32String() { Dict = new Dictionary<Int32, string>() };
            example.Dict.Add(15, "0.5");
            example.Dict.Add(5, "0.3");
            example.Dict.Add(20, "123.2");
            vw.Validate("| 15:0.5 5:0.3 20:123.2", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int32 keys with Byte values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt32Byte()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt32Byte() { Dict = new Dictionary<Int32, Byte>() };
            example.Dict.Add(15, (Byte)3);
            example.Dict.Add(5, (Byte)4);
            example.Dict.Add(20, (Byte)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int32 keys with SByte values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt32SByte()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt32SByte() { Dict = new Dictionary<Int32, SByte>() };
            example.Dict.Add(15, (SByte)3);
            example.Dict.Add(5, (SByte)4);
            example.Dict.Add(20, (SByte)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int32 keys with Int16 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt32Int16()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt32Int16() { Dict = new Dictionary<Int32, Int16>() };
            example.Dict.Add(15, (Int16)3);
            example.Dict.Add(5, (Int16)4);
            example.Dict.Add(20, (Int16)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int32 keys with Int32 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt32Int32()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new
DictionaryExampleInt32Int32() { Dict = new Dictionary<Int32, Int32>() };
            example.Dict.Add(15, (Int32)3);
            example.Dict.Add(5, (Int32)4);
            example.Dict.Add(20, (Int32)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int32 keys with UInt16 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt32UInt16()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt32UInt16() { Dict = new Dictionary<Int32, UInt16>() };
            example.Dict.Add(15, (UInt16)3);
            example.Dict.Add(5, (UInt16)4);
            example.Dict.Add(20, (UInt16)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int32 keys with UInt32 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt32UInt32()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt32UInt32() { Dict = new Dictionary<Int32, UInt32>() };
            example.Dict.Add(15, (UInt32)3);
            example.Dict.Add(5, (UInt32)4);
            example.Dict.Add(20, (UInt32)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int32 keys with Single values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt32Single()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt32Single() { Dict = new Dictionary<Int32, Single>() };
            example.Dict.Add(15, (Single)3);
            example.Dict.Add(5, (Single)4);
            example.Dict.Add(20, (Single)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int32 keys with Int64 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt32Int64()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt32Int64() { Dict = new Dictionary<Int32, Int64>() };
            example.Dict.Add(15, (Int64)3);
            example.Dict.Add(5, (Int64)4);
            example.Dict.Add(20, (Int64)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int32 keys with UInt64 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt32UInt64()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt32UInt64() { Dict = new Dictionary<Int32, UInt64>() };
            example.Dict.Add(15, (UInt64)3);
            example.Dict.Add(5, (UInt64)4);
            example.Dict.Add(20, (UInt64)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates Int32 keys with Double values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryInt32Double()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleInt32Double() { Dict = new Dictionary<Int32, Double>() };
            example.Dict.Add(15, (Double)3);
            example.Dict.Add(5, (Double)4);
            example.Dict.Add(20, (Double)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates UInt16 keys with float values against "| 15:0.5 5:0.3 20:123.2", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryUInt16()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleUInt16() { Dict = new Dictionary<UInt16, float>() };
            example.Dict.Add(15, .5f);
            example.Dict.Add(5, .3f);
            example.Dict.Add(20, 123.2f);
            vw.Validate("| 15:0.5 5:0.3 20:123.2", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates UInt16 keys with string values against "| 15:0.5 5:0.3 20:123.2", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryUInt16String()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleUInt16String() { Dict = new Dictionary<UInt16, string>() };
            example.Dict.Add(15, "0.5");
            example.Dict.Add(5, "0.3");
            example.Dict.Add(20, "123.2");
            vw.Validate("| 15:0.5 5:0.3 20:123.2", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates UInt16 keys with Byte values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryUInt16Byte()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleUInt16Byte() { Dict = new Dictionary<UInt16, Byte>() };
            example.Dict.Add(15, (Byte)3);
            example.Dict.Add(5, (Byte)4);
            example.Dict.Add(20, (Byte)5);
vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates UInt16 keys with SByte values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryUInt16SByte()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleUInt16SByte() { Dict = new Dictionary<UInt16, SByte>() };
            example.Dict.Add(15, (SByte)3);
            example.Dict.Add(5, (SByte)4);
            example.Dict.Add(20, (SByte)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates UInt16 keys with Int16 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryUInt16Int16()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleUInt16Int16() { Dict = new Dictionary<UInt16, Int16>() };
            example.Dict.Add(15, (Int16)3);
            example.Dict.Add(5, (Int16)4);
            example.Dict.Add(20, (Int16)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates UInt16 keys with Int32 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryUInt16Int32()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleUInt16Int32() { Dict = new Dictionary<UInt16, Int32>() };
            example.Dict.Add(15, (Int32)3);
            example.Dict.Add(5, (Int32)4);
            example.Dict.Add(20, (Int32)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates UInt16 keys with UInt16 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryUInt16UInt16()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleUInt16UInt16() { Dict = new Dictionary<UInt16, UInt16>() };
            example.Dict.Add(15, (UInt16)3);
            example.Dict.Add(5, (UInt16)4);
            example.Dict.Add(20, (UInt16)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates UInt16 keys with UInt32 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryUInt16UInt32()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new
DictionaryExampleUInt16UInt32() { Dict = new Dictionary<UInt16, UInt32>() };
            example.Dict.Add(15, (UInt32)3);
            example.Dict.Add(5, (UInt32)4);
            example.Dict.Add(20, (UInt32)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates UInt16 keys with Single values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryUInt16Single()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleUInt16Single() { Dict = new Dictionary<UInt16, Single>() };
            example.Dict.Add(15, (Single)3);
            example.Dict.Add(5, (Single)4);
            example.Dict.Add(20, (Single)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates UInt16 keys with Int64 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryUInt16Int64()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleUInt16Int64() { Dict = new Dictionary<UInt16, Int64>() };
            example.Dict.Add(15, (Int64)3);
            example.Dict.Add(5, (Int64)4);
            example.Dict.Add(20, (Int64)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates UInt16 keys with UInt64 values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryUInt16UInt64()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleUInt16UInt64() { Dict = new Dictionary<UInt16, UInt64>() };
            example.Dict.Add(15, (UInt64)3);
            example.Dict.Add(5, (UInt64)4);
            example.Dict.Add(20, (UInt64)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }

    /// <summary>Validates UInt16 keys with Double values against "| 15:3 5:4 20:5", then a null Dict against "".</summary>
    [TestMethod]
    [TestCategory("Vowpal Wabbit/Marshal")]
    public void TestDictionaryUInt16Double()
    {
        using (var vw = new VowpalWabbitExampleValidator(string.Empty))
        {
            var example = new DictionaryExampleUInt16Double() { Dict = new Dictionary<UInt16, Double>() };
            example.Dict.Add(15, (Double)3);
            example.Dict.Add(5, (Double)4);
            example.Dict.Add(20, (Double)5);
            vw.Validate("| 15:3 5:4 20:5", example);
            example.Dict = null;
            vw.Validate("", example);
        }
    }
}

public class NumericExampleByte
{
    [Feature]
    public byte Value { get; set; }
}

/// <summary>Test example with one [Feature] property of type Byte[].</summary>
public class NumericExampleByteArray
{
    [Feature]
    public byte[] Value { get; set; }
}

/// <summary>Test example with one Byte[] property marked [Feature(AddAnchor = true)].</summary>
public class NumericExampleByteArrayAnchor
{
    [Feature(AddAnchor = true)]
    public byte[] Value { get; set; }
}

/// <summary>Test example with one [Feature] property of type SByte.</summary>
public class NumericExampleSByte
{
    [Feature]
    public sbyte Value { get; set; }
}

/// <summary>Test example with one [Feature] property of type SByte[].</summary>
public class NumericExampleSByteArray
{
    [Feature]
    public sbyte[] Value { get; set; }
}

/// <summary>Test example with one SByte[] property marked [Feature(AddAnchor = true)].</summary>
public class NumericExampleSByteArrayAnchor
{
    [Feature(AddAnchor = true)]
    public sbyte[] Value { get; set; }
}

/// <summary>Test example with one [Feature] property of type Int16.</summary>
public class NumericExampleInt16
{
    [Feature]
    public short Value { get; set; }
}

/// <summary>Test example with one [Feature] property of type Int16[].</summary>
public class NumericExampleInt16Array
{
    [Feature]
    public short[] Value { get; set; }
}

/// <summary>Test example with one Int16[] property marked [Feature(AddAnchor = true)].</summary>
public class NumericExampleInt16ArrayAnchor
{
    [Feature(AddAnchor = true)]
    public short[] Value { get; set; }
}

/// <summary>Test example with one [Feature] property of type Int32.</summary>
public class NumericExampleInt32
{
    [Feature]
    public int Value { get; set; }
}

/// <summary>Test example with one [Feature] property of type Int32[].</summary>
public class NumericExampleInt32Array
{
    [Feature]
    public int[] Value { get; set; }
}

/// <summary>Test example with one Int32[] property marked [Feature(AddAnchor = true)].</summary>
public class NumericExampleInt32ArrayAnchor
{
    [Feature(AddAnchor = true)]
    public int[] Value { get; set; }
}

/// <summary>Test example with one [Feature] property of type UInt16.</summary>
public class NumericExampleUInt16
{
    [Feature]
    public ushort Value { get; set; }
}

/// <summary>Test example with one [Feature] property of type UInt16[].</summary>
public class NumericExampleUInt16Array
{
    [Feature]
    public ushort[] Value { get; set; }
}

/// <summary>Test example with one UInt16[] property marked [Feature(AddAnchor = true)].</summary>
public class NumericExampleUInt16ArrayAnchor
{
    [Feature(AddAnchor = true)]
    public ushort[] Value { get; set; }
}

/// <summary>Test example with one [Feature] property of type UInt32.</summary>
public class NumericExampleUInt32
{
    [Feature]
    public uint Value { get; set; }
}

/// <summary>Test example with one [Feature] property of type UInt32[].</summary>
public class NumericExampleUInt32Array
{
    [Feature]
    public uint[] Value { get; set; }
}

/// <summary>Test example with one UInt32[] property marked [Feature(AddAnchor = true)].</summary>
public class NumericExampleUInt32ArrayAnchor
{
    [Feature(AddAnchor = true)]
    public uint[] Value { get; set; }
}

/// <summary>Test example with one [Feature] property of type Single.</summary>
public class NumericExampleSingle
{
    [Feature]
    public float Value { get; set; }
}

/// <summary>Test example with one [Feature] property of type Single[].</summary>
public class NumericExampleSingleArray
{
    [Feature]
    public float[] Value { get; set; }
}

/// <summary>Test example with one Single[] property marked [Feature(AddAnchor = true)].</summary>
public class NumericExampleSingleArrayAnchor
{
[Feature(AddAnchor = true)] public System.Single[] Value { get; set; } } public class NumericExampleInt64 { [Feature] public System.Int64 Value { get; set; } } public class NumericExampleInt64Array { [Feature] public System.Int64[] Value { get; set; } } public class NumericExampleInt64ArrayAnchor { [Feature(AddAnchor = true)] public System.Int64[] Value { get; set; } } public class NumericExampleUInt64 { [Feature] public System.UInt64 Value { get; set; } } public class NumericExampleUInt64Array { [Feature] public System.UInt64[] Value { get; set; } } public class NumericExampleUInt64ArrayAnchor { [Feature(AddAnchor = true)] public System.UInt64[] Value { get; set; } } public class NumericExampleDouble { [Feature] public System.Double Value { get; set; } } public class NumericExampleDoubleArray { [Feature] public System.Double[] Value { get; set; } } public class NumericExampleDoubleArrayAnchor { [Feature(AddAnchor = true)] public System.Double[] Value { get; set; } } public class DictionaryExampleByte { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleByteString { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleByteByte { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleByteSByte { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleByteInt16 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleByteInt32 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleByteUInt16 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleByteUInt32 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleByteSingle { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleByteInt64 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleByteUInt64 { [Feature] public Dictionary Dict { get; set; } } public class 
DictionaryExampleByteDouble { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleSByte { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleSByteString { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleSByteByte { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleSByteSByte { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleSByteInt16 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleSByteInt32 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleSByteUInt16 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleSByteUInt32 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleSByteSingle { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleSByteInt64 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleSByteUInt64 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleSByteDouble { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt16 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt16String { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt16Byte { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt16SByte { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt16Int16 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt16Int32 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt16UInt16 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt16UInt32 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt16Single { [Feature] public Dictionary Dict { get; 
set; } } public class DictionaryExampleInt16Int64 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt16UInt64 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt16Double { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt32 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt32String { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt32Byte { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt32SByte { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt32Int16 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt32Int32 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt32UInt16 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt32UInt32 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt32Single { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt32Int64 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt32UInt64 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleInt32Double { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleUInt16 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleUInt16String { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleUInt16Byte { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleUInt16SByte { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleUInt16Int16 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleUInt16Int32 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleUInt16UInt16 { [Feature] 
public Dictionary Dict { get; set; } } public class DictionaryExampleUInt16UInt32 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleUInt16Single { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleUInt16Int64 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleUInt16UInt64 { [Feature] public Dictionary Dict { get; set; } } public class DictionaryExampleUInt16Double { [Feature] public Dictionary Dict { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestMarshalNumeric.tt000066400000000000000000000120221332666127000237210ustar00rootroot00000000000000<#@ output extension=".cs" #> using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using VW.Serializer; using VW.Serializer.Attributes; namespace cs_unittest { [TestClass] public class TestMarshalNumeric { <# foreach(var t in new[] { typeof(byte), typeof(sbyte), typeof(Int16), typeof(Int32), typeof(UInt16), typeof(UInt32), typeof(float), typeof(Int64), typeof(UInt64), typeof(double) }) { #> [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNumeric<#=t.Name#>() { using(var vw = new VowpalWabbitExampleValidator>(string.Empty)) { vw.Validate("| Value:25", new NumericExample<#=t.Name#>() { Value = 25 }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNumeric<#=t.Name#>Array() { using(var vw = new VowpalWabbitExampleValidatorArray>(string.Empty)) { vw.Validate("| :4 :2 :3", new NumericExample<#=t.Name#>Array() { Value = new <#=t#>[] { 4, 2, 3 } }); vw.Validate("", new NumericExample<#=t.Name#>Array()); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNumeric<#=t.Name#>ArrayAnchor() { using(var vw = new VowpalWabbitExampleValidatorArrayAnchor>(string.Empty)) { vw.Validate("| :1 :4 :2 :3", new 
NumericExample<#=t.Name#>ArrayAnchor() { Value = new <#=t#>[] { 4, 2, 3 } }); vw.Validate("", new NumericExample<#=t.Name#>ArrayAnchor()); } } <# } #> <# foreach(var t in new[] { typeof(byte), typeof(sbyte), typeof(Int16), typeof(Int32), typeof(UInt16) }) { #> [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestDictionary<#=t.Name#>() { using(var vw = new VowpalWabbitExampleValidator>(string.Empty)) { var example = new DictionaryExample<#=t.Name#>() { Dict = new Dictionary<<#=t#>, float>() }; example.Dict.Add(15, .5f); example.Dict.Add(5, .3f); example.Dict.Add(20, 123.2f); vw.Validate("| 15:0.5 5:0.3 20:123.2", example); example.Dict = null; vw.Validate("", example); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestDictionary<#=t.Name#>String() { using(var vw = new VowpalWabbitExampleValidatorString>(string.Empty)) { var example = new DictionaryExample<#=t.Name#>String() { Dict = new Dictionary<<#=t#>, string>() }; example.Dict.Add(15, "0.5"); example.Dict.Add(5, "0.3"); example.Dict.Add(20, "123.2"); vw.Validate("| 15:0.5 5:0.3 20:123.2", example); example.Dict = null; vw.Validate("", example); } } <# foreach(var s in new[] { typeof(byte), typeof(sbyte), typeof(Int16), typeof(Int32), typeof(UInt16), typeof(UInt32), typeof(float), typeof(Int64), typeof(UInt64), typeof(double) }) { #> [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestDictionary<#=t.Name#><#=s.Name#>() { using(var vw = new VowpalWabbitExampleValidator<#=s.Name#>>(string.Empty)) { var example = new DictionaryExample<#=t.Name#><#=s.Name#>() { Dict = new Dictionary<<#=t#>, <#=s.Name#>>() }; example.Dict.Add(15, (<#=s.Name#>)3); example.Dict.Add(5, (<#=s.Name#>)4); example.Dict.Add(20, (<#=s.Name#>)5); vw.Validate("| 15:3 5:4 20:5", example); example.Dict = null; vw.Validate("", example); } } <# } #> <# } #> } <# foreach(var t in new[] { typeof(byte), typeof(sbyte), typeof(Int16), typeof(Int32), typeof(UInt16), typeof(UInt32), typeof(float), 
typeof(Int64), typeof(UInt64), typeof(double) }) { #> public class NumericExample<#=t.Name#> { [Feature] public <#=t#> Value { get; set; } } public class NumericExample<#=t.Name#>Array { [Feature] public <#=t#>[] Value { get; set; } } public class NumericExample<#=t.Name#>ArrayAnchor { [Feature(AddAnchor = true)] public <#=t#>[] Value { get; set; } } <# } #> <# foreach(var t in new[] { typeof(byte), typeof(sbyte), typeof(Int16), typeof(Int32), typeof(UInt16) }) { #> public class DictionaryExample<#=t.Name#> { [Feature] public Dictionary<<#=t#>, float> Dict { get; set; } } public class DictionaryExample<#=t.Name#>String { [Feature] public Dictionary<<#=t#>, String> Dict { get; set; } } <# foreach(var s in new[] { typeof(byte), typeof(sbyte), typeof(Int16), typeof(Int32), typeof(UInt16), typeof(UInt32), typeof(float), typeof(Int64), typeof(UInt64), typeof(double) }) { #> public class DictionaryExample<#=t.Name#><#=s.Name#> { [Feature] public Dictionary<<#=t#>, <#=s#>> Dict { get; set; } } <# } #> <# } #> } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestMarshalling.cs000066400000000000000000000301711332666127000232330ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using VW; using VW.Serializer; using VW.Serializer.Attributes; namespace cs_unittest { [TestClass] public class TestMarshalling { [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestEnumerize() { using(var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| AgeEnumerize25", new ExampleEnum() { AgeEnumerize = 25 }); vw.Validate("| AgeEnumerize0 AgeNumeric:25", new ExampleEnum() { AgeNumeric = 25 }); vw.Validate("| AgeEnumerize0 AgeNumeric:23 AgeEnumChild", new ExampleEnum() { AgeNumeric = 23, AgeEnum = Age.Child }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestString() { using 
(var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("|abc London", new ExampleString() { Location = "London" }); vw.Validate("", new ExampleString() { }); vw.Validate("", new ExampleString() { Location = "" }); } using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| VideoTitleRich_Homie_Quan_-_\"Blah_Blah_Blah\"___Behind_The_Scenes", new ExampleString4 { Value = "VideoTitleRich Homie Quan - \"Blah Blah Blah\" | Behind The Scenes" }); vw.Validate("| VideoTitleIt's_Official__Your_vibrator_Can_be_Hacked", new ExampleString4 { Value = "VideoTitleIt's Official: Your vibrator Can be Hacked" }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestStringFeatureGroup() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("|a London", new ExampleString2() { Location = "London" }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestStringNamespace() { try { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("|bc London", new ExampleString3() { Location = "London" }); } Assert.Fail("Expected ArgumentException"); } catch (ArgumentException) { } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestStringEscape() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| New_York_State", new ExampleStringEscape() { Value = "New York State" }); vw.Validate("| new_York_state", new ExampleStringEscape() { Value = "new York state" }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestStringSplit() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| New York State", new ExampleStringSplit() { Value = "New York State" }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestStringIncludeName() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| AgeTeenager", new ExampleStringInclude() { Age = 
"Teenager" }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestDictionary() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { var ex = new ExampleDictionary() { Dict = new Dictionary() }; ex.Dict.Add("Age", 25); ex.Dict.Add("Location", 1.2); vw.Validate("| Age:25 Location:1.2", ex); ex.Dict = null; vw.Validate("", ex); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestCustomType() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| value:2", new ExampleCustomType { Custom = new CustomType { value = 2 } }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestEnumerableString() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| A New_York B", new ExampleEnumerable { Value = new[] { "A", "New_York", "B" } }); vw.Validate("", new ExampleEnumerable()); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestEnumerableKV() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| A:2 B:3", new ExampleEnumerableKV { Value = new [] { new KeyValuePair("A", 2), new KeyValuePair("B", 3) } }); vw.Validate("", new ExampleEnumerableKV()); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestComplexType() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("|ootheruser AgeAdult GenderMale PAge25 Views:4321 Boston 6:2.4", new UserContext { User = new UserFeatures { Age = Age.Adult, Gender = Gender.Male, Location = "Boston", PAge = 25, Views = 4321, Dict = new Dictionary { { 6, 2.4f } } } }); vw.Validate("|uuserlda :1 :2 :3", new UserContext { UserLDAVector = new FeatureVector { Vectors = new[] { 2f, 3f } } }); } using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| abc |ddoclda :1 :4 :5", new Document { Id = "abc", LDAVector = new FeatureVector { Vectors = new[] { 4f, 5f } } }); vw.Validate("| abc |ddoclda :1", new 
Document { Id = "abc", LDAVector = new FeatureVector { Vectors = new float[0] } }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestEnumerizePosition() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| Position0", new ExampleEnumerize { Position = 0 }); vw.Validate("| Position2", new ExampleEnumerize { Position = 2 }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestBool() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| OnOff", new ExampleBoolean { OnOff = true }); vw.Validate("| ", new ExampleBoolean { OnOff = false }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestFeatureDiscoveryAll() { using (var vw = new VowpalWabbitExampleValidator(new VowpalWabbitSettings { TypeInspector = TypeInspector.All })) { vw.Validate("| Feature1 Feature2:5", new POCO { Feature1 = true, Feature2 = 5 }); } } } public class POCODict { public Dictionary Features { get; set; } } public class POCO { public bool Feature1 { get; set; } [Feature] public int Feature2 { get; set; } } public class ExampleBoolean { [Feature] public bool OnOff { get; set; } } public class ExampleEnumerize { [Feature(Enumerize = true)] public int Position { get; set; } } public class ExampleStringEscape { [Feature(StringProcessing = StringProcessing.Escape)] public String Value { get; set; } } public class ExampleStringInclude { [Feature(StringProcessing = StringProcessing.EscapeAndIncludeName)] public String Age { get; set; } } public class ExampleStringSplit { [Feature(StringProcessing = StringProcessing.Split)] public String Value { get; set; } } public class UserContext { [Feature(Namespace = "otheruser", FeatureGroup = 'o')] public UserFeatures User { get; set; } [Feature(Namespace = "userlda", FeatureGroup = 'u', AddAnchor = true)] public FeatureVector UserLDAVector { get; set; } public IReadOnlyList ActionDependentFeatures { get; set; } } public class Document { 
[Feature] public string Id { get; set; } [Feature(Namespace = "doclda", FeatureGroup = 'd', AddAnchor = true)] public FeatureVector LDAVector { get; set; } } public class FeatureVector { [Feature(AddAnchor = true)] public float[] Vectors { get; set; } } public class UserFeatures { [Feature] public Age? Age { get; set; } [Feature(Enumerize = true)] public int? PAge { get; set; } [Feature] public Gender? Gender { get; set; } [Feature] public string Location { get; set; } [Feature] public long Views { get; set; } [Feature] public Dictionary Dict { get; set; } } public enum Gender { Female, Male } public class CustomType { [Feature] public int value { get; set; } } public class ExampleCustomType { [Feature] public CustomType Custom { get; set; } } public class ExampleDictionary { [Feature] public IDictionary Dict { get; set; } } public class ExampleEnumerable { [Feature] public IEnumerable Value { get; set; } } public class ExampleEnumerableKV { [Feature] public IEnumerable> Value { get; set; } } public class ExampleEnum { [Feature(Enumerize = true)] public int AgeEnumerize { get; set; } [Feature] public int? AgeNumeric { get; set; } [Feature] public Age? 
AgeEnum { get; set; } } public class ExampleString { [Feature(FeatureGroup = 'a', Namespace = "bc")] public string Location { get; set; } } public class ExampleString2 { [Feature(FeatureGroup = 'a')] public string Location { get; set; } } public class ExampleString3 { [Feature(Namespace = "bc")] public string Location { get; set; } } public class ExampleString4 { [Feature(StringProcessing = StringProcessing.Escape)] public string Value { get; set; } } public enum Age { Child, Adult } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestMarshallingOverflow.cs000066400000000000000000000063121332666127000247570ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; namespace cs_unittest { [TestClass] public class TestMarshallingOverflow { [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] [TestCategory("Vowpal Wabbit")] public void TestNumericInt64Overflow() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| Value:9.22337203685477580700E+018", new NumericExampleInt64() { Value = Int64.MaxValue }); vw.Validate("| Value:-9.22337203685477580700E+018", new NumericExampleInt64() { Value = Int64.MinValue}); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] [TestCategory("Vowpal Wabbit")] public void TestNumericUInt64Overflow() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| Value:1.844674E+19", new NumericExampleUInt64() { Value = UInt64.MaxValue}); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] [TestCategory("Vowpal Wabbit")] public void TestNumericDoubleOverflow() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| Value:1.79769313486231570000E+308", new NumericExampleDouble() { Value = double.MaxValue }); vw.Validate("| Value:-1.79769313486231570000E+308", new NumericExampleDouble() { Value = double.MinValue }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] [TestCategory("Vowpal Wabbit")] public void 
TestNumericInt64OverflowArray() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| 0:9.22337203685477580700E+018", new NumericExampleInt64Array() { Value = new [] { Int64.MaxValue } }); vw.Validate("| 0:-9.22337203685477580700E+018", new NumericExampleInt64Array() { Value = new[] { Int64.MinValue } }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] [TestCategory("Vowpal Wabbit")] public void TestNumericUInt64OverflowArray() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| 0:1.844674E+19", new NumericExampleUInt64Array() { Value = new[] { UInt64.MaxValue } }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] [TestCategory("Vowpal Wabbit")] public void TestNumericDoubleOverflowArray() { using (var vw = new VowpalWabbitExampleValidator(string.Empty)) { vw.Validate("| 0:1.79769313486231570000E+308", new NumericExampleDoubleArray() { Value = new [] { double.MaxValue } }); vw.Validate("| 0:-1.79769313486231570000E+308", new NumericExampleDoubleArray() { Value = new [] { double.MinValue } }); } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestModelLoading.cs000066400000000000000000000140021332666127000233230ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; using System.Threading.Tasks; using VW; namespace cs_unittest { [TestClass] public class TestModelLoading : TestBase { [TestMethod] [TestCategory("Vowpal Wabbit/Model Loading")] public void TestLoadModelCorrupt() { InternalTestModel(@"model-sets/7.10.2_corrupted.model", false); } [TestMethod] [TestCategory("Vowpal Wabbit/Model Loading")] public void TestLoadModel() { InternalTestModel(@"model-sets/8.0.0_ok.model", true); InternalTestModel(@"model-sets/8.0.1.test_named_ok.model", true); InternalTestModel(@"model-sets/8.0.1_rcv1_ok.model", true); InternalTestModel(@"model-sets/8.0.1_hash_ok.model", true); } 
// Remaining members of TestModelLoading (model loading / saving / ID round-trip tests).

        /// <summary>
        /// Loads randomly corrupted copies of a known-good model and expects every load to fail.
        /// </summary>
        [TestMethod]
        [TestCategory("Vowpal Wabbit/Model Loading")]
        public void TestLoadModelRandomCorrupt()
        {
            InternalTestModelRandomCorrupt("model-sets/8.0.1.test_named_ok.model");
            //InternalTestModelRandomCorrupt("model-sets/8.0.1_rcv1_ok.model");
            //InternalTestModelRandomCorrupt("model-sets/8.0.1_hash_ok.model");
        }

        /// <summary>
        /// Saves the same model three ways (memory stream, native file name, managed file stream)
        /// and checks that all three byte sequences are identical.
        /// </summary>
        [TestMethod]
        [TestCategory("Vowpal Wabbit/Model Loading")]
        public void TestLoadModelInMemory()
        {
            using (var vw = new VowpalWabbit(@"-i model-sets\8.0.1_rcv1_ok.model"))
            using (var memStream = new MemoryStream()) // disposed deterministically; the original leaked it
            {
                vw.SaveModel(memStream);
                vw.SaveModel("native.model");

                using (var file = File.Create("managed.file.model"))
                {
                    vw.SaveModel(file);
                }

                var nativeModel = File.ReadAllBytes("native.model");
                var managedFileModel = File.ReadAllBytes("managed.file.model");
                var managedModel = memStream.ToArray();

                Assert.IsTrue(nativeModel.SequenceEqual(managedModel));
                Assert.IsTrue(nativeModel.SequenceEqual(managedFileModel));
            }
        }

        /// <summary>
        /// Verifies that the model ID given via --id, and an ID assigned through the ID property,
        /// are both persisted by SaveModel and visible again after loading with -i.
        /// </summary>
        [TestMethod]
        [TestCategory("Vowpal Wabbit/Model Loading")]
        public void TestID()
        {
            using (var vw = new VowpalWabbit("--id abc"))
            {
                Assert.AreEqual("abc", vw.ID);

                vw.SaveModel("model");

                // Second save carries the ID changed at runtime.
                vw.ID = "def";
                vw.SaveModel("model.TestID");
            }

            using (var vw = new VowpalWabbit("-i model"))
            {
                Assert.AreEqual("abc", vw.ID);
            }

            using (var vw = new VowpalWabbit("-i model.TestID"))
            {
                Assert.AreEqual("def", vw.ID);
            }

            // An instance seeded from a VowpalWabbitModel must report the model's ID.
            using (var vwm = new VowpalWabbitModel("-i model.TestID"))
            {
                Assert.AreEqual("def", vwm.ID);

                using (var vw = new VowpalWabbit(new VowpalWabbitSettings { Model = vwm }))
                {
                    Assert.AreEqual("def", vw.ID);
                    Assert.AreEqual(vwm.ID, vw.ID);
                }
            }
        }

        /// <summary>
        /// Verifies that an instance created without --id reports an empty ID.
        /// </summary>
        [TestMethod]
        [TestCategory("Vowpal Wabbit/Model Loading")]
        public void TestEmptyID()
        {
            using (var vw = new VowpalWabbit("-l 1"))
            {
                Assert.AreEqual(string.Empty, vw.ID);

                vw.SaveModel("model");
            }

            // NOTE(review): "-f model" is used here, whereas TestID reloads with "-i"; if the intent is to
            // check the ID of the model saved above, this probably should be "-i model" - confirm.
            using (var vw = new VowpalWabbit("-f model"))
            {
                Assert.AreEqual(string.Empty, vw.ID);
            }
        }

        /// <summary>
        /// Verifies that Reload() can be called after SaveModel and that an ID assigned before
        /// the reload is still reported afterwards.
        /// </summary>
        [TestMethod]
        [TestCategory("Vowpal Wabbit/Model Loading")]
        public void TestReload()
        {
            using (var vw = new VowpalWabbit(""))
            {
                vw.SaveModel("model");
                vw.Reload();
            }

            using (var vw = new VowpalWabbit(""))
            {
                vw.ID = "def";
                vw.SaveModel("model.TestReload");
                vw.Reload();

                Assert.AreEqual("def", vw.ID);
            }
        }

        /// <summary>
        /// Attempts to load <paramref name="modelFile"/> and checks that the outcome matches the expectation.
        /// </summary>
        /// <param name="modelFile">Path of the model file passed to "-i".</param>
        /// <param name="shouldPass">True if the model is expected to load, false if loading must fail.</param>
        private void InternalTestModel(string modelFile, bool shouldPass)
        {
            bool passed = false;
            try
            {
                using (var vw = new VowpalWabbitModel(string.Format("--quiet -t -i {0}", modelFile)))
                {
                    // should only reach this point if model is valid
                    passed = true;
                }
            }
            catch (VowpalWabbitException ex)
            {
                Assert.IsTrue(ex.Message.Contains("corrupted"));
            }

            // Check both directions: the original only asserted when shouldPass was true, so a corrupt
            // model that loaded without an exception went unnoticed.
            Assert.AreEqual(shouldPass, passed, "Unexpected load result for model " + modelFile);
        }

        /// <summary>
        /// Creates 100 copies of <paramref name="modelFile"/>, overwrites 10 randomly chosen bytes in each
        /// (fixed seed, so the run is repeatable) and asserts that none of the copies can be loaded.
        /// </summary>
        /// <param name="modelFile">Path of a valid model file used as the corruption source.</param>
        private void InternalTestModelRandomCorrupt(string modelFile)
        {
            const int numBytesToCorrupt = 10;

            var rand = new Random(0);
            byte[] modelBytes = File.ReadAllBytes(modelFile);

            for (int i = 0; i < 100; i++)
            {
                var corruptBytes = new byte[modelBytes.Length];
                Array.Copy(modelBytes, corruptBytes, corruptBytes.Length);

                for (int j = 0; j < numBytesToCorrupt; j++)
                {
                    // Random.Next(max) has an exclusive upper bound, so the written values are 0..254.
                    corruptBytes[rand.Next(corruptBytes.Length)] = (byte)rand.Next(byte.MaxValue);
                }

                bool loaded = false;
                try
                {
                    using (var modelStream = new MemoryStream(corruptBytes))
                    using (var vw = new VowpalWabbitModel(new VowpalWabbitSettings("--quiet -t") { ModelStream = modelStream }))
                    {
                        // chances of reaching this point after reading a corrupt model are low
                        loaded = true;
                    }
                }
                catch (Exception) // an exception should be caught unless AV is encountered in which case the test will fail
                {
                    // expected: the corrupted model was rejected
                }

                // Asserted outside the try block: the original called Assert.IsTrue(false) inside it, and the
                // resulting assertion exception was swallowed by catch (Exception), so the test could never fail.
                Assert.IsFalse(loaded, "Corrupted model was loaded without error (iteration " + i + ")");
            }
        }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestMultiClassPredictionFactory.cs000066400000000000000000000023711332666127000264240ustar00rootroot00000000000000
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using VW;

namespace cs_unittest
{
    [TestClass]
    public class TestMultiClassPredictionFactory
    {
        /// <summary>
        /// Trains a 3-class one-against-all model and checks the per-class probabilities
        /// returned for two single-feature examples.
        /// </summary>
        [TestMethod]
        [TestCategory("Vowpal Wabbit")]
        public void TestMultiClassProbabilitiesFactory()
        {
            using (var vw = SetupVW())
            {
                var res = vw.Predict(" | a", VowpalWabbitPredictionType.MultiClassProbabilities);

                // MSTest signature is (expected, actual); the original had the arguments swapped,
                // which produces misleading failure messages.
                Assert.AreEqual(3, res.Count);

                // Summing floats is not exact; compare with a tolerance instead of strict equality.
                Assert.AreEqual(1.0f, res.Values.Sum(), 1e-5f);

                // Feature "a" only occurs in the examples labelled 1 and 2.
                Assert.IsTrue(res[1] > res[3]);
                Assert.IsTrue(res[2] > res[3]);

                // Feature "e" only occurs in the example labelled 3.
                var res2 = vw.Predict(" | e", VowpalWabbitPredictionType.MultiClassProbabilities);
                Assert.IsTrue(res2[3] > res2[1]);
                Assert.IsTrue(res2[3] > res2[2]);
            }
        }

        /// <summary>
        /// Creates a VowpalWabbit instance configured for 3-class probability output and
        /// trains it on one example per class. The caller owns (and must dispose) the result.
        /// </summary>
        private VowpalWabbit SetupVW()
        {
            var vw = new VowpalWabbit(" --probabilities --loss_function=logistic --oaa 3");

            vw.Learn("1 | a b");
            vw.Learn("2 | a c");
            vw.Learn("3 | c b e");

            return vw;
        }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestMultiworldTesting.cs000066400000000000000000000030571332666127000244750ustar00rootroot00000000000000
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using VW;
using VW.Labels;

namespace cs_unittest
{
    [TestClass]
    public class TestMultiworldTestingClass
    {
        /// <summary>
        /// Feeds three labelled observations into VowpalWabbitMultiworldTesting and checks the
        /// reported learned-policy and constant-policy values after each call.
        /// </summary>
        [TestMethod]
        [TestCategory("Vowpal Wabbit")]
        public void TestMultiworldTesting()
        {
            using (var mwt = new VowpalWabbitMultiworldTesting())
            {
                AssertAreEqual(
                    mwt.Evaluate(1, 2, new ContextualBanditLabel { Action = 1, Probability = .5f }),
                    0f, 0f, 0f);

                AssertAreEqual(
                    mwt.Evaluate(1, 2, new ContextualBanditLabel { Action = 2, Probability = .5f, Cost = 1 }),
                    0f, 0f, 1f);

                AssertAreEqual(
                    mwt.Evaluate(1, 2, new ContextualBanditLabel { Action = 1, Probability = .5f }),
                    0, 0, 0.6666667f);
            }
        }

        /// <summary>
        /// Compares a PoliciesPerformance result against expected values.
        /// </summary>
        /// <param name="actual">Result returned by Evaluate.</param>
        /// <param name="expected">
        /// expected[0] is the learned-policy value; expected[1..] are the constant-policy values
        /// in enumeration order. The length must equal NumConstantPolicies + 1.
        /// </param>
        private static void AssertAreEqual(VowpalWabbitMultiworldTesting.PoliciesPerformance actual, params float[] expected)
        {
            Assert.AreEqual(expected.Length, actual.NumConstantPolicies + 1);

            Assert.AreEqual(expected[0], actual.LearnedPolicy, 0.0001, "Learned policy differs");

            // Index 0 is the learned policy, so constant policies start at 1.
            int i = 1;
            foreach (var value in actual.ConstantPolicies)
            {
                Assert.AreEqual(expected[i], value, 0.0001, "Constant policy " + i + " differs");
                i++;
            }
        }
    }
}
vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestNull.cs000066400000000000000000000144471332666127000217140ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using VW; using VW.Labels; using VW.Serializer.Attributes; namespace cs_unittest { [TestClass] public class TestNull { [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNull1() { using (var vw = new VowpalWabbit("--cb_adf --rank_all --interact ab")) { var ctx = new Context() { ID = 25, Vector = null, ActionDependentFeatures = new[] { new ADF { ADFID = "23" } }.ToList() }; vw.Learn(ctx, ctx.ActionDependentFeatures, 0, new ContextualBanditLabel() { Action = 1, Cost = 1, Probability = 0.2f }); var result = vw.Predict(ctx, ctx.ActionDependentFeatures); Assert.AreEqual(1, result.Length); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNull2() { using (var vw = new VowpalWabbit("--cb_adf --rank_all --interact ab")) { var ctx = new Context() { ID = 25, Vector = null, ActionDependentFeatures = new[] { new ADF { ADFID = "23", } }.ToList() }; vw.Learn(ctx, ctx.ActionDependentFeatures, 0, new ContextualBanditLabel() { Action = 1, Cost= 1, Probability = 0.2f }); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNull3() { using (var vw = new VowpalWabbit("--cb_adf --rank_all --interact ac")) { var ctx = new Context() { ID = 25, Vector = new float[] { 3 }, VectorC = new float[] { 2, 2, 3 }, ActionDependentFeatures = new[] { new ADF { ADFID = "23", } }.ToList() }; var label = new ContextualBanditLabel() { Action = 1, Cost= 1, Probability = 0.2f }; vw.Learn(ctx, ctx.ActionDependentFeatures, 0, label); ctx.Vector = null; vw.Learn(ctx, ctx.ActionDependentFeatures, 0, label); ctx.Vector = new float[] { 2 }; ctx.VectorC = null; vw.Learn(ctx, ctx.ActionDependentFeatures, 0, label); ctx.Vector = null; vw.Learn(ctx, 
ctx.ActionDependentFeatures, 0, label); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNull4() { using (var vw = new VowpalWabbit("--cb_adf --rank_all --interact ab")) { var ctx = new Context() { ID = 25, Vector = null, ActionDependentFeatures = new[] { new ADF { ADFID = null } }.ToList() }; var label = new ContextualBanditLabel() { Action = 1, Cost= 1, Probability = 0.2f }; vw.Learn(ctx, ctx.ActionDependentFeatures, 0, label); var result = vw.Predict(ctx, ctx.ActionDependentFeatures); Assert.AreEqual(1, result.Length); ctx.ID = null; vw.Learn(ctx, ctx.ActionDependentFeatures, 0, label); result = vw.Predict(ctx, ctx.ActionDependentFeatures); Assert.AreEqual(1, result.Length); } } [TestMethod] [TestCategory("Vowpal Wabbit/Marshal")] public void TestNull5() { using (var vw = new VowpalWabbit("--cb_adf --rank_all --interact ab")) { var ctx = new Context() { ID = 25, ActionDependentFeatures = new[] { new ADF { ADFID = "123" }, new ADF(), new ADF(), new ADF { ADFID = "4"} }.ToList() }; var label = new ContextualBanditLabel() { Action = 1, Cost = 1, Probability = 0.2f }; vw.Learn(ctx, ctx.ActionDependentFeatures, 0, label); var result = vw.Predict(ctx, ctx.ActionDependentFeatures); Assert.AreEqual(4, result.Length); ctx.ActionDependentFeatures[0].ADFID = null; ctx.ActionDependentFeatures[3].ADFID = null; result = vw.Predict(ctx, ctx.ActionDependentFeatures); Assert.AreEqual(4, result.Length); } } } public class ADF { [Feature] public string ADFID { get; set; } [Feature(FeatureGroup = 'b', AddAnchor = true)] public float[] Vector { get; set; } public ILabel Label { get; set; } } public class Context { [Feature] public int? 
ID { get; set; } [Feature(FeatureGroup = 'a', AddAnchor = true)] public float[] Vector { get; set; } [Feature(FeatureGroup = 'c')] public float[] VectorC { get; set; } public IReadOnlyList ActionDependentFeatures { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestPooling.cs000066400000000000000000000162541332666127000224070ustar00rootroot00000000000000using System; using System.Collections.Generic; using System.Threading; using Microsoft.VisualStudio.TestTools.UnitTesting; using VW; using VW.Labels; using VW.Serializer.Attributes; using System.IO; namespace cs_unittest { [TestClass] public class TestPooling { [TestMethod] [ExpectedException(typeof(InvalidOperationException))] [TestCategory("Vowpal Wabbit")] public void ObjectPoolTestEmptyFactory() { new ObjectPool().GetOrCreate(); } [TestMethod] [ExpectedException(typeof(ObjectDisposedException))] [TestCategory("Vowpal Wabbit")] public void ObjectPoolTestDisposed1() { var objectPool = new ObjectPool(ObjectFactory.Create(new Disposable(), d => d.Create())); objectPool.Dispose(); objectPool.GetOrCreate(); } [TestMethod] [ExpectedException(typeof(ObjectDisposedException))] [TestCategory("Vowpal Wabbit")] public void ObjectPoolTestDisposed2() { var objectPool = new ObjectPool(ObjectFactory.Create(new Disposable(), d => d.Create())); objectPool.Dispose(); objectPool.UpdateFactory(ObjectFactory.Create(new Disposable(), d => d.Create())); } [TestMethod] [TestCategory("Vowpal Wabbit")] public void ObjectPoolTestDangling() { var factory = new Disposable(); var objectPool = new ObjectPool(ObjectFactory.Create(factory, d => d.Create())); var p1 = objectPool.GetOrCreate(); objectPool.Dispose(); Assert.IsTrue(factory.Disposed); Assert.AreEqual(1, factory.Children.Count); Assert.IsFalse(factory.Children[0].Disposed); // don't throw exception if we return pool too late p1.Dispose(); } [TestMethod] [TestCategory("Vowpal Wabbit")] public void ObjectPoolTestFactory() { var factory1 = new Disposable(); var factory2 = new 
Disposable(); var objectPool = new ObjectPool(); objectPool.UpdateFactory(ObjectFactory.Create(factory1, d => d.Create())); var p3 = objectPool.GetOrCreate(); p3.Dispose(); objectPool.UpdateFactory(ObjectFactory.Create(factory2, d => d.Create())); Assert.IsTrue(factory1.Disposed); var p1 = objectPool.GetOrCreate(); var p2 = objectPool.GetOrCreate(); p1.Dispose(); p2.Dispose(); p1 = objectPool.GetOrCreate(); p1.Dispose(); objectPool.Dispose(); factory1.AssertChildrenDisposed(); Assert.AreEqual(1, factory1.Children.Count); factory2.AssertChildrenDisposed(); Assert.AreEqual(2, factory2.Children.Count); Assert.IsTrue(factory2.Disposed); } [TestMethod] [TestCategory("Vowpal Wabbit")] public void ThreadPoolNull() { using (var pool = new VowpalWabbitThreadedPrediction()) { using (var vw = pool.GetOrCreate()) { Assert.IsNull(vw.Value); } pool.UpdateModel(new VowpalWabbitModel(string.Empty)); using (var vw = pool.GetOrCreate()) { Assert.IsNotNull(vw.Value); } } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void ObjectPoolTestConcurrency() { var factories = new List { new Disposable() }; var objectPool = new ObjectPool(ObjectFactory.Create(factories[0], d => d.Create())); var t1 = new Thread(() => { for (int i = 0; i < 500; i++) { var p = objectPool.GetOrCreate(); Thread.Sleep(5); p.Dispose(); } }); var t2 = new Thread(() => { for (int i = 0; i < 500; i++) { var p = objectPool.GetOrCreate(); Thread.Sleep(7); p.Dispose(); } }); var t3 = new Thread(() => { for (int i = 0; i < 20; i++) { var f = new Disposable(); objectPool.UpdateFactory(ObjectFactory.Create(f, d => d.Create())); Thread.Sleep(20); } }); t1.Start(); t2.Start(); t3.Start(); t1.Join(); t2.Join(); t3.Join(); objectPool.Dispose(); foreach (var f in factories) { Assert.IsTrue(f.Disposed); f.AssertChildrenDisposed(); } } public class SimpleData { [Feature] public float Value { get; set; } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestSaveLoadSkip() { using (var vw = new VowpalWabbit("--binary 
-f saveload.model")) { for (int i = 0; i < 100; i++) { vw.Learn(new SimpleData { Value = 1 }, new SimpleLabel { Label = 1 }); vw.Learn(new SimpleData { Value = -1 }, new SimpleLabel { Label = -1 }); } Assert.AreEqual(1, vw.Predict(new SimpleData { Value = 1 }, VowpalWabbitPredictionType.Scalar)); Assert.AreEqual(-1, vw.Predict(new SimpleData { Value = -1 }, VowpalWabbitPredictionType.Scalar)); } using (var model = new VowpalWabbitModel(new VowpalWabbitSettings { Arguments = "--binary", ModelStream = File.Open("saveload.model", FileMode.Open) })) using (var pool = new VowpalWabbitThreadedPrediction(new VowpalWabbitSettings { Model = model })) { using (var vw = pool.GetOrCreate()) { Assert.AreEqual(-1, vw.Value.Predict(new SimpleData { Value = -1 }, VowpalWabbitPredictionType.Scalar)); } } } public class Disposable : IDisposable { public Disposable() { this.Children = new List(); } public void AssertChildrenDisposed() { foreach (var item in this.Children) { Assert.IsTrue(item.Disposed); } } public List Children { get; set; } public bool Disposed { get; set; } public void Dispose() { this.Disposed = true; } public Disposable Create() { var d = new Disposable(); this.Children.Add(d); return d; } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestRunner.cs000066400000000000000000000024511332666127000222430ustar00rootroot00000000000000using cs_testcommon; using System; using System.Reflection; namespace cs_unittest { /// /// Helper to perform leak testing /// public class TestRunner : MarshalByRefObject, ITestRunner { public string Run(string type, string method) { var testType = Type.GetType(type); var testObject = Activator.CreateInstance(testType); try { var m = testType.GetMethod(method); if (m == null) { return string.Format("TestRunner: {0}.{1} not found", type, method); } m.Invoke(testObject, null); } catch (Exception ex) { var tex = ex as TargetInvocationException; if (tex != null) { ex = tex.InnerException; } return string.Format("{0}\n{1}\n#-#-#-#-#-#-#{2}", 
ex.GetType(), ex.Message, ex.StackTrace); } finally { var disposable = testObject as IDisposable; if (disposable != null) { disposable.Dispose(); } } return null; } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestSerializer.cs000066400000000000000000000104401332666127000231000ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections.Generic; using System.Linq; using System.Linq.Expressions; using System.Reflection; using System.Text; using System.Threading.Tasks; using VW; using VW.Reflection; using VW.Serializer; using VW.Serializer.Attributes; using VW.Serializer.Intermediate; namespace cs_unittest { public class CustomClass { public int X { get; set; } public bool HasVisited = false; } public class MyContext { [Feature] public CustomClass Feature { get; set; } } public class CustomFeaturizer { public void MarshalFeature(VowpalWabbitMarshalContext context, Namespace ns, Feature feature, CustomClass value) { Assert.IsNotNull(context); Assert.IsNotNull(ns); Assert.IsNotNull(feature); Assert.IsNotNull(value); Assert.AreEqual(5, value.X); value.HasVisited = true; } } public class MyDictifyContext { [Feature] public int A { get; set; } [Feature(Dictify = true)] public float[] B { get; set; } } [TestClass] public class TestSerializer { [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestCustomFeaturizer() { var context = new MyContext() { Feature = new CustomClass() { X = 5 }}; using (var vw = new VowpalWabbit("")) { var serializer = VowpalWabbitSerializerFactory.CreateSerializer(new VowpalWabbitSettings { CustomFeaturizer = new List { typeof(CustomFeaturizer) } }) .Create(vw); var example = serializer.Serialize(context); Assert.IsNotNull(example); example.Dispose(); } Assert.IsTrue(context.Feature.HasVisited); } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestCustomFeaturizerOverideMethod() { var context = new MyContext() { Feature = new CustomClass() { X = 5 } }; using (var vw = new 
VowpalWabbit("")) { var serializer = VowpalWabbitSerializerFactory.CreateSerializer(new VowpalWabbitSettings { Schema = new Schema { Features = new List { new FeatureExpression(typeof(CustomClass), "Feature", // TODO: looks a bit awkward for an API. The compiler needs to know what property to access to copy the value into the Feature object valueExpression => Expression.Property(valueExpression, (PropertyInfo)ReflectionHelper.GetInfo((MyContext m) => m.Feature)), overrideSerializeMethod: (MethodInfo)ReflectionHelper.GetInfo((CustomFeaturizer c) => c.MarshalFeature(null, null, null, null))) } } }).Create(vw); var example = serializer.Serialize(context); Assert.IsNotNull(example); example.Dispose(); } Assert.IsTrue(context.Feature.HasVisited); } [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestDictify() { using (var vw = new VowpalWabbit(new VowpalWabbitSettings { EnableStringExampleGeneration = true, EnableStringFloatCompact = true })) using (var serializer = VowpalWabbitSerializerFactory.CreateSerializer(vw.Settings).Create(vw)) { var dictionary = new Dictionary(); var ctx = new MyDictifyContext { A = 5, B = new[] { 1f, 2f, 3f } }; var str = serializer.SerializeToString(ctx, dictionary: dictionary); Assert.AreEqual(" | A:5 d0", str); Assert.AreEqual(1, dictionary.Count); Assert.IsTrue(dictionary.ContainsKey(" 0:1 1:2 2:3")); Assert.AreEqual("d0", dictionary[" 0:1 1:2 2:3"]); } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestTelemetryProcessor.cs000066400000000000000000000027261332666127000246510ustar00rootroot00000000000000using Microsoft.ApplicationInsights.Channel; using Microsoft.ApplicationInsights.DataContracts; using Microsoft.ApplicationInsights.Extensibility; using Microsoft.ApplicationInsights.Extensibility.Implementation; using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Text; using System.Threading.Tasks; namespace cs_unittest { public class TestTelemetryProcessor: ITelemetryProcessor { 
private ITelemetryProcessor Next { get; set; } public SynchronizedCollection Exceptions { get; private set; } // Link processors to each other in a chain. public TestTelemetryProcessor(ITelemetryProcessor next, SynchronizedCollection exceptions) { this.Next = next; this.Exceptions = exceptions; } public void Process(ITelemetry item) { var tt = item as TraceTelemetry; if (tt != null) { Console.WriteLine($"Trace: {tt.Message}"); foreach (var prop in tt.Properties) Console.WriteLine($"\t{prop.Key}: {prop.Value}"); } var et = item as ExceptionTelemetry; if (et != null) { this.Exceptions.Add(et); Console.WriteLine($"Exception: {et.Message}. {et.Exception.StackTrace}"); } this.Next.Process(item); } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestTracing.cs000066400000000000000000000016731332666127000223660ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using VW; namespace cs_unittest { [TestClass] public class TestTracing { [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestTraceListener() { var messages = new List(); using (var vw = new VowpalWabbit(new VowpalWabbitSettings { TraceListener = msg => messages.Add(msg), Verbose = true })) { vw.Learn("1 |a x:2"); vw.Learn("2 |a x:3"); } var trace = string.Join("\n", messages); Assert.AreEqual(16, messages.Count, $"Expected 16 lines. Found {messages.Count}. 
'{trace}'"); Assert.AreEqual("total feature number = 4", messages[15]); } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestWiki.cs000066400000000000000000000036271332666127000217030ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using VW; using VW.Labels; namespace cs_unittest { [TestClass] public class TestWikiClass { [TestMethod] [TestCategory("Vowpal Wabbit")] public void TestWiki() { using (var vw = new VW.VowpalWabbit("-f rcv1.model")) { // 1 |f 13:3.9656971e-02 24:3.4781646e-02 69:4.6296168e-02 using (var exampleBuilder = new VW.VowpalWabbitExampleBuilder(vw)) { // important to dispose the namespace builder at the end, as data is only added to the example // if there is any feature added to the namespace using (var ns = exampleBuilder.AddNamespace('f')) { var namespaceHash = vw.HashSpace("f"); var featureHash = vw.HashFeature("13", namespaceHash); ns.AddFeature(featureHash, 8.5609287e-02f); featureHash = vw.HashFeature("24", namespaceHash); ns.AddFeature(featureHash, 3.4781646e-02f); featureHash = vw.HashFeature("69", namespaceHash); ns.AddFeature(featureHash, 4.6296168e-02f); } exampleBuilder.ApplyLabel(new SimpleLabel() { Label = 1 }); // hand over of memory management using (var example = exampleBuilder.CreateExample()) { VowpalWabbitExampleValidator.Validate("1 |f 13:8.5609287e-02 24:3.4781646e-02 69:4.6296168e-02", example, VowpalWabbitLabelComparator.Simple); vw.Learn(example); } } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TestWrapper.cs000066400000000000000000000030121332666127000224040ustar00rootroot00000000000000using System.IO; using cs_test; using Microsoft.VisualStudio.TestTools.UnitTesting; using VW; namespace cs_unittest { [TestClass] public class TestWrapper : TestBase { [TestMethod] [TestCategory("Vowpal Wabbit")] public void VwCleanupTest() { new VowpalWabbit("-k -l 20 --initial_t 128000 
--power_t 1 -c --cache_file VwCleanupTest.cache --passes 8 --invariant --ngram 3 --skips 1 --holdout_off") .Dispose(); } [TestMethod] [TestCategory("Vowpal Wabbit")] public void VwCleanupTestError() { try { if (Directory.Exists("models_out")) Directory.Delete("models_out", true); var vw = new VowpalWabbit("-k -l 20 --initial_t 128000 --power_t 1 -f models_out/0001.model -c --passes 8 --invariant --ngram 3 --skips 1 --holdout_off"); vw.Dispose(); Assert.Fail("Excepted exception not thrown"); } catch (VowpalWabbitException e) { Assert.IsFalse(string.IsNullOrEmpty(e.Filename)); Assert.AreNotEqual(0, e.LineNumber); Assert.IsTrue(e.Message.Contains("No such file or directory"), e.Message); } } [TestMethod] [TestCategory("Vowpal Wabbit")] public void VwModelRefCountingTest() { var model = new VowpalWabbitModel(""); model.Dispose(); } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TrainSet0002Dat.cs000066400000000000000000000042071332666127000226170ustar00rootroot00000000000000using System.Collections.Generic; using System.Globalization; using cs_unittest; using VW.Labels; using VW.Serializer.Attributes; namespace TrainSet0002Dat { public class Data : BaseData { [Feature(FeatureGroup = 'T', Name = "")] public string T { get; set; } [Feature(FeatureGroup = 'f')] public List> F { get; set; } public ILabel Label { get; set; } } public class DataListener : VowpalWabbitListenerToEvents { private Data example; public override void EnterExample(VowpalWabbitParser.ExampleContext context) { this.example = new Data() { F = new List>() }; } public override void ExitExample(VowpalWabbitParser.ExampleContext context) { this.example.Line = context.GetText(); this.Created(this.example.Line, this.example, this.example.Label); } public override void ExitLabel_simple(VowpalWabbitParser.Label_simpleContext context) { var simpleLabel = new SimpleLabel() { Label = context.value.value }; if (context.weight != null) simpleLabel.Weight = context.weight.value; this.example.Label = simpleLabel; } 
public override void ExitNumber(VowpalWabbitParser.NumberContext context) { context.value = float.Parse(context.GetText(), CultureInfo.InvariantCulture); } public override void ExitFeatureSparse(VowpalWabbitParser.FeatureSparseContext context) { var index = context.index; var weight_index = index.Text; var x = context.x; if (x == null) { // hashed feature this.example.T = weight_index; } else { // sparse feature this.example.F.Add(new KeyValuePair(weight_index, context.x.value)); } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/TrainSetCs_testLdf.cs000066400000000000000000000053561332666127000236450ustar00rootroot00000000000000using System.Collections.Generic; using VW; using VW.Labels; using VW.Serializer.Attributes; namespace cs_unittest { public static class TrainSetCs_testLdf { public static Cs_TestData[] CreateSampleCbAdfData() { var sampleData = new Cs_TestData[3]; //shared | s_1 s_2 //0:1.0:0.5 | a_1 b_1 c_1 //| a_2 b_2 c_2 //| a_3 b_3 c_3 //| b_1 c_1 d_1 //0:0.0:0.5 | b_2 c_2 d_2 //| a_1 b_1 c_1 //| a_3 b_3 c_3 sampleData[0] = new Cs_TestData { Shared = new[] { "s_1", "s_2" }, ActionDependentFeatures = new[] { new Cs_TestCs_TestDataADF { Features = new[] { "a_1", "b_1", "c_1" }, Label = new ContextualBanditLabel { Cost = 1f, Probability = .5f } }, new Cs_TestCs_TestDataADF { Features = new [] { "a_2","b_2","c_2" } }, new Cs_TestCs_TestDataADF { Features = new [] { "a_3","b_3","c_3" } }, } }; sampleData[1] = new Cs_TestData { ActionDependentFeatures = new[] { new Cs_TestCs_TestDataADF { Features = new [] { "b_1","c_1","d_1" } }, new Cs_TestCs_TestDataADF { Features = new [] { "b_2", "c_2", "d_2" }, Label = new ContextualBanditLabel { Cost = 0f, Probability = .5f } }, } }; sampleData[2] = new Cs_TestData { ActionDependentFeatures = new[] { new Cs_TestCs_TestDataADF { Features = new [] { "a_1","b_1","c_1" } }, new Cs_TestCs_TestDataADF { Features = new [] { "a_3","b_3","c_3" } } } }; return sampleData; } } public class Cs_TestData { [Feature] public string[] 
Shared { get; set; } public IReadOnlyList ActionDependentFeatures { get; set; } } public class Cs_TestCs_TestDataADF { [Feature] public string[] Features { get; set; } public override string ToString() { return string.Join(" ", this.Features); } public ILabel Label { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/VWTestHelper.cs000066400000000000000000000262761332666127000225010ustar00rootroot00000000000000using System; using System.Globalization; using System.IO; using System.Linq; using Antlr4.Runtime; using Antlr4.Runtime.Atn; using Antlr4.Runtime.Tree; using Microsoft.VisualStudio.TestTools.UnitTesting; using VW; using VW.Serializer; namespace cs_unittest { internal static class VWTestHelper { internal static void ParseInput(string text, IParseTreeListener listener) { ParseInput(new AntlrInputStream(text), listener); } internal static void ParseInput(Stream stream, IParseTreeListener listener) { ParseInput(new UnbufferedCharStream(stream), listener); } internal static void ParseInput(ICharStream stream, IParseTreeListener listener) { // optimized for memory consumption var lexer = new VowpalWabbitLexer(stream) { TokenFactory = new CommonTokenFactory(copyText: true) }; var tokens = new UnbufferedTokenStream(lexer); var parser = new VowpalWabbitParser(tokens) { // Note; don't disable, as it is required to access the line // BuildParseTree = false, }; // fast than LL(*) parser.Interpreter.PredictionMode = PredictionMode.Sll; parser.AddParseListener(listener); parser.AddErrorListener(new TestErrorListener()); parser.start(); } internal static void Learn(string args, string inputFile, string stderrFile) where TListener : VowpalWabbitListenerToEvents, new() { using (var vw = new VowpalWabbit(args)) using (var validate = new VowpalWabbitExampleValidator(args)) { var listener = new TListener(); listener.Created = (line, data, label) => { if (data == null) { Assert.Fail("got empty example"); } validate.Validate(line, data, label); vw.Learn(data, label); }; 
VWTestHelper.ParseInput(File.OpenRead(inputFile), listener); AssertEqual(stderrFile, vw.Native.PerformanceStatistics); } } internal static void Predict(string args, string inputFile, string referenceFile = null) where TData : BaseData where TListener : VowpalWabbitListenerToEvents, new() { float[] references = null; var index = 0; if (referenceFile != null) { references = File.ReadAllLines(referenceFile) .Select(l => float.Parse(l.Split(' ')[0], CultureInfo.InvariantCulture)) .ToArray(); } using (var vwRef = new VowpalWabbit(args)) using (var vwModel = new VowpalWabbitModel(args)) using (var vwValidate = new VowpalWabbit(args)) using (var vwInMemoryShared2 = new VowpalWabbit(new VowpalWabbitSettings { Model = vwModel })) using (var validate = new VowpalWabbitExampleValidator(args)) { var listener = new TListener(); listener.Created = (line, x, label) => { validate.Validate(line, x, label); var expectedDynamic = vwRef.Predict(x.Line, VowpalWabbitPredictionType.Dynamic); Assert.IsInstanceOfType(expectedDynamic, typeof(float)); var expected = vwRef.Predict(x.Line, VowpalWabbitPredictionType.Scalar); var actual = vwInMemoryShared2.Predict(x, VowpalWabbitPredictionType.Scalar, label); Assert.AreEqual((float)expectedDynamic, actual, 1e-5); Assert.AreEqual(expected, actual, 1e-5); if (references != null) Assert.AreEqual(references[index++], actual, 1e-5); }; } } internal static void AssertEqual(string expectedFile, VowpalWabbitPerformanceStatistics actual) { var expectedPerformanceStatistics = ReadPerformanceStatistics(expectedFile); AssertEqual(expectedPerformanceStatistics, actual); } internal static void FuzzyEqual(double? 
expected, double actual, double epsilon, string message) { if (expected == null) return; // from test/RunTests var delta = Math.Abs(expected.Value - actual); if (delta > epsilon) { // We have a 'big enough' difference, but this difference // may still not be meaningful in all contexts: // Big numbers should be compared by ratio rather than // by difference // Must ensure we can divide (avoid div-by-0) if (Math.Abs(actual) <= 1.0) { // If numbers are so small (close to zero), // ($delta > $Epsilon) suffices for deciding that // the numbers are meaningfully different Assert.Fail(string.Format("{0} vs {1}: delta={2} > Epsilon={3}: {4}", expected, actual, delta, epsilon, message)); } // Now we can safely divide (since abs($word2) > 0) // and determine the ratio difference from 1.0 var ratio_delta = Math.Abs(expected.Value / actual - 1.0); if (ratio_delta > epsilon) { Assert.Fail(string.Format("{0} vs {1}: delta={2} > Epsilon={3}: {4}", expected, actual, delta, epsilon, message)); } } } internal static void AssertEqual(VowpalWabbitPerformanceStatistics expected, VowpalWabbitPerformanceStatistics actual) { if (expected.TotalNumberOfFeatures != actual.TotalNumberOfFeatures) { Console.Error.WriteLine( "Warning: total number of features differs. Expected: {0} vs. actual: {1}", expected.TotalNumberOfFeatures, actual.TotalNumberOfFeatures); } Assert.AreEqual(expected.NumberOfExamplesPerPass, actual.NumberOfExamplesPerPass, "NumberOfExamplesPerPass"); FuzzyEqual(expected.AverageLoss, actual.AverageLoss, 1e-3, "AverageLoss"); FuzzyEqual(expected.BestConstant, actual.BestConstant, 1e-3, "BestConstant"); // TODO: something weir'd is happening here. BestConstantsLoss is 0 if using RunAll // has the proper value if just the unit test is run //Console.WriteLine(expected.BestConstantLoss + " vs. 
" + actual.BestConstantLoss); //Assert.AreEqual(expected.BestConstantLoss, actual.BestConstantLoss, 1e-5); FuzzyEqual(expected.WeightedExampleSum, actual.WeightedExampleSum, 1e-3, "WeightedExampleSum"); FuzzyEqual(expected.WeightedLabelSum, actual.WeightedLabelSum, 1e-3, "WeightedLabelSum"); } internal static void AssertEqual(VowpalWabbitStdErrPerformanceStatistics expected, VowpalWabbitPerformanceStatistics actual) { if (expected.TotalNumberOfFeatures != actual.TotalNumberOfFeatures) { Console.Error.WriteLine( "Warning: total number of features differs. Expected: {0} vs. actual: {1}", expected.TotalNumberOfFeatures, actual.TotalNumberOfFeatures); } if (expected.NumberOfExamplesPerPass != null) Assert.AreEqual(expected.NumberOfExamplesPerPass, actual.NumberOfExamplesPerPass, "NumberOfExamplesPerPass"); FuzzyEqual(expected.AverageLoss, actual.AverageLoss, 1e-3, "AverageLoss"); FuzzyEqual(expected.BestConstant, actual.BestConstant, 1e-3, "BestConstant"); // TODO: something weir'd is happening here. BestConstantsLoss is 0 if using RunAll // has the proper value if just the unit test is run //Console.WriteLine(expected.BestConstantLoss + " vs. 
" + actual.BestConstantLoss); //Assert.AreEqual(expected.BestConstantLoss, actual.BestConstantLoss, 1e-5); FuzzyEqual(expected.WeightedExampleSum, actual.WeightedExampleSum, 1e-3, "WeightedExampleSum"); FuzzyEqual(expected.WeightedLabelSum, actual.WeightedLabelSum, 1e-3, "WeightedLabelSum"); } internal static VowpalWabbitStdErrPerformanceStatistics ReadPerformanceStatistics(string filename) { var lines = File.ReadAllLines(filename); var numExamples = FindULongEntry(lines, "number of examples per pass = "); if (numExamples == 0) numExamples = FindULongEntry(lines, "number of examples = "); var stats = new VowpalWabbitStdErrPerformanceStatistics() { NumberOfExamplesPerPass = numExamples, TotalNumberOfFeatures = FindULongEntry(lines, "total feature number = "), AverageLoss = FindAverageLossEntry(lines), BestConstant = FindDoubleEntry(lines, "best constant = "), BestConstantLoss = FindDoubleEntry(lines, "best constant's loss = "), WeightedExampleSum = FindDoubleEntry(lines, "weighted example sum = "), WeightedLabelSum = FindDoubleEntry(lines, "weighted label sum = ") }; return stats; } private static double? FindAverageLossEntry(string[] lines) { var label = "average loss = "; var candidate = lines.FirstOrDefault(l => l.StartsWith(label)); if (candidate == null) { return null; } candidate = candidate.Substring(label.Length); if (candidate.EndsWith(" h")) { candidate = candidate.Substring(0, candidate.Length - 2); } var ret = 0.0; if (double.TryParse(candidate, NumberStyles.Float, CultureInfo.InvariantCulture, out ret)) { return ret; } return null; } private static double? FindDoubleEntry(string[] lines, string label) { var candidate = lines.FirstOrDefault(l => l.StartsWith(label)); if (candidate == null) { return null; } var ret = 0.0; if (double.TryParse(candidate.Substring(label.Length), NumberStyles.Float, CultureInfo.InvariantCulture, out ret)) { return ret; } return null; } private static ulong? 
FindULongEntry(string[] lines, string label) { var candidate = lines.FirstOrDefault(l => l.StartsWith(label)); if (candidate == null) { return null; } ulong ret = 0L; if (ulong.TryParse(candidate.Substring(label.Length), NumberStyles.Float, CultureInfo.InvariantCulture, out ret)) { return ret; } return null; } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/VowpalWabbit.g4000066400000000000000000000013611332666127000224370ustar00rootroot00000000000000grammar VowpalWabbit; number returns [float value] : NUMBER; label_simple : value=number (WS weight=number)? WS; label_cbf : value=INT ':' weight=NUMBER; namespace : '|' name=STRING? (WS feature)+ WS?; feature : index=(STRING | NUMBER) (':' x=number)? # FeatureSparse | ':' x=NUMBER # FeatureDense ; // needs more testing tag : ('`' STRING)? WS | STRING ; example : label_simple tag? namespace (WS namespace)*; start : (example NEWLINE)* example (NEWLINE | EOF); // greedy matching, if same length its matched in order NUMBER : INT | FLOAT; INT : [+-]? [0-9]+ ([Ee] '-'? [0-9]+)?; FLOAT : [+-]? [0-9]* '.' [0-9]+ ([Ee] '-'? [0-9]+)?; WS : [ \t]+; NEWLINE : '\r'? '\n'; STRING : ~([:| \t\r\n])+; vowpal-wabbit-8.6.1.dfsg1/cs/unittest/VowpalWabbitExampleJsonValidator.cs000066400000000000000000000232761332666127000265570ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using Newtonsoft.Json; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; using System.Threading.Tasks; using VW; using VW.Labels; using VW.Serializer; namespace cs_unittest { internal sealed class VowpalWabbitExampleJsonValidator : IDisposable { private VowpalWabbit vw; internal VowpalWabbitExampleJsonValidator(string args = null) : this(new VowpalWabbitSettings((args ?? 
"") + " --json")) { } internal VowpalWabbitExampleJsonValidator(VowpalWabbitSettings settings) { settings = (VowpalWabbitSettings)settings.Clone(); settings.EnableStringExampleGeneration = true; this.vw = new VowpalWabbit(settings); } public void Validate(string line, VowpalWabbitExampleCollection example, IVowpalWabbitLabelComparator labelComparator = null, ILabel label = null, bool validateVowpalWabbitString = true) { Assert.IsNotNull(example); var jsonExample = example as VowpalWabbitSingleLineExampleCollection; Assert.IsNotNull(jsonExample); using (var strExample = this.vw.ParseLine(line)) { var diff = strExample.Diff(this.vw, jsonExample.Example, labelComparator); Assert.IsNull(diff, diff + " generated string: '" + jsonExample.VowpalWabbitString + "'"); if (validateVowpalWabbitString) { using (var strJsonExample = this.vw.ParseLine(jsonExample.Example.VowpalWabbitString)) { diff = strExample.Diff(this.vw, strJsonExample, labelComparator); Assert.IsNull(diff, diff); } } } } public void Validate(string line, string json, IVowpalWabbitLabelComparator labelComparator = null, ILabel label = null, bool enableNativeJsonValidation = true) { using (var jsonSerializer = new VowpalWabbitJsonSerializer(this.vw)) using (var jsonExample = jsonSerializer.ParseAndCreate(json, label)) { this.Validate(line, jsonExample, labelComparator, label); if (enableNativeJsonValidation) { var examples = this.vw.ParseJson(json); Assert.AreEqual(1, examples.Count); using (var jsonNativeExample = new VowpalWabbitSingleLineExampleCollection(this.vw, examples[0])) { this.Validate(line, jsonNativeExample, labelComparator, label, validateVowpalWabbitString: false); } } } } public void Validate(string[] lines, JsonReader jsonReader, IVowpalWabbitLabelComparator labelComparator = null, ILabel label = null, int? 
index = null, VowpalWabbitJsonExtension extension = null) { VowpalWabbitExample[] strExamples = new VowpalWabbitExample[lines.Count()]; try { for (int i = 0; i < lines.Length; i++) strExamples[i] = this.vw.ParseLine(lines[i]); using (var jsonSerializer = new VowpalWabbitJsonSerializer(this.vw)) { if (extension != null) { jsonSerializer.RegisterExtension(extension); // extension are not supported with native JSON parsing } using (var jsonExample = (VowpalWabbitMultiLineExampleCollection)jsonSerializer.ParseAndCreate(jsonReader, label, index)) { var jsonExamples = new List(); if (jsonExample.SharedExample != null) jsonExamples.Add(jsonExample.SharedExample); jsonExamples.AddRange(jsonExample.Examples); Assert.AreEqual(strExamples.Length, jsonExamples.Count); for (int i = 0; i < strExamples.Length; i++) { using (var strJsonExample = this.vw.ParseLine(jsonExamples[i].VowpalWabbitString)) { var diff = strExamples[i].Diff(this.vw, jsonExamples[i], labelComparator); Assert.IsNull(diff, diff + " generated string: '" + jsonExamples[i].VowpalWabbitString + "'"); diff = strExamples[i].Diff(this.vw, strJsonExample, labelComparator); Assert.IsNull(diff, diff); } } } } } finally { foreach (var ex in strExamples) if (ex != null) ex.Dispose(); } } public void Validate(string[] lines, List examples, IVowpalWabbitLabelComparator labelComparator = null) { VowpalWabbitExample[] strExamples = new VowpalWabbitExample[lines.Count()]; try { for (int i = 0; i < lines.Length; i++) strExamples[i] = this.vw.ParseLine(lines[i]); for (int i = 0; i < strExamples.Length; i++) { var diff = strExamples[i].Diff(this.vw, examples[i], labelComparator); Assert.IsNull(diff, diff + " generated string: '" + strExamples[i].VowpalWabbitString + "'"); } } finally { foreach (var ex in strExamples) if (ex != null) ex.Dispose(); } } public void Validate(string[] lines, string json, IVowpalWabbitLabelComparator labelComparator = null, ILabel label = null, int? 
index = null, VowpalWabbitJsonExtension extension = null, bool enableNativeJsonValidation = true) { VowpalWabbitExample[] strExamples = new VowpalWabbitExample[lines.Count()]; try { for (int i = 0; i < lines.Length; i++) strExamples[i] = this.vw.ParseLine(lines[i]); using (var jsonSerializer = new VowpalWabbitJsonSerializer(this.vw)) { if (extension != null) { jsonSerializer.RegisterExtension(extension); // extension are not supported with native JSON parsing enableNativeJsonValidation = false; } List jsonNativeExamples = null; try { if (enableNativeJsonValidation) { jsonNativeExamples = this.vw.ParseJson(json); Assert.IsNotNull(jsonNativeExamples); } using (var jsonExample = (VowpalWabbitMultiLineExampleCollection)jsonSerializer.ParseAndCreate(json, label, index)) { var jsonExamples = new List(); if (jsonExample.SharedExample != null) jsonExamples.Add(jsonExample.SharedExample); jsonExamples.AddRange(jsonExample.Examples); Assert.AreEqual(strExamples.Length, jsonExamples.Count); if (enableNativeJsonValidation) Assert.AreEqual(strExamples.Length, jsonNativeExamples.Count); for (int i = 0; i < strExamples.Length; i++) { using (var strJsonExample = this.vw.ParseLine(jsonExamples[i].VowpalWabbitString)) { var diff = strExamples[i].Diff(this.vw, jsonExamples[i], labelComparator); Assert.IsNull(diff, diff + " generated string: '" + jsonExamples[i].VowpalWabbitString + "'"); diff = strExamples[i].Diff(this.vw, strJsonExample, labelComparator); Assert.IsNull(diff, diff); if (enableNativeJsonValidation) { diff = strExamples[i].Diff(this.vw, jsonNativeExamples[i], labelComparator); Assert.IsNull(diff, diff); } } } } } finally { if (jsonNativeExamples != null) { foreach (var ex in jsonNativeExamples) ex.Dispose(); } } } } finally { foreach (var ex in strExamples) if (ex != null) ex.Dispose(); } } public void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { if (disposing) { if (this.vw != null) { this.vw.Dispose(); this.vw = 
null; } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/VowpalWabbitExampleValidator.cs000066400000000000000000000217701332666127000257220ustar00rootroot00000000000000using Microsoft.VisualStudio.TestTools.UnitTesting; using Newtonsoft.Json; using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Threading.Tasks; using VW; using VW.Labels; using VW.Serializer; namespace cs_unittest { internal static class VowpalWabbitExampleValidator { internal static void Validate(string line, VowpalWabbitExample ex, IVowpalWabbitLabelComparator comparator, string args = null) { using (var vw = new VowpalWabbit(args)) using (var strExample = vw.ParseLine(line)) { var diff = strExample.Diff(vw, ex, comparator); Assert.IsNull(diff, diff + " generated string: '" + ex.VowpalWabbitString + "'"); } } } internal sealed class VowpalWabbitExampleValidator : IDisposable { private VowpalWabbit vw; private VowpalWabbit vwNative; private VowpalWabbit vwJson; private VowpalWabbitSingleExampleSerializerCompiler compiler; private Action serializer; private Action serializerNative; private IVowpalWabbitSerializer factorySerializer; private static string FixArgs(string args) { // remove model writing args = Regex.Replace(args, @"-f\s+[^ -]+", " "); // remove cache file args = Regex.Replace(args, @"-c\s+([^ -]+)?", " "); return args; } internal VowpalWabbitExampleValidator(string args) : this(new VowpalWabbitSettings(FixArgs(args))) { } internal VowpalWabbitExampleValidator(VowpalWabbitSettings settings) { var stringSettings = (VowpalWabbitSettings)settings.Clone(); stringSettings.EnableStringExampleGeneration = true; this.vw = new VowpalWabbit(stringSettings); var jsonSettings = (VowpalWabbitSettings)settings.Clone(); jsonSettings.Arguments += " --json"; this.vwJson = new VowpalWabbit(jsonSettings); this.compiler = this.vw.Serializer as VowpalWabbitSingleExampleSerializerCompiler; if (this.compiler != null) 
this.serializer = this.compiler.Func(this.vw.Native); this.vwNative = new VowpalWabbit(settings); this.compiler = this.vwNative.Serializer as VowpalWabbitSingleExampleSerializerCompiler; if (compiler != null) this.serializerNative = this.compiler.Func(this.vwNative.Native); this.factorySerializer = VowpalWabbitSerializerFactory.CreateSerializer(stringSettings).Create(this.vw.Native); } public void Validate(string line, TExample example, ILabel label = null) { IVowpalWabbitLabelComparator comparator; if (label == null || label == SharedLabel.Instance) { comparator = null; } else if (label is SimpleLabel) { comparator = VowpalWabbitLabelComparator.Simple; } else if (label is ContextualBanditLabel) { comparator = VowpalWabbitLabelComparator.ContextualBandit; } else { throw new ArgumentException("Label type not supported: " + label.GetType()); } using (var context = new VowpalWabbitMarshalContext(this.vw.Native)) using (var contextNative = new VowpalWabbitMarshalContext(this.vwNative.Native)) { // validate string serializer this.serializer(context, example, label); this.serializerNative(contextNative, example, label); // natively parsed string example compared against: // (1) natively build example // (2) string serialized & natively parsed string example using (var strExample = this.vw.Native.ParseLine(line)) using (var strConvertedExample = this.vw.Native.ParseLine(context.ToString())) using (var nativeExample = contextNative.ExampleBuilder.CreateExample()) using (var nativeExampleWithString = this.factorySerializer.Serialize(example, label)) { var diff = strExample.Diff(this.vw.Native, strConvertedExample, comparator); Assert.IsNull(diff, diff + " generated string: '" + context.ToString() + "'"); diff = strExample.Diff(this.vw.Native, nativeExample, comparator); Assert.IsNull(diff, diff); if (!strExample.IsNewLine) { Assert.IsFalse(string.IsNullOrEmpty(nativeExampleWithString.VowpalWabbitString)); 
Assert.IsFalse(string.IsNullOrEmpty(this.factorySerializer.SerializeToString(example, label))); } if (this.vw.Native.Settings.TypeInspector == JsonTypeInspector.Default) { var jsonStr = JsonConvert.SerializeObject(example); using (var jsonSerializer = new VowpalWabbitJsonSerializer(this.vw.Native)) { using (var jsonExample = jsonSerializer.ParseAndCreate(jsonStr, label)) { var ex = ((VowpalWabbitSingleLineExampleCollection)jsonExample).Example; diff = strExample.Diff(this.vw.Native, ex, comparator); Assert.IsNull(diff, $"{diff}\n json: '{jsonStr}'"); } } List exampleList = null; try { exampleList = this.vwJson.ParseJson(jsonStr); Assert.AreEqual(1, exampleList.Count); diff = strExample.Diff(this.vw.Native, exampleList[0], comparator); Assert.IsNull(diff, $"{diff}\n json: '{jsonStr}'"); } finally { if (exampleList != null) { foreach (var ex in exampleList) ex.Dispose(); } } } } } } public void Validate(IEnumerable lines, TExample example, IVowpalWabbitLabelComparator labelComparator = null, ILabel label = null) { // natively parsed string example compared against: // (1) natively build example // (2) string serialized & natively parsed string example var strExamples = lines.Select(l => this.vw.Native.ParseLine(l)).ToArray(); using (var nativeExampleWithString = (VowpalWabbitMultiLineExampleCollection)this.factorySerializer.Serialize(example, label)) { var examplesToCompare = new List(); if (nativeExampleWithString.SharedExample != null) examplesToCompare.Add(nativeExampleWithString.SharedExample); examplesToCompare.AddRange(nativeExampleWithString.Examples); examplesToCompare = examplesToCompare.Where(e => !e.IsNewLine).ToList(); Assert.AreEqual(strExamples.Length, examplesToCompare.Count); for (int i = 0; i < strExamples.Length; i++) { var diff = strExamples[i].Diff(this.vw.Native, examplesToCompare[i], labelComparator); Assert.IsNull(diff, diff + " generated string: '" + examplesToCompare[i].VowpalWabbitString + "'"); } } foreach (var ex in strExamples) 
ex.Dispose(); } public void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { if (disposing) { if (this.vw != null) { this.vw.Dispose(); this.vw = null; } if (this.vwNative != null) { this.vwNative.Dispose(); this.vwNative = null; } if (this.vwJson != null) { this.vwJson.Dispose(); this.vwJson = null; } if (this.factorySerializer != null) { this.factorySerializer.Dispose(); this.factorySerializer = null; } } } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/VowpalWabbitListenerToEvents.cs000066400000000000000000000003061332666127000257260ustar00rootroot00000000000000using System; using VW.Labels; namespace cs_unittest { public class VowpalWabbitListenerToEvents : VowpalWabbitBaseListener { public Action Created; } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/VowpalWabbitStdErrPerformanceStatistics.cs000066400000000000000000000011231332666127000301070ustar00rootroot00000000000000using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; namespace cs_unittest { class VowpalWabbitStdErrPerformanceStatistics { public double? AverageLoss { get; set; } public double? BestConstant { get; set; } public double? BestConstantLoss { get; set; } public ulong? NumberOfExamplesPerPass { get; set; } public ulong? TotalNumberOfFeatures { get; set; } public double? WeightedExampleSum { get; set; } public double? 
WeightedLabelSum { get; set; } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/azure.provision.ps1000066400000000000000000000004301332666127000234000ustar00rootroot00000000000000Login-AzureRmAccount Select-AzureRmSubscription -SubscriptionId "FILL ME IN" $rg = 'FILL ME IN' New-AzureRmResourceGroup -Name $rg -Location "East US" # -DeploymentDebugLogLevel All New-AzureRmResourceGroupDeployment -ResourceGroupName $rg -TemplateFile .\azuredeploy.json vowpal-wabbit-8.6.1.dfsg1/cs/unittest/azuredeploy.json000066400000000000000000000110201332666127000230310ustar00rootroot00000000000000{ "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", "contentVersion": "1.0.0.0", "variables": { "storageName": "[concat(resourceGroup().name, 'storage')]", "serviceBusName": "[concat(resourceGroup().name, 'sb')]", "eventHubInputName": "[concat(resourceGroup().name, 'input')]", "eventHubEvalName": "[concat(resourceGroup().name, 'eval')]" }, "resources": [ { "type": "Microsoft.Storage/storageAccounts", "name": "[variables('storageName')]", "apiVersion": "2015-06-15", "location": "[resourceGroup().location]", "properties": { "accountType": "Standard_LRS" } }, { "apiVersion": "2015-08-01", "name": "[concat(variables('serviceBusName'))]", "type": "Microsoft.ServiceBus/namespaces", "location": "[resourceGroup().location]", "kind": "Messaging", "sku": { "name": "Standard", "tier": "Standard", "capacity": 1 }, "resources": [ { "apiVersion": "2015-08-01", "name": "[concat(variables('serviceBusName'), '/', 'OwnerKey')]", "type": "Microsoft.ServiceBus/namespaces/authorizationRules", "location": "[resourceGroup().location]", "dependsOn": [ "[concat('Microsoft.ServiceBus/namespaces/', variables('serviceBusName'))]" ], "properties": { "KeyName": "OwnerKey", "ClaimType": "SharedAccessKey", "ClaimValue": "None", "Rights": [ "listen", "send", "manage" ], "Revision": -1 } } ] }, { "apiVersion": "2015-08-01", "name": "[concat(variables('serviceBusName'), '/', 
variables('eventHubInputName'))]", "type": "Microsoft.ServiceBus/namespaces/eventHubs", "location": "[resourceGroup().location]", "properties": { "path": "[variables('eventHubInputName')]", "MessageRetentionInDays": 7, "PartitionCount": 2 }, "dependsOn": [ "[concat('Microsoft.ServiceBus/namespaces/', variables('serviceBusName'))]" ], "resources": [ { "apiVersion": "2015-08-01", "name": "Manage", "type": "AuthorizationRules", "dependsOn": [ "[concat(resourceId('Microsoft.ServiceBus/namespaces', variables('serviceBusName')), '/eventHubs/', variables('eventHubInputName'))]" ], "properties": { "keyName": "Manage", "claimType": "SharedAccessKey", "claimValue": "None", "rights": [ "Send", "Listen", "Manage" ] } } ] }, { "apiVersion": "2015-08-01", "name": "[concat(variables('serviceBusName'), '/', variables('eventHubEvalName'))]", "type": "Microsoft.ServiceBus/namespaces/eventHubs", "location": "[resourceGroup().location]", "properties": { "path": "[variables('eventHubEvalName')]", "MessageRetentionInDays": 7, "PartitionCount": 2 }, "dependsOn": [ "[concat('Microsoft.ServiceBus/namespaces/', variables('serviceBusName'))]" ], "resources": [ { "apiVersion": "2015-08-01", "name": "Manage", "type": "AuthorizationRules", "dependsOn": [ "[concat(resourceId('Microsoft.ServiceBus/namespaces', variables('serviceBusName')), '/eventHubs/', variables('eventHubEvalName'))]" ], "properties": { "keyName": "Manage", "claimType": "SharedAccessKey", "claimValue": "None", "rights": [ "Send", "Listen", "Manage" ] } } ] } ], "outputs": { "storageConnectionString": { "type": "string", "value": "[concat('DefaultEndpointsProtocol=https;AccountName=',variables('storageName'),';AccountKey=',listkeys(resourceId('Microsoft.Storage/storageAccounts', variables('storageName')), '2015-06-15').key1)]" }, "eventHubInputConnectionString": { "type": "string", "value": "[listkeys(resourceId('Microsoft.ServiceBus/namespaces/eventhubs/authorizationRules', variables('serviceBusName'), 
variables('eventHubInputName'), 'Manage'), '2015-08-01').primaryConnectionString]" }, "eventHubEvalConnectionString": { "type": "string", "value": "[listkeys(resourceId('Microsoft.ServiceBus/namespaces/eventhubs/authorizationRules', variables('serviceBusName'), variables('eventHubEvalName'), 'Manage'), '2015-08-01').primaryConnectionString]" } } } vowpal-wabbit-8.6.1.dfsg1/cs/unittest/cs_unittest.csproj000066400000000000000000000445771332666127000234100ustar00rootroot00000000000000 Debug AnyCPU {007B7DE0-1D9B-498D-ACFC-E9D33058F22E} Library Properties cs_unittest cs_unittest v4.6 512 {3AC096D0-A1C2-E12C-1390-A8335801FDAB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} 10.0 $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion) $(ProgramFiles)\Common Files\microsoft shared\VSTT\$(VisualStudioVersion)\UITestExtensionPackages False UnitTest $(SolutionDir)\ true $(ProjectDir)..\..\vowpalwabbit\x64\Debug\ DEBUG;TRACE full x64 prompt MinimumRecommendedRules.ruleset $(ProjectDir)..\..\vowpalwabbit\x64\Release\ TRACE true pdbonly x64 prompt MinimumRecommendedRules.ruleset true ..\vw_key.snk true $(SolutionDir)\x64\DebugLeakCheck\ DEBUG;TRACE full x64 prompt MinimumRecommendedRules.ruleset $(SolutionDir)\packages\Antlr4.Runtime.4.6.4\lib\net45\Antlr4.Runtime.dll $(SolutionDir)\packages\Microsoft.ApplicationInsights.PerfCounterCollector.2.2.0\lib\net45\Microsoft.AI.PerfCounterCollector.dll True $(SolutionDir)\packages\Microsoft.ApplicationInsights.2.2.0\lib\net46\Microsoft.ApplicationInsights.dll True $(SolutionDir)\packages\Microsoft.Azure.KeyVault.Core.2.0.4\lib\net45\Microsoft.Azure.KeyVault.Core.dll True $(SolutionDir)\packages\Microsoft.Data.Edm.5.8.1\lib\net40\Microsoft.Data.Edm.dll True $(SolutionDir)\packages\Microsoft.Data.OData.5.8.1\lib\net40\Microsoft.Data.OData.dll True $(SolutionDir)\packages\Microsoft.Data.Services.Client.5.8.1\lib\net40\Microsoft.Data.Services.Client.dll True 
$(SolutionDir)\packages\WindowsAzure.ServiceBus.3.4.3\lib\net45-full\Microsoft.ServiceBus.dll True $(SolutionDir)\packages\Microsoft.Azure.ServiceBus.EventProcessorHost.2.2.8\lib\net45-full\Microsoft.ServiceBus.Messaging.EventProcessorHost.dll True $(SolutionDir)\packages\WindowsAzure.Storage.7.2.1\lib\net40\Microsoft.WindowsAzure.Storage.dll True $(SolutionDir)\packages\morelinq.1.4.0\lib\net35\MoreLinq.dll True $(SolutionDir)\packages\Newtonsoft.Json.9.0.1\lib\net45\Newtonsoft.Json.dll True $(SolutionDir)\packages\System.Reactive.Core.3.1.1\lib\net46\System.Reactive.Core.dll True $(SolutionDir)\packages\System.Reactive.Interfaces.3.1.1\lib\net45\System.Reactive.Interfaces.dll True $(SolutionDir)\packages\System.Reactive.Linq.3.1.1\lib\net46\System.Reactive.Linq.dll True $(SolutionDir)\packages\System.Reactive.PlatformServices.3.1.1\lib\net46\System.Reactive.PlatformServices.dll True $(SolutionDir)\packages\System.Reactive.Windows.Threading.3.1.1\lib\net45\System.Reactive.Windows.Threading.dll True $(SolutionDir)\packages\System.Spatial.5.8.1\lib\net40\System.Spatial.dll True $(SolutionDir)\packages\Microsoft.Tpl.Dataflow.4.5.24\lib\portable-net45+win8+wpa81\System.Threading.Tasks.Dataflow.dll True %(RecursiveDir)%(FileName)%(Extension) PreserveNewest True True RunTests.tt True True TestMarshalNumeric.tt {43e32c1d-21d6-4be3-8fa5-d5ba379cba53} azure {e4e962ae-7056-4eb0-a8c5-8dc824a4b068} cs {01a85382-c3e9-480a-86bf-fafe4ad107a7} cs_console {9e27fa94-ab34-4736-8427-fb7a2ba90d52} cs_json {08636f79-5577-4af2-8eed-ec8a5bc14ac4} cs_parallel {6a9cbeab-427f-4d8d-9559-b76b42b0895c} cs_testcommon {85e55ae0-3784-4968-9271-c81af560e1c1} vw_clr {e621e022-c1f8-433f-905a-ab9a3de072b7} vw_common PreserveNewest PreserveNewest Designer MSBuild:Compile True False TextTemplatingFileGenerator RunTests.cs TextTemplatingFileGenerator TestMarshalNumeric.cs False False False False This project references NuGet package(s) that are missing on this computer. 
Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. vowpal-wabbit-8.6.1.dfsg1/cs/unittest/json/000077500000000000000000000000001332666127000205525ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/cs/unittest/json/test_array.json000066400000000000000000000000501332666127000236150ustar00rootroot00000000000000[{"_label":1,"f":1}, {"_label":0,"f":2}]vowpal-wabbit-8.6.1.dfsg1/cs/unittest/json/test_newline.json000066400000000000000000000000471332666127000241460ustar00rootroot00000000000000 {"_label":1,"f":1} {"_label":0,"f":2} vowpal-wabbit-8.6.1.dfsg1/cs/unittest/packages.config000066400000000000000000000040221332666127000225440ustar00rootroot00000000000000 vowpal-wabbit-8.6.1.dfsg1/cs/version.props000066400000000000000000000004071332666127000204750ustar00rootroot00000000000000 8.4.0.1 vowpal-wabbit-8.6.1.dfsg1/cs/vw_key.snk000066400000000000000000000011241332666127000177410ustar00rootroot00000000000000$RSA2QZR+8x':{iIRShA+u"L2JƃIf^<Ȃ Ҿdƈd j忿E HVc)TYG%̢/2%x5rVI<D> u*>T3O&VF]u7c6Ոl\0)$stw_E&FMjfI?ͦfciE},ܕ؃o*pjws/kkFW;XD܄;JF]H9zrkflIg ;(WAˍ't.w$ qWF۔ʓT瑑hEڸ&|u2-Z`BBoW5G{<szȈ"ޖg#AFA>2o_pj%5&B/^縰$.gnn5>.Jԛl8- J2#(M6X:qǎK RQK,GWƹa|5H' s8|1 .wr߄.Üd1vowpal-wabbit-8.6.1.dfsg1/demo/000077500000000000000000000000001332666127000162415ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/advertising/000077500000000000000000000000001332666127000205605ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/advertising/naive_baseline.py000077500000000000000000000054431332666127000241070ustar00rootroot00000000000000#!/usr/bin/python import sys from subprocess import call, Popen, PIPE from math import log, exp from os import devnull # The learning algorithm is vowpal wabbit, available at https://github.com/JohnLangford/vowpal_wabbit/wiki vw_train_cmd = '../../vowpalwabbit/vw -c -f model --bfgs --passes 30 -b 22 --loss_function logistic --l2 14 --termination 0.00001 --holdout_off' 
vw_test_cmd = '../../vowpalwabbit/vw -t -i model -p /dev/stdout' def get_features(line): feat = line[2:] # Bucketizing the integer features on a logarithmic scale for i in range(8): if feat[i]: v = int(feat[i]) if v>0: feat[i] = str(int(log(v+0.5)/log(1.5))) return ' '.join(['%d_%s' % (i,v) for i,v in enumerate(feat) if v]) def train_test_oneday(day): ts_beginning_test = 86400*(day-1) with open('data.txt') as f: line = f.readline() # Beginning of the training set: 3 weeks before the test period while int(line.split()[0]) < ts_beginning_test - 86400*21: line = f.readline() call('rm -f .cache', shell=True) vw = Popen(vw_train_cmd, shell=True, stdin=PIPE) print '---------- Training on days %d to %d ----------------' % (day-21, day-1) print while int(line.split()[0]) < ts_beginning_test: line = line[:-1].split('\t') label = -1 if line[1]: conv_ts = int(line[1]) if conv_ts < ts_beginning_test: label = 1 # Positive label iff conversion and the conversion occured before the test period out = '%d | %s' % (label, get_features(line)) print >>vw.stdin, out line = f.readline() vw.stdin.close() vw.wait() print print '---------- Testing on day %d ----------------' % (day-21) vw = Popen(vw_test_cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=open(devnull, 'w')) ll = 0 n = 0 # Test is one day long while int(line.split()[0]) < ts_beginning_test + 86400: line = line[:-1].split('\t') print >>vw.stdin, '| '+get_features(line) dotproduct = float(vw.stdout.readline()) # Test log likelihood if line[1]: # Positive example ll += log(1+exp(-dotproduct)) else: # Negative sample ll += log(1+exp(dotproduct)) n += 1 line = f.readline() return (ll, n) def main(): ll = 0 n = 0 # Iterating over the 7 test days for day in range(54,61): ll_day, n_day = train_test_oneday(day) ll += ll_day n += n_day print ll_day, n_day print print 'Average test log likelihood: %f' % (ll/n) if __name__ == "__main__": main() 
vowpal-wabbit-8.6.1.dfsg1/demo/advertising/readme.txt000066400000000000000000000013471332666127000225630ustar00rootroot00000000000000 ------ Criteo Conversion Logs ------ This dataset contains feature values and conversion feedback for millions of clicks on display ads. It served to benchmark algorithms described in the paper "Modeling Delayed Feedback in Display Advertising", KDD 2014 by O.Chapelle (http://olivier.chapelle.cc/pub/delayedConv.pdf) =================================================== Instructions - Go to http://labs.criteo.com/downloads/2014-conversion-logs-dataset/ - Download the tar file after agreeing to the terms and conditions - Untar the file - Move data.txt to this directory - Run the naive_baseline.py script ==================================================== Dataset assembled by Olivier Chapelle (o.chapelle@criteo.com) vowpal-wabbit-8.6.1.dfsg1/demo/dbpedia/000077500000000000000000000000001332666127000176315ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/dbpedia/README000066400000000000000000000020531332666127000205110ustar00rootroot00000000000000This directory contains a simple end-to-end example to use vowpal wabbit for multi-class classification. The task relates to DBpedia ontology classification where the labels are one of 14 ontology classes (e.g. Company, Film, Athlete) and the input data consists of the title and content associated with the entity in DBpedia. This is adapted from the classification-example.sh from fastText (https://github.com/facebookresearch/fastText). In order to run: 1) Run `make` in the vowpal_wabbit root directory 2) Make classification example executable: `chmod +x ./classification-example.sh` 3) Run `./classification-example.sh` which will: (a) Download and parse the data to convert it into the vw format (b) Train a model (c) Evaluate the model (d) Output the predictions You can also evaluate the predictions (you'll need python and scikit-learn and its dependencies installed). 
After running the script above you can run the evaluation script (thanks to @Scott-Graham-Bose): python eval.py --truth data/dbpedia.test --pred result/dbpedia.test.predict vowpal-wabbit-8.6.1.dfsg1/demo/dbpedia/classification-example.sh000077500000000000000000000027221332666127000246170ustar00rootroot00000000000000#!/usr/bin/env bash # # Adapted from fastText/classification-example.sh myshuf() { perl -MList::Util=shuffle -e 'print shuffle(<>);' "$@"; } normalize_text() { tr '[:upper:]' '[:lower:]' | sed -e 's/^\([0-9][0-9]*\),/\1 | /g' | \ sed -e "s/'/ ' /g" -e 's/"//g' -e 's/\./ \. /g' -e 's/
/ /g' \ -e 's/,/ , /g' -e 's/(/ ( /g' -e 's/)/ ) /g' -e 's/\!/ \! /g' \ -e 's/\?/ \? /g' -e 's/\;/ /g' -e 's/\:/ /g' | tr -s " " | myshuf } RESULTDIR=result DATADIR=data mkdir -p "${RESULTDIR}" mkdir -p "${DATADIR}" if [ ! -f "${DATADIR}/dbpedia.train" ] then wget -c "https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz" -O "${DATADIR}/dbpedia_csv.tar.gz" tar -xzvf "${DATADIR}/dbpedia_csv.tar.gz" -C "${DATADIR}" cat "${DATADIR}/dbpedia_csv/train.csv" | normalize_text > "${DATADIR}/dbpedia.train" cat "${DATADIR}/dbpedia_csv/test.csv" | normalize_text > "${DATADIR}/dbpedia.test" fi VW_EXEC=../../vowpalwabbit/vw NUM_CLASSES=`cat ${DATADIR}/dbpedia_csv/classes.txt | wc -l` ${VW_EXEC} -d "${DATADIR}/dbpedia.train" \ --cache_file "${RESULTDIR}/dbpedia.cache" -f "${RESULTDIR}/dbpedia.bin" \ --oaa ${NUM_CLASSES} --passes 5 --ngram 2 --skips 2 \ --loss_function hinge --bit_precision 25 --l2 1e-5 \ -k --threads ${VW_EXEC} -t -d "${DATADIR}/dbpedia.test" \ -i "${RESULTDIR}/dbpedia.bin" ${VW_EXEC} -t -d "${DATADIR}/dbpedia.test" \ -i "${RESULTDIR}/dbpedia.bin" \ -p "${RESULTDIR}/dbpedia.test.predict" \ --quiet vowpal-wabbit-8.6.1.dfsg1/demo/dbpedia/eval.py000077500000000000000000000015271332666127000211420ustar00rootroot00000000000000#!/usr/bin/env python from __future__ import print_function import argparse from sklearn.metrics import classification_report, confusion_matrix if __name__ == '__main__': parser = argparse.ArgumentParser(description='Evaluate Classification Results') parser.add_argument('--truth', dest='truth', type=str, required=True, help='path to truth file') parser.add_argument('--pred', dest='pred', type=str, required=True, help='path to prediction file') args = parser.parse_args() y_true = [] with open(args.truth, 'rb') as t: for line in t: y_true.append(int(line.split(' ', 1)[0])) y_pred = [] with open(args.pred, 'rb') as p: for line in p: y_pred.append(int(line.strip())) print(confusion_matrix(y_true, y_pred)) print() 
print(classification_report(y_true, y_pred)) vowpal-wabbit-8.6.1.dfsg1/demo/dependencyparsing/000077500000000000000000000000001332666127000217435ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/dependencyparsing/Makefile000066400000000000000000000022751332666127000234110ustar00rootroot00000000000000SHELL=/bin/bash VW=../../vowpalwabbit/vw python_exe=python2.7 train_data=wsj_train_subset test_data=wsj_test_subset tag=tags .SECONDARY: all: @cat README.md clean: rm -f *.model *.predictions *.parse *.vw *.cache *~ *.writing %.check: @test -x "$$(which $*)" || { \ echo "ERROR: you need to install $*" 1>&2; \ exit 1; \ } %.vw: @echo "preprocessing dependency parse data ..." 1>&2 @echo "Note that due to the licence issue, we only provide a subset of the corpus" 1>&2 @$(python_exe) parse_data.py $(train_data) $(train_data).vw @$(python_exe) parse_data.py $(test_data) $(test_data).vw dep.model: $(train_data).vw @$(VW) --passes 3 -d $< -k -c --search_rollin mix_per_roll --search_task dep_parser --search 12 --search_alpha 1e-5 --search_rollout oracle --holdout_off -f $@ --search_history_length 3 --search_no_caching -b 30 --root_label 8 --num_label 12 --nn 5 --ftrl dep.test.predictions: $(test_data).vw dep.model @$(VW) -d $< -t $(search_option) -i dep.model -p $@ dep.test.parse: dep.test.predictions @$(python_exe) parse_test_result.py $(test_data) $< $(tag) > $@ dep.perf: dep.test.parse @$(python_exe) evaluate.py dep.test.parse $(test_data) .PHONY: all clean vowpal-wabbit-8.6.1.dfsg1/demo/dependencyparsing/README.md000077500000000000000000000010431332666127000232230ustar00rootroot00000000000000Search for dependencies ------------------------------------- This demo shows the performance of a dependency parser implemented in search framework. For more details, see http://arxiv.org/abs/1503.05615 Note that due to the licence issue, we only provide a subset of English Penn Treebank. If you have the full set of the data, please change the path in Makefile accordingly. 
### Instruction ### - `make dep.perf`: downloads the subset of the English Penn Treebank corpus, trains a dependency parser, and computes test set statistics. vowpal-wabbit-8.6.1.dfsg1/demo/dependencyparsing/evaluate.py000077500000000000000000000061071332666127000241320ustar00rootroot00000000000000#!/usr/bin/env python # evaluation script modified from redshift parser import os import sys from collections import defaultdict def pc(num, den): return (num / float(den+1e-100)) * 100 def fmt_acc(label, n, l_corr, u_corr, total_errs): l_pc = pc(l_corr, n) u_pc = pc(u_corr, n) err_pc = pc(n - l_corr, total_errs) return '%s\t%d\t%.3f\t%.3f\t%.3f' % (label, n, l_pc, u_pc, err_pc) def gen_toks(loc): sent_strs = open(str(loc)).read().strip().split('\n\n') token = None i = 0 for sent_str in sent_strs: tokens = [Token(i, tok_str.split()) for i, tok_str in enumerate(sent_str.split('\n'))] for token in tokens: yield sent_str, token class Token(object): def __init__(self, id_, attrs): self.id = id_ # CoNLL format if len(attrs) == 10: new_attrs = [str(int(attrs[0]) - 1)] new_attrs.append(attrs[1]) new_attrs.append(attrs[3]) new_attrs.append(str(int(attrs[-4]) - 1)) new_attrs.append(attrs[-3]) attrs = new_attrs self.label = attrs[-1] if self.label.lower() == 'root': self.label = 'ROOT' try: head = int(attrs[-2]) except: try: self.label = 'P' head = int(attrs[-1]) except: print attrs raise attrs.pop() attrs.pop() self.head = head self.pos = attrs.pop() self.word = attrs.pop() self.dir = 'R' if head >= 0 and head < self.id else 'L' def mymain(test_loc, gold_loc, eval_punct=False): if not os.path.exists(test_loc): test_loc.mkdir() n_by_label = defaultdict(lambda: defaultdict(int)) u_by_label = defaultdict(lambda: defaultdict(int)) l_by_label = defaultdict(lambda: defaultdict(int)) N = 0 u_nc = 0 l_nc = 0 for (sst, t), (ss, g) in zip(gen_toks(test_loc), gen_toks(gold_loc)): if not eval_punct and g.word in ",.-;:'\"!?`{}()[]": continue prev_g = g prev_t = t u_c = g.head == t.head l_c = 
u_c and g.label.lower() == t.label.lower() N += 1 l_nc += l_c u_nc += u_c n_by_label[g.dir][g.label] += 1 u_by_label[g.dir][g.label] += u_c l_by_label[g.dir][g.label] += l_c n_l_err = N - l_nc for D in ['L', 'R']: n_other = 0 l_other = 0 u_other = 0 for label, n in sorted(n_by_label[D].items(), key=lambda i: i[1], reverse=True): if n == 0: continue elif n < 100: n_other += n l_other += l_by_label[D][label] u_other += u_by_label[D][label] else: l_corr = l_by_label[D][label] u_corr = u_by_label[D][label] yield 'U: %.3f' % pc(u_nc, N) yield 'L: %.3f' % pc(l_nc, N) if __name__ == '__main__': if(sys.argv < 3): print 'Usage: parsed_pred_file gold_test_conll_file' sys.exit(0) for line in mymain(sys.argv[1], sys.argv[2], eval_punct=False): print line vowpal-wabbit-8.6.1.dfsg1/demo/dependencyparsing/parse_data.py000066400000000000000000000014531332666127000244230ustar00rootroot00000000000000from sys import argv hash = {} def readtags(): for line in open('tags').readlines(): hash[line.split()[0]] = int(line.strip().split()[1]) if __name__ == '__main__': c = 1 readtags() if len(argv) != 3: print 'parseDepData.py input output' data = open(argv[1]).readlines() writer = open(argv[2],'w') for line in data: if line == '\n': writer.write('\n') continue splits = line.strip().lower().split() strw = "|w %s"%splits[1].replace(":","COL"); strp = "|p %s"%splits[4].replace(":","COL"); tag = splits[8] if tag not in hash: hash[tag] = c c+=1 #writer.write('%s 1.0 %s:%s%s %s\n'%((int(splits[7])+1) + (hash[tag]<<8), int(splits[7]),tag,strw, strp)) writer.write('%s %s %s:%s%s %s\n' % (int(splits[7]), hash[tag], int(splits[7]), tag, strw, strp)) writer.close() vowpal-wabbit-8.6.1.dfsg1/demo/dependencyparsing/parse_test_result.py000066400000000000000000000013661332666127000260720ustar00rootroot00000000000000from sys import argv from sys import exit dict = {} if len(argv) <4: print "Usage: test_conll_file annotation_file_from_vw tag_id_mapping" exit(1) for data in open(argv[3]).readlines(): 
dict[data.strip().split()[1]] = data.strip().split()[0] annotation = open(argv[2]).readlines() #for item in list(annotation): # if item == ' w\n': # annotation.remove(item) for idx, line in enumerate(open(argv[1]).readlines()): item = line.split() # conll07 if len(item) ==10: item[-4] = annotation[idx].strip().split(":")[0] item[-3] = dict[annotation[idx].strip().split(":")[1]] # wsj corpus elif len(item) >0: # print idx item[-2] = annotation[idx].strip().split(":")[0] item[-1] = dict[annotation[idx].strip().split(":")[1]] print "\t".join(item) vowpal-wabbit-8.6.1.dfsg1/demo/dependencyparsing/tags000066400000000000000000000001161332666127000226220ustar00rootroot00000000000000dep 6 vc 9 obj 7 sub 5 vmod 1 prd 10 pmod 3 nmod 2 amod 11 p 4 sbar 12 root 8 vowpal-wabbit-8.6.1.dfsg1/demo/dependencyparsing/wsj_test_subset000066400000000000000000001243031332666127000251200ustar00rootroot000000000000001 No _ RB RB ADV _ 4 VMOD 2 , _ , , . _ 4 P 3 it _ PR PRP PRON _ 4 SUB 4 was _ VB VBD VERB _ 0 ROOT 5 n't _ RB RB ADV _ 4 VMOD 6 Black _ NN NNP NOUN _ 7 NMOD 7 Monday _ NN NNP NOUN _ 4 PRD 8 . _ . . . _ 4 P 1 But _ CC CC CONJ _ 33 VMOD 2 while _ IN IN ADP _ 33 VMOD 3 the _ DT DT DET _ 7 NMOD 4 New _ NN NNP NOUN _ 7 NMOD 5 York _ NN NNP NOUN _ 7 NMOD 6 Stock _ NN NNP NOUN _ 7 NMOD 7 Exchange _ NN NNP NOUN _ 8 SUB 8 did _ VB VBD VERB _ 2 SBAR 9 n't _ RB RB ADV _ 8 VMOD 10 fall _ VB VB VERB _ 8 VC 11 apart _ RB RB ADV _ 10 VMOD 12 Friday _ NN NNP NOUN _ 10 VMOD 13 as _ IN IN ADP _ 10 VMOD 14 the _ DT DT DET _ 18 NMOD 15 Dow _ NN NNP NOUN _ 18 NMOD 16 Jones _ NN NNP NOUN _ 18 NMOD 17 Industrial _ NN NNP NOUN _ 18 NMOD 18 Average _ NN NNP NOUN _ 19 SUB 19 plunged _ VB VBD VERB _ 13 SBAR 20 190.58 _ CD CD NUM _ 21 NMOD 21 points _ NN NNS NOUN _ 19 VMOD 22 -- _ : : . 
_ 23 P 23 most _ JJ JJS ADJ _ 21 NMOD 24 of _ IN IN ADP _ 23 NMOD 25 it _ PR PRP PRON _ 24 PMOD 26 in _ IN IN ADP _ 23 NMOD 27 the _ DT DT DET _ 29 NMOD 28 final _ JJ JJ ADJ _ 29 NMOD 29 hour _ NN NN NOUN _ 26 PMOD 30 -- _ : : . _ 23 P 31 it _ PR PRP PRON _ 33 SUB 32 barely _ RB RB ADV _ 33 VMOD 33 managed _ VB VBD VERB _ 0 ROOT 34 to _ TO TO PRT _ 35 VMOD 35 stay _ VB VB VERB _ 33 VMOD 36 this _ DT DT DET _ 37 NMOD 37 side _ NN NN NOUN _ 35 VMOD 38 of _ IN IN ADP _ 37 NMOD 39 chaos _ NN NN NOUN _ 38 PMOD 40 . _ . . . _ 33 P 1 Some _ DT DT DET _ 4 NMOD 2 `` _ `` `` . _ 4 P 3 circuit _ NN NN NOUN _ 4 NMOD 4 breakers _ NN NNS NOUN _ 12 SUB 5 '' _ '' '' . _ 4 P 6 installed _ VB VBN VERB _ 4 NMOD 7 after _ IN IN ADP _ 6 VMOD 8 the _ DT DT DET _ 11 NMOD 9 October _ NN NNP NOUN _ 11 NMOD 10 1987 _ CD CD NUM _ 11 NMOD 11 crash _ NN NN NOUN _ 7 PMOD 12 failed _ VB VBD VERB _ 0 ROOT 13 their _ PR PRP$ PRON _ 15 NMOD 14 first _ JJ JJ ADJ _ 15 NMOD 15 test _ NN NN NOUN _ 12 OBJ 16 , _ , , . _ 18 P 17 traders _ NN NNS NOUN _ 18 SUB 18 say _ VB VBP VERB _ 12 VMOD 19 , _ , , . _ 18 P 20 unable _ JJ JJ ADJ _ 12 VMOD 21 to _ TO TO PRT _ 22 VMOD 22 cool _ VB VB VERB _ 20 AMOD 23 the _ DT DT DET _ 25 NMOD 24 selling _ NN NN NOUN _ 25 NMOD 25 panic _ NN NN NOUN _ 22 OBJ 26 in _ IN IN ADP _ 25 NMOD 27 both _ DT DT DET _ 30 NMOD 28 stocks _ NN NNS NOUN _ 30 NMOD 29 and _ CC CC CONJ _ 30 NMOD 30 futures _ NN NNS NOUN _ 26 PMOD 31 . _ . . . _ 12 P 1 The _ DT DT DET _ 5 NMOD 2 49 _ CD CD NUM _ 5 NMOD 3 stock _ NN NN NOUN _ 5 NMOD 4 specialist _ NN NN NOUN _ 5 NMOD 5 firms _ NN NNS NOUN _ 15 NMOD 6 on _ IN IN ADP _ 5 NMOD 7 the _ DT DT DET _ 10 NMOD 8 Big _ NN NNP NOUN _ 10 NMOD 9 Board _ NN NNP NOUN _ 10 NMOD 10 floor _ NN NN NOUN _ 6 PMOD 11 -- _ : : . 
_ 15 P 12 the _ DT DT DET _ 15 NMOD 13 buyers _ NN NNS NOUN _ 15 NMOD 14 and _ CC CC CONJ _ 15 NMOD 15 sellers _ NN NNS NOUN _ 29 SUB 16 of _ IN IN ADP _ 15 NMOD 17 last _ JJ JJ ADJ _ 18 NMOD 18 resort _ NN NN NOUN _ 16 PMOD 19 who _ WP WP PRON _ 15 NMOD 20 were _ VB VBD VERB _ 19 SBAR 21 criticized _ VB VBN VERB _ 20 VC 22 after _ IN IN ADP _ 21 VMOD 23 the _ DT DT DET _ 25 NMOD 24 1987 _ CD CD NUM _ 25 NMOD 25 crash _ NN NN NOUN _ 22 PMOD 26 -- _ : : . _ 15 P 27 once _ RB RB ADV _ 29 VMOD 28 again _ RB RB ADV _ 27 AMOD 29 could _ MD MD VERB _ 0 ROOT 30 n't _ RB RB ADV _ 29 VMOD 31 handle _ VB VB VERB _ 29 VC 32 the _ DT DT DET _ 34 NMOD 33 selling _ NN NN NOUN _ 34 NMOD 34 pressure _ NN NN NOUN _ 31 OBJ 35 . _ . . . _ 29 P 1 Big _ JJ JJ ADJ _ 3 NMOD 2 investment _ NN NN NOUN _ 3 NMOD 3 banks _ NN NNS NOUN _ 4 SUB 4 refused _ VB VBD VERB _ 25 VMOD 5 to _ TO TO PRT _ 6 VMOD 6 step _ VB VB VERB _ 4 VMOD 7 up _ IN IN ADP _ 6 VMOD 8 to _ TO TO PRT _ 7 AMOD 9 the _ DT DT DET _ 10 NMOD 10 plate _ NN NN NOUN _ 8 PMOD 11 to _ TO TO PRT _ 12 VMOD 12 support _ VB VB VERB _ 6 VMOD 13 the _ DT DT DET _ 16 NMOD 14 beleaguered _ JJ JJ ADJ _ 16 NMOD 15 floor _ NN NN NOUN _ 16 NMOD 16 traders _ NN NNS NOUN _ 12 OBJ 17 by _ IN IN ADP _ 12 VMOD 18 buying _ VB VBG VERB _ 17 PMOD 19 big _ JJ JJ ADJ _ 20 NMOD 20 blocks _ NN NNS NOUN _ 18 OBJ 21 of _ IN IN ADP _ 20 NMOD 22 stock _ NN NN NOUN _ 21 PMOD 23 , _ , , . _ 25 P 24 traders _ NN NNS NOUN _ 25 SUB 25 say _ VB VBP VERB _ 0 ROOT 26 . _ . . . 
_ 25 P 1 Heavy _ JJ JJ ADJ _ 2 NMOD 2 selling _ NN NN NOUN _ 14 SUB 3 of _ IN IN ADP _ 2 NMOD 4 Standard _ NN NNP NOUN _ 7 NMOD 5 & _ CC CC CONJ _ 7 NMOD 6 Poor _ NN NNP NOUN _ 7 NMOD 7 's _ PO POS PRT _ 10 NMOD 8 500-stock _ JJ JJ ADJ _ 10 NMOD 9 index _ NN NN NOUN _ 10 NMOD 10 futures _ NN NNS NOUN _ 3 PMOD 11 in _ IN IN ADP _ 2 NMOD 12 Chicago _ NN NNP NOUN _ 11 PMOD 13 relentlessly _ RB RB ADV _ 14 VMOD 14 beat _ VB VBD VERB _ 0 ROOT 15 stocks _ NN NNS NOUN _ 14 OBJ 16 downward _ RB RB ADV _ 14 VMOD 17 . _ . . . _ 14 P 1 Seven _ CD CD NUM _ 4 NMOD 2 Big _ NN NNP NOUN _ 4 NMOD 3 Board _ NN NNP NOUN _ 4 NMOD 4 stocks _ NN NNS NOUN _ 23 NMOD 5 -- _ : : . _ 23 P 6 UAL _ NN NNP NOUN _ 23 NMOD 7 , _ , , . _ 23 P 8 AMR _ NN NNP NOUN _ 23 NMOD 9 , _ , , . _ 23 P 10 BankAmerica _ NN NNP NOUN _ 23 NMOD 11 , _ , , . _ 23 P 12 Walt _ NN NNP NOUN _ 13 NMOD 13 Disney _ NN NNP NOUN _ 23 NMOD 14 , _ , , . _ 23 P 15 Capital _ NN NNP NOUN _ 16 NMOD 16 Cities\/ABC _ NN NNP NOUN _ 23 NMOD 17 , _ , , . _ 23 P 18 Philip _ NN NNP NOUN _ 19 NMOD 19 Morris _ NN NNP NOUN _ 23 NMOD 20 and _ CC CC CONJ _ 23 NMOD 21 Pacific _ NN NNP NOUN _ 23 NMOD 22 Telesis _ NN NNP NOUN _ 23 NMOD 23 Group _ NN NNP NOUN _ 25 SUB 24 -- _ : : . _ 23 P 25 stopped _ VB VBD VERB _ 0 ROOT 26 trading _ VB VBG VERB _ 25 VMOD 27 and _ CC CC CONJ _ 25 VMOD 28 never _ RB RB ADV _ 29 VMOD 29 resumed _ VB VBD VERB _ 25 VMOD 30 . _ . . . _ 25 P 1 The _ DT DT DET _ 2 NMOD 2 finger-pointing _ NN NN NOUN _ 3 SUB 3 has _ VB VBZ VERB _ 0 ROOT 4 already _ RB RB ADV _ 3 VMOD 5 begun _ VB VBN VERB _ 3 VC 6 . _ . . . _ 3 P 1 `` _ `` `` . _ 5 P 2 The _ DT DT DET _ 4 NMOD 3 equity _ NN NN NOUN _ 4 NMOD 4 market _ NN NN NOUN _ 5 SUB 5 was _ VB VBD VERB _ 0 ROOT 6 illiquid _ JJ JJ ADJ _ 5 PRD 7 . _ . . . _ 5 P 1 Once _ RB RB ADV _ 7 VMOD 2 again _ RB RB ADV _ 1 AMOD 3 { _ -L -LRB- . _ 7 P 4 the _ DT DT DET _ 5 NMOD 5 specialists _ NN NNS NOUN _ 7 SUB 6 } _ -R -RRB- . 
_ 7 P 7 were _ VB VBD VERB _ 25 VMOD 8 not _ RB RB ADV _ 7 VMOD 9 able _ JJ JJ ADJ _ 7 PRD 10 to _ TO TO PRT _ 11 VMOD 11 handle _ VB VB VERB _ 9 AMOD 12 the _ DT DT DET _ 13 NMOD 13 imbalances _ NN NNS NOUN _ 11 OBJ 14 on _ IN IN ADP _ 13 NMOD 15 the _ DT DT DET _ 16 NMOD 16 floor _ NN NN NOUN _ 14 PMOD 17 of _ IN IN ADP _ 16 NMOD 18 the _ DT DT DET _ 22 NMOD 19 New _ NN NNP NOUN _ 22 NMOD 20 York _ NN NNP NOUN _ 22 NMOD 21 Stock _ NN NNP NOUN _ 22 NMOD 22 Exchange _ NN NNP NOUN _ 17 PMOD 23 , _ , , . _ 25 P 24 '' _ '' '' . _ 25 P 25 said _ VB VBD VERB _ 0 ROOT 26 Christopher _ NN NNP NOUN _ 27 NMOD 27 Pedersen _ NN NNP NOUN _ 31 NMOD 28 , _ , , . _ 31 P 29 senior _ JJ JJ ADJ _ 31 NMOD 30 vice _ NN NN NOUN _ 31 NMOD 31 president _ NN NN NOUN _ 25 SUB 32 at _ IN IN ADP _ 31 NMOD 33 Twenty-First _ NN NNP NOUN _ 35 NMOD 34 Securities _ NN NNP NOUN _ 35 NMOD 35 Corp _ NN NNP NOUN _ 32 PMOD 36 . _ . . . _ 25 P 1 Countered _ VB VBD VERB _ 0 ROOT 2 James _ NN NNP NOUN _ 3 NMOD 3 Maguire _ NN NNP NOUN _ 5 NMOD 4 , _ , , . _ 5 P 5 chairman _ NN NN NOUN _ 1 SUB 6 of _ IN IN ADP _ 5 NMOD 7 specialists _ NN NNS NOUN _ 10 NMOD 8 Henderson _ NN NNP NOUN _ 10 NMOD 9 Brothers _ NN NNP NOUN _ 10 NMOD 10 Inc. _ NN NNP NOUN _ 6 PMOD 11 : _ : : . _ 1 P 12 `` _ `` `` . _ 1 P 13 It _ PR PRP PRON _ 14 SUB 14 is _ VB VBZ VERB _ 1 VMOD 15 easy _ JJ JJ ADJ _ 14 PRD 16 to _ TO TO PRT _ 17 VMOD 17 say _ VB VB VERB _ 14 VMOD 18 the _ DT DT DET _ 19 NMOD 19 specialist _ NN NN NOUN _ 20 SUB 20 is _ VB VBZ VERB _ 17 VMOD 21 n't _ RB RB ADV _ 20 VMOD 22 doing _ VB VBG VERB _ 20 VC 23 his _ PR PRP$ PRON _ 24 NMOD 24 job _ NN NN NOUN _ 22 OBJ 25 . _ . . . _ 1 P 1 When _ WR WRB ADV _ 12 VMOD 2 the _ DT DT DET _ 3 NMOD 3 dollar _ NN NN NOUN _ 4 SUB 4 is _ VB VBZ VERB _ 1 SBAR 5 in _ IN IN ADP _ 4 VMOD 6 a _ DT DT DET _ 7 NMOD 7 free-fall _ NN NN NOUN _ 5 PMOD 8 , _ , , . 
_ 12 P 9 even _ RB RB ADV _ 11 NMOD 10 central _ JJ JJ ADJ _ 11 NMOD 11 banks _ NN NNS NOUN _ 12 SUB 12 ca _ MD MD VERB _ 0 ROOT 13 n't _ RB RB ADV _ 12 VMOD 14 stop _ VB VB VERB _ 12 VC 15 it _ PR PRP PRON _ 14 OBJ 16 . _ . . . _ 12 P 1 Speculators _ NN NNS NOUN _ 2 SUB 2 are _ VB VBP VERB _ 0 ROOT 3 calling _ VB VBG VERB _ 2 VC 4 for _ IN IN ADP _ 3 VMOD 5 a _ DT DT DET _ 6 NMOD 6 degree _ NN NN NOUN _ 4 PMOD 7 of _ IN IN ADP _ 6 NMOD 8 liquidity _ NN NN NOUN _ 7 PMOD 9 that _ WD WDT DET _ 6 NMOD 10 is _ VB VBZ VERB _ 9 SBAR 11 not _ RB RB ADV _ 10 VMOD 12 there _ RB RB ADV _ 10 VMOD 13 in _ IN IN ADP _ 10 VMOD 14 the _ DT DT DET _ 15 NMOD 15 market _ NN NN NOUN _ 13 PMOD 16 . _ . . . _ 2 P 17 '' _ '' '' . _ 2 P 1 Many _ JJ JJ ADJ _ 3 NMOD 2 money _ NN NN NOUN _ 3 NMOD 3 managers _ NN NNS NOUN _ 6 NMOD 4 and _ CC CC CONJ _ 6 NMOD 5 some _ DT DT DET _ 6 NMOD 6 traders _ NN NNS NOUN _ 7 SUB 7 had _ VB VBD VERB _ 0 ROOT 8 already _ RB RB ADV _ 7 VMOD 9 left _ VB VBN VERB _ 7 VC 10 their _ PR PRP$ PRON _ 11 NMOD 11 offices _ NN NNS NOUN _ 9 OBJ 12 early _ RB RB ADV _ 14 NMOD 13 Friday _ NN NNP NOUN _ 14 NMOD 14 afternoon _ NN NN NOUN _ 9 VMOD 15 on _ IN IN ADP _ 9 VMOD 16 a _ DT DT DET _ 19 NMOD 17 warm _ JJ JJ ADJ _ 19 NMOD 18 autumn _ NN NN NOUN _ 19 NMOD 19 day _ NN NN NOUN _ 15 PMOD 20 -- _ : : . _ 9 P 21 because _ IN IN ADP _ 9 VMOD 22 the _ DT DT DET _ 24 NMOD 23 stock _ NN NN NOUN _ 24 NMOD 24 market _ NN NN NOUN _ 25 SUB 25 was _ VB VBD VERB _ 21 SBAR 26 so _ RB RB ADV _ 27 AMOD 27 quiet _ JJ JJ ADJ _ 25 PRD 28 . _ . . . _ 7 P 1 Then _ RB RB ADV _ 15 VMOD 2 in _ IN IN ADP _ 15 VMOD 3 a _ DT DT DET _ 5 NMOD 4 lightning _ NN NN NOUN _ 5 NMOD 5 plunge _ NN NN NOUN _ 2 PMOD 6 , _ , , . 
_ 15 P 7 the _ DT DT DET _ 10 NMOD 8 Dow _ NN NNP NOUN _ 10 NMOD 9 Jones _ NN NNP NOUN _ 10 NMOD 10 industrials _ NN NNS NOUN _ 15 SUB 11 in _ IN IN ADP _ 15 VMOD 12 barely _ RB RB ADV _ 14 NMOD 13 an _ DT DT DET _ 12 AMOD 14 hour _ NN NN NOUN _ 11 PMOD 15 surrendered _ VB VBD VERB _ 0 ROOT 16 about _ RB RB ADV _ 18 NMOD 17 a _ DT DT DET _ 16 AMOD 18 third _ JJ JJ ADJ _ 15 OBJ 19 of _ IN IN ADP _ 18 NMOD 20 their _ PR PRP$ PRON _ 21 NMOD 21 gains _ NN NNS NOUN _ 23 NMOD 22 this _ DT DT DET _ 23 NMOD 23 year _ NN NN NOUN _ 19 PMOD 24 , _ , , . _ 15 P 25 chalking _ VB VBG VERB _ 15 VMOD 26 up _ RP RP PRT _ 25 VMOD 27 a _ DT DT DET _ 34 NMOD 28 190.58-point _ JJ JJ ADJ _ 32 AMOD 29 , _ , , . _ 32 P 30 or _ CC CC CONJ _ 32 AMOD 31 6.9 _ CD CD NUM _ 32 AMOD 32 % _ NN NN NOUN _ 34 NMOD 33 , _ , , . _ 32 P 34 loss _ NN NN NOUN _ 25 OBJ 35 on _ IN IN ADP _ 34 NMOD 36 the _ DT DT DET _ 37 NMOD 37 day _ NN NN NOUN _ 35 PMOD 38 in _ IN IN ADP _ 25 VMOD 39 gargantuan _ JJ JJ ADJ _ 41 NMOD 40 trading _ NN NN NOUN _ 41 NMOD 41 volume _ NN NN NOUN _ 38 PMOD 42 . _ . . . _ 15 P 1 Final-hour _ JJ JJ ADJ _ 2 NMOD 2 trading _ NN NN NOUN _ 3 SUB 3 accelerated _ VB VBD VERB _ 0 ROOT 4 to _ TO TO PRT _ 3 VMOD 5 108.1 _ CD CD NUM _ 6 AMOD 6 million _ CD CD NUM _ 7 NMOD 7 shares _ NN NNS NOUN _ 10 NMOD 8 , _ , , . _ 10 P 9 a _ DT DT DET _ 10 NMOD 10 record _ NN NN NOUN _ 4 PMOD 11 for _ IN IN ADP _ 10 NMOD 12 the _ DT DT DET _ 14 NMOD 13 Big _ NN NNP NOUN _ 14 NMOD 14 Board _ NN NNP NOUN _ 11 PMOD 15 . _ . . . _ 3 P 1 At _ IN IN ADP _ 11 VMOD 2 the _ DT DT DET _ 3 NMOD 3 end _ NN NN NOUN _ 1 PMOD 4 of _ IN IN ADP _ 3 NMOD 5 the _ DT DT DET _ 6 NMOD 6 day _ NN NN NOUN _ 4 PMOD 7 , _ , , . _ 11 P 8 251.2 _ CD CD NUM _ 9 AMOD 9 million _ CD CD NUM _ 10 NMOD 10 shares _ NN NNS NOUN _ 11 SUB 11 were _ VB VBD VERB _ 0 ROOT 12 traded _ VB VBN VERB _ 11 VC 13 . _ . . . 
_ 11 P 1 The _ DT DT DET _ 4 NMOD 2 Dow _ NN NNP NOUN _ 4 NMOD 3 Jones _ NN NNP NOUN _ 4 NMOD 4 industrials _ NN NNS NOUN _ 5 SUB 5 closed _ VB VBD VERB _ 0 ROOT 6 at _ IN IN ADP _ 5 VMOD 7 2569.26 _ CD CD NUM _ 6 PMOD 8 . _ . . . _ 5 P 1 The _ DT DT DET _ 3 NMOD 2 Dow _ NN NNP NOUN _ 3 NMOD 3 's _ PO POS PRT _ 4 NMOD 4 decline _ NN NN NOUN _ 5 SUB 5 was _ VB VBD VERB _ 0 ROOT 6 second _ JJ JJ ADJ _ 5 PRD 7 in _ IN IN ADP _ 6 AMOD 8 point _ NN NN NOUN _ 9 NMOD 9 terms _ NN NNS NOUN _ 7 PMOD 10 only _ RB RB ADV _ 11 PMOD 11 to _ TO TO PRT _ 6 AMOD 12 the _ DT DT DET _ 16 NMOD 13 508-point _ JJ JJ ADJ _ 16 NMOD 14 Black _ NN NNP NOUN _ 16 NMOD 15 Monday _ NN NNP NOUN _ 16 NMOD 16 crash _ NN NN NOUN _ 11 PMOD 17 that _ WD WDT DET _ 16 NMOD 18 occurred _ VB VBD VERB _ 17 SBAR 19 Oct. _ NN NNP NOUN _ 18 VMOD 20 19 _ CD CD NUM _ 19 NMOD 21 , _ , , . _ 19 P 22 1987 _ CD CD NUM _ 19 NMOD 23 . _ . . . _ 5 P 1 In _ IN IN ADP _ 11 VMOD 2 percentage _ NN NN NOUN _ 3 NMOD 3 terms _ NN NNS NOUN _ 1 PMOD 4 , _ , , . _ 11 P 5 however _ RB RB ADV _ 11 VMOD 6 , _ , , . _ 11 P 7 the _ DT DT DET _ 9 NMOD 8 Dow _ NN NNP NOUN _ 9 NMOD 9 's _ PO POS PRT _ 10 NMOD 10 dive _ NN NN NOUN _ 11 SUB 11 was _ VB VBD VERB _ 0 ROOT 12 the _ DT DT DET _ 13 NMOD 13 12th-worst _ JJ JJ ADJ _ 17 NMOD 14 ever _ RB RB ADV _ 13 NMOD 15 and _ CC CC CONJ _ 17 NMOD 16 the _ DT DT DET _ 17 NMOD 17 sharpest _ JJ JJS ADJ _ 11 PRD 18 since _ IN IN ADP _ 17 NMOD 19 the _ DT DT DET _ 20 NMOD 20 market _ NN NN NOUN _ 21 SUB 21 fell _ VB VBD VERB _ 18 SBAR 22 156.83 _ CD CD NUM _ 26 NMOD 23 , _ , , . _ 26 P 24 or _ CC CC CONJ _ 26 NMOD 25 8 _ CD CD NUM _ 26 NMOD 26 % _ NN NN NOUN _ 21 VMOD 27 , _ , , . _ 21 P 28 a _ DT DT DET _ 29 NMOD 29 week _ NN NN NOUN _ 30 PMOD 30 after _ IN IN ADP _ 21 VMOD 31 Black _ NN NNP NOUN _ 32 NMOD 32 Monday _ NN NNP NOUN _ 30 PMOD 33 . _ . . . 
_ 11 P 1 The _ DT DT DET _ 2 NMOD 2 Dow _ NN NNP NOUN _ 3 SUB 3 fell _ VB VBD VERB _ 0 ROOT 4 22.6 _ CD CD NUM _ 5 NMOD 5 % _ NN NN NOUN _ 3 VMOD 6 on _ IN IN ADP _ 3 VMOD 7 Black _ NN NNP NOUN _ 8 NMOD 8 Monday _ NN NNP NOUN _ 6 PMOD 9 . _ . . . _ 3 P 1 Shares _ NN NNP NOUN _ 11 SUB 2 of _ IN IN ADP _ 1 NMOD 3 UAL _ NN NNP NOUN _ 6 NMOD 4 , _ , , . _ 6 P 5 the _ DT DT DET _ 6 NMOD 6 parent _ NN NN NOUN _ 2 PMOD 7 of _ IN IN ADP _ 6 NMOD 8 United _ NN NNP NOUN _ 9 NMOD 9 Airlines _ NN NNP NOUN _ 7 PMOD 10 , _ , , . _ 6 P 11 were _ VB VBD VERB _ 0 ROOT 12 extremely _ RB RB ADV _ 13 AMOD 13 active _ JJ JJ ADJ _ 11 PRD 14 all _ DT DT DET _ 15 NMOD 15 day _ NN NN NOUN _ 11 VMOD 16 Friday _ NN NNP NOUN _ 11 VMOD 17 , _ , , . _ 11 P 18 reacting _ VB VBG VERB _ 11 VMOD 19 to _ TO TO PRT _ 18 VMOD 20 news _ NN NN NOUN _ 22 NMOD 21 and _ CC CC CONJ _ 22 NMOD 22 rumors _ NN NNS NOUN _ 19 PMOD 23 about _ IN IN ADP _ 22 NMOD 24 the _ DT DT DET _ 29 NMOD 25 proposed _ VB VBN VERB _ 29 NMOD 26 $ _ $ $ . _ 29 P 27 6.79 _ CD CD NUM _ 26 AMOD 28 billion _ CD CD NUM _ 26 AMOD 29 buy-out _ NN NN NOUN _ 23 PMOD 30 of _ IN IN ADP _ 29 NMOD 31 the _ DT DT DET _ 32 NMOD 32 airline _ NN NN NOUN _ 30 PMOD 33 by _ IN IN ADP _ 29 NMOD 34 an _ DT DT DET _ 36 NMOD 35 employee-management _ JJ JJ ADJ _ 36 NMOD 36 group _ NN NN NOUN _ 33 PMOD 37 . _ . . . _ 11 P 1 Wall _ NN NNP NOUN _ 3 NMOD 2 Street _ NN NNP NOUN _ 3 NMOD 3 's _ PO POS PRT _ 10 NMOD 4 takeover-stock _ JJ JJ ADJ _ 5 NMOD 5 speculators _ NN NNS NOUN _ 10 NMOD 6 , _ , , . _ 10 P 7 or _ CC CC CONJ _ 10 NMOD 8 `` _ `` `` . _ 10 P 9 risk _ NN NN NOUN _ 10 NMOD 10 arbitragers _ NN NNS NOUN _ 13 SUB 11 , _ , , . _ 10 P 12 '' _ '' '' . 
_ 10 P 13 had _ VB VBD VERB _ 0 ROOT 14 placed _ VB VBN VERB _ 13 VC 15 unusually _ RB RB ADV _ 16 AMOD 16 large _ JJ JJ ADJ _ 17 NMOD 17 bets _ NN NNS NOUN _ 14 OBJ 18 that _ IN IN ADP _ 17 NMOD 19 a _ DT DT DET _ 20 NMOD 20 takeover _ NN NN NOUN _ 21 SUB 21 would _ MD MD VERB _ 26 VMOD 22 succeed _ VB VB VERB _ 21 VC 23 and _ CC CC CONJ _ 26 VMOD 24 UAL _ NN NNP NOUN _ 25 NMOD 25 stock _ NN NN NOUN _ 26 SUB 26 would _ MD MD VERB _ 18 SBAR 27 rise _ VB VB VERB _ 26 VC 28 . _ . . . _ 13 P 1 At _ IN IN ADP _ 6 VMOD 2 2:43 _ CD CD NUM _ 4 NMOD 3 p.m. _ RB RB ADV _ 4 NMOD 4 EDT _ NN NNP NOUN _ 1 PMOD 5 , _ , , . _ 6 P 6 came _ VB VBD VERB _ 14 VMOD 7 the _ DT DT DET _ 9 NMOD 8 sickening _ JJ JJ ADJ _ 9 NMOD 9 news _ NN NN NOUN _ 6 SUB 10 : _ : : . _ 14 P 11 The _ DT DT DET _ 13 NMOD 12 Big _ NN NNP NOUN _ 13 NMOD 13 Board _ NN NNP NOUN _ 14 SUB 14 was _ VB VBD VERB _ 0 ROOT 15 halting _ VB VBG VERB _ 14 VC 16 trading _ VB VBG VERB _ 15 OBJ 17 in _ IN IN ADP _ 16 NMOD 18 UAL _ NN NNP NOUN _ 17 PMOD 19 , _ , , . _ 15 P 20 `` _ `` `` . _ 15 P 21 pending _ VB VBG VERB _ 15 VMOD 22 news _ NN NN NOUN _ 21 PMOD 23 . _ . . . _ 14 P 24 '' _ '' '' . _ 14 P 1 On _ IN IN ADP _ 15 VMOD 2 the _ DT DT DET _ 4 NMOD 3 exchange _ NN NN NOUN _ 4 NMOD 4 floor _ NN NN NOUN _ 1 PMOD 5 , _ , , . _ 15 P 6 `` _ `` `` . _ 15 P 7 as _ RB RB ADV _ 15 VMOD 8 soon _ RB RB ADV _ 7 AMOD 9 as _ IN IN ADP _ 7 AMOD 10 UAL _ NN NNP NOUN _ 11 SUB 11 stopped _ VB VBD VERB _ 9 SBAR 12 trading _ VB VBG VERB _ 11 VMOD 13 , _ , , . _ 15 P 14 we _ PR PRP PRON _ 15 SUB 15 braced _ VB VBD VERB _ 21 VMOD 16 for _ IN IN ADP _ 15 VMOD 17 a _ DT DT DET _ 18 NMOD 18 panic _ NN NN NOUN _ 16 PMOD 19 , _ , , . _ 21 P 20 '' _ '' '' . _ 21 P 21 said _ VB VBD VERB _ 0 ROOT 22 one _ CD CD NUM _ 25 NMOD 23 top _ JJ JJ ADJ _ 25 NMOD 24 floor _ NN NN NOUN _ 25 NMOD 25 trader _ NN NN NOUN _ 21 SUB 26 . _ . . . 
_ 21 P 1 Several _ JJ JJ ADJ _ 2 NMOD 2 traders _ NN NNS NOUN _ 3 SUB 3 could _ MD MD VERB _ 0 ROOT 4 be _ VB VB VERB _ 3 VC 5 seen _ VB VBN VERB _ 4 VC 6 shaking _ VB VBG VERB _ 5 VMOD 7 their _ PR PRP$ PRON _ 8 NMOD 8 heads _ NN NNS NOUN _ 6 OBJ 9 when _ WR WRB ADV _ 5 VMOD 10 the _ DT DT DET _ 11 NMOD 11 news _ NN NN NOUN _ 12 SUB 12 flashed _ VB VBD VERB _ 9 SBAR 13 . _ . . . _ 3 P 1 For _ IN IN ADP _ 6 VMOD 2 weeks _ NN NNS NOUN _ 1 PMOD 3 , _ , , . _ 6 P 4 the _ DT DT DET _ 5 NMOD 5 market _ NN NN NOUN _ 6 SUB 6 had _ VB VBD VERB _ 0 ROOT 7 been _ VB VBN VERB _ 6 VC 8 nervous _ JJ JJ ADJ _ 7 PRD 9 about _ IN IN ADP _ 8 AMOD 10 takeovers _ NN NNS NOUN _ 9 PMOD 11 , _ , , . _ 7 P 12 after _ IN IN ADP _ 7 VMOD 13 Campeau _ NN NNP NOUN _ 15 NMOD 14 Corp. _ NN NNP NOUN _ 15 NMOD 15 's _ PO POS PRT _ 17 NMOD 16 cash _ NN NN NOUN _ 17 NMOD 17 crunch _ NN NN NOUN _ 18 SUB 18 spurred _ VB VBD VERB _ 12 SBAR 19 concern _ NN NN NOUN _ 18 OBJ 20 about _ IN IN ADP _ 19 NMOD 21 the _ DT DT DET _ 22 NMOD 22 prospects _ NN NNS NOUN _ 20 PMOD 23 for _ IN IN ADP _ 22 NMOD 24 future _ JJ JJ ADJ _ 27 NMOD 25 highly _ RB RB ADV _ 26 AMOD 26 leveraged _ JJ JJ ADJ _ 27 NMOD 27 takeovers _ NN NNS NOUN _ 23 PMOD 28 . _ . . . _ 6 P 1 And _ CC CC CONJ _ 9 VMOD 2 10 _ CD CD NUM _ 3 NMOD 3 minutes _ NN NNS NOUN _ 4 PMOD 4 after _ IN IN ADP _ 9 VMOD 5 the _ DT DT DET _ 8 NMOD 6 UAL _ NN NNP NOUN _ 8 NMOD 7 trading _ NN NN NOUN _ 8 NMOD 8 halt _ NN NN NOUN _ 4 PMOD 9 came _ VB VBD VERB _ 0 ROOT 10 news _ NN NN NOUN _ 9 SUB 11 that _ IN IN ADP _ 10 NMOD 12 the _ DT DT DET _ 14 NMOD 13 UAL _ NN NNP NOUN _ 14 NMOD 14 group _ NN NN NOUN _ 15 SUB 15 could _ MD MD VERB _ 11 SBAR 16 n't _ RB RB ADV _ 15 VMOD 17 get _ VB VB VERB _ 15 VC 18 financing _ NN NN NOUN _ 17 OBJ 19 for _ IN IN ADP _ 18 NMOD 20 its _ PR PRP$ PRON _ 21 NMOD 21 bid _ NN NN NOUN _ 19 PMOD 22 . _ . . . _ 9 P 1 At _ IN IN ADP _ 7 VMOD 2 this _ DT DT DET _ 3 NMOD 3 point _ NN NN NOUN _ 1 PMOD 4 , _ , , . 
_ 7 P 5 the _ DT DT DET _ 6 NMOD 6 Dow _ NN NNP NOUN _ 7 SUB 7 was _ VB VBD VERB _ 0 ROOT 8 down _ RB RB ADV _ 7 PRD 9 about _ RB RB ADV _ 11 NMOD 10 35 _ CD CD NUM _ 9 AMOD 11 points _ NN NNS NOUN _ 8 AMOD 12 . _ . . . _ 7 P 1 The _ DT DT DET _ 2 NMOD 2 market _ NN NN NOUN _ 3 SUB 3 crumbled _ VB VBD VERB _ 0 ROOT 4 . _ . . . _ 3 P 1 Arbitragers _ NN NNS NOUN _ 2 SUB 2 could _ MD MD VERB _ 11 VMOD 3 n't _ RB RB ADV _ 2 VMOD 4 dump _ VB VB VERB _ 2 VC 5 their _ PR PRP$ PRON _ 7 NMOD 6 UAL _ NN NNP NOUN _ 7 NMOD 7 stock _ NN NN NOUN _ 4 OBJ 8 -- _ : : . _ 11 P 9 but _ CC CC CONJ _ 11 VMOD 10 they _ PR PRP PRON _ 11 SUB 11 rid _ VB VBD VERB _ 0 ROOT 12 themselves _ PR PRP PRON _ 11 OBJ 13 of _ IN IN ADP _ 11 VMOD 14 nearly _ RB RB ADV _ 15 AMOD 15 every _ DT DT DET _ 19 NMOD 16 `` _ `` `` . _ 19 P 17 rumor _ NN NN NOUN _ 19 NMOD 18 '' _ '' '' . _ 19 P 19 stock _ NN NN NOUN _ 13 PMOD 20 they _ PR PRP PRON _ 21 SUB 21 had _ VB VBD VERB _ 19 NMOD 22 . _ . . . _ 11 P 1 For _ IN IN ADP _ 6 VMOD 2 example _ NN NN NOUN _ 1 PMOD 3 , _ , , . _ 6 P 4 their _ PR PRP$ PRON _ 5 NMOD 5 selling _ NN NN NOUN _ 6 SUB 6 caused _ VB VBD VERB _ 0 ROOT 7 trading _ NN NN NOUN _ 8 NMOD 8 halts _ NN NNS NOUN _ 10 SUB 9 to _ TO TO PRT _ 10 VMOD 10 be _ VB VB VERB _ 6 VMOD 11 declared _ VB VBN VERB _ 10 VC 12 in _ IN IN ADP _ 11 VMOD 13 USAir _ NN NNP NOUN _ 14 NMOD 14 Group _ NN NNP NOUN _ 39 NMOD 15 , _ , , . _ 14 P 16 which _ WD WDT DET _ 14 NMOD 17 closed _ VB VBD VERB _ 16 SBAR 18 down _ RB RB ADV _ 17 VMOD 19 3 _ CD CD NUM _ 20 AMOD 20 7\/8 _ CD CD NUM _ 18 AMOD 21 to _ TO TO PRT _ 18 AMOD 22 41 _ CD CD NUM _ 23 AMOD 23 1\/2 _ CD CD NUM _ 21 PMOD 24 , _ , , . _ 39 P 25 Delta _ NN NNP NOUN _ 27 NMOD 26 Air _ NN NNP NOUN _ 27 NMOD 27 Lines _ NN NNP NOUN _ 39 NMOD 28 , _ , , . 
_ 27 P 29 which _ WD WDT DET _ 27 NMOD 30 fell _ VB VBD VERB _ 29 SBAR 31 7 _ CD CD NUM _ 32 AMOD 32 3\/4 _ CD CD NUM _ 30 VMOD 33 to _ TO TO PRT _ 30 VMOD 34 69 _ CD CD NUM _ 35 AMOD 35 1\/4 _ CD CD NUM _ 33 PMOD 36 , _ , , . _ 39 P 37 and _ CC CC CONJ _ 39 NMOD 38 Philips _ NN NNP NOUN _ 39 NMOD 39 Industries _ NN NNP NOUN _ 12 PMOD 40 , _ , , . _ 39 P 41 which _ WD WDT DET _ 39 NMOD 42 sank _ VB VBD VERB _ 41 SBAR 43 3 _ CD CD NUM _ 42 VMOD 44 to _ TO TO PRT _ 42 VMOD 45 21 _ CD CD NUM _ 46 AMOD 46 1\/2 _ CD CD NUM _ 44 PMOD 47 . _ . . . _ 6 P 1 These _ DT DT DET _ 2 NMOD 2 stocks _ NN NNS NOUN _ 4 SUB 3 eventually _ RB RB ADV _ 4 VMOD 4 reopened _ VB VBD VERB _ 0 ROOT 5 . _ . . . _ 4 P 1 But _ CC CC CONJ _ 7 VMOD 2 as _ IN IN ADP _ 7 VMOD 3 panic _ NN NN NOUN _ 4 SUB 4 spread _ VB VBD VERB _ 2 SBAR 5 , _ , , . _ 7 P 6 speculators _ NN NNS NOUN _ 7 SUB 7 began _ VB VBD VERB _ 0 ROOT 8 to _ TO TO PRT _ 9 VMOD 9 sell _ VB VB VERB _ 7 VMOD 10 blue-chip _ JJ JJ ADJ _ 11 NMOD 11 stocks _ NN NNS NOUN _ 9 OBJ 12 such _ JJ JJ ADJ _ 13 PMOD 13 as _ IN IN ADP _ 11 NMOD 14 Philip _ NN NNP NOUN _ 15 NMOD 15 Morris _ NN NNP NOUN _ 19 NMOD 16 and _ CC CC CONJ _ 19 NMOD 17 International _ NN NNP NOUN _ 19 NMOD 18 Business _ NN NNP NOUN _ 19 NMOD 19 Machines _ NN NNP NOUN _ 13 PMOD 20 to _ TO TO PRT _ 21 VMOD 21 offset _ VB VB VERB _ 9 VMOD 22 their _ PR PRP$ PRON _ 23 NMOD 23 losses _ NN NNS NOUN _ 21 OBJ 24 . _ . . . _ 7 P 1 When _ WR WRB ADV _ 11 VMOD 2 trading _ NN NN NOUN _ 3 SUB 3 was _ VB VBD VERB _ 1 SBAR 4 halted _ VB VBN VERB _ 3 VC 5 in _ IN IN ADP _ 4 VMOD 6 Philip _ NN NNP NOUN _ 7 NMOD 7 Morris _ NN NNP NOUN _ 5 PMOD 8 , _ , , . _ 11 P 9 the _ DT DT DET _ 10 NMOD 10 stock _ NN NN NOUN _ 11 SUB 11 was _ VB VBD VERB _ 0 ROOT 12 trading _ VB VBG VERB _ 11 VC 13 at _ IN IN ADP _ 12 VMOD 14 41 _ CD CD NUM _ 13 PMOD 15 , _ , , . _ 12 P 16 down _ RB RB ADV _ 12 VMOD 17 3 _ CD CD NUM _ 18 AMOD 18 3\/8 _ CD CD NUM _ 16 AMOD 19 , _ , , . 
_ 12 P 20 while _ IN IN ADP _ 12 VMOD 21 IBM _ NN NNP NOUN _ 22 SUB 22 closed _ VB VBD VERB _ 20 SBAR 23 5 _ CD CD NUM _ 24 AMOD 24 5\/8 _ CD CD NUM _ 25 AMOD 25 lower _ JJ JJR ADJ _ 22 VMOD 26 at _ IN IN ADP _ 22 VMOD 27 102 _ CD CD NUM _ 26 PMOD 28 . _ . . . _ 11 P 1 Selling _ NN NN NOUN _ 2 SUB 2 snowballed _ VB VBD VERB _ 0 ROOT 3 because _ IN IN ADP _ 2 VMOD 4 of _ IN IN ADP _ 3 PMOD 5 waves _ NN NNS NOUN _ 3 PMOD 6 of _ IN IN ADP _ 5 NMOD 7 automatic _ JJ JJ ADJ _ 11 NMOD 8 `` _ `` `` . _ 11 P 9 stop-loss _ JJ JJ ADJ _ 11 NMOD 10 '' _ '' '' . _ 11 P 11 orders _ NN NNS NOUN _ 6 PMOD 12 , _ , , . _ 11 P 13 which _ WD WDT DET _ 11 NMOD 14 are _ VB VBP VERB _ 13 SBAR 15 triggered _ VB VBN VERB _ 14 VC 16 by _ IN IN ADP _ 15 VMOD 17 computer _ NN NN NOUN _ 16 PMOD 18 when _ WR WRB ADV _ 15 VMOD 19 prices _ NN NNS NOUN _ 20 SUB 20 fall _ VB VBP VERB _ 18 SBAR 21 to _ TO TO PRT _ 20 VMOD 22 certain _ JJ JJ ADJ _ 23 NMOD 23 levels _ NN NNS NOUN _ 21 PMOD 24 . _ . . . _ 2 P 1 Most _ JJ JJS ADJ _ 7 SUB 2 of _ IN IN ADP _ 1 NMOD 3 the _ DT DT DET _ 6 NMOD 4 stock _ NN NN NOUN _ 6 NMOD 5 selling _ NN NN NOUN _ 6 NMOD 6 pressure _ NN NN NOUN _ 2 PMOD 7 came _ VB VBD VERB _ 0 ROOT 8 from _ IN IN ADP _ 7 VMOD 9 Wall _ NN NNP NOUN _ 11 NMOD 10 Street _ NN NNP NOUN _ 11 NMOD 11 professionals _ NN NNS NOUN _ 8 PMOD 12 , _ , , . _ 11 P 13 including _ VB VBG VERB _ 11 NMOD 14 computer-guided _ JJ JJ ADJ _ 16 NMOD 15 program _ NN NN NOUN _ 16 NMOD 16 traders _ NN NNS NOUN _ 13 PMOD 17 . _ . . . _ 7 P 1 Traders _ NN NNS NOUN _ 2 SUB 2 said _ VB VBD VERB _ 0 ROOT 3 most _ JJ JJS ADJ _ 15 SUB 4 of _ IN IN ADP _ 3 NMOD 5 their _ PR PRP$ PRON _ 8 NMOD 6 major _ JJ JJ ADJ _ 8 NMOD 7 institutional _ JJ JJ ADJ _ 8 NMOD 8 investors _ NN NNS NOUN _ 4 PMOD 9 , _ , , . _ 15 P 10 on _ IN IN ADP _ 15 VMOD 11 the _ DT DT DET _ 13 NMOD 12 other _ JJ JJ ADJ _ 13 NMOD 13 hand _ NN NN NOUN _ 10 PMOD 14 , _ , , . _ 15 P 15 sat _ VB VBD VERB _ 2 VMOD 16 tight _ RB RB ADV _ 15 VMOD 17 . _ . . . 
_ 2 P 1 Now _ RB RB ADV _ 15 VMOD 2 , _ , , . _ 15 P 3 at _ IN IN ADP _ 15 VMOD 4 3:07 _ CD CD NUM _ 3 PMOD 5 , _ , , . _ 15 P 6 one _ CD CD NUM _ 15 SUB 7 of _ IN IN ADP _ 6 NMOD 8 the _ DT DT DET _ 10 NMOD 9 market _ NN NN NOUN _ 10 NMOD 10 's _ PO POS PRT _ 13 NMOD 11 post-crash _ JJ JJ ADJ _ 13 NMOD 12 `` _ `` `` . _ 13 P 13 reforms _ NN NNS NOUN _ 7 PMOD 14 '' _ '' '' . _ 13 P 15 took _ VB VBD VERB _ 0 ROOT 16 hold _ NN NN NOUN _ 15 OBJ 17 as _ IN IN ADP _ 15 VMOD 18 the _ DT DT DET _ 22 NMOD 19 S&P _ NN NNP NOUN _ 22 NMOD 20 500 _ CD CD NUM _ 22 NMOD 21 futures _ NN NNS NOUN _ 22 NMOD 22 contract _ NN NN NOUN _ 23 SUB 23 had _ VB VBD VERB _ 17 SBAR 24 plunged _ VB VBN VERB _ 23 VC 25 12 _ CD CD NUM _ 26 NMOD 26 points _ NN NNS NOUN _ 24 VMOD 27 , _ , , . _ 26 P 28 equivalent _ JJ JJ ADJ _ 26 NMOD 29 to _ TO TO PRT _ 28 AMOD 30 around _ IN IN ADP _ 33 NMOD 31 a _ DT DT DET _ 30 AMOD 32 100-point _ JJ JJ ADJ _ 30 AMOD 33 drop _ NN NN NOUN _ 29 PMOD 34 in _ IN IN ADP _ 33 NMOD 35 the _ DT DT DET _ 37 NMOD 36 Dow _ NN NNP NOUN _ 37 NMOD 37 industrials _ NN NNS NOUN _ 34 PMOD 38 . _ . . . _ 15 P 1 Under _ IN IN ADP _ 16 VMOD 2 an _ DT DT DET _ 3 NMOD 3 agreement _ NN NN NOUN _ 1 PMOD 4 signed _ VB VBN VERB _ 3 NMOD 5 by _ IN IN ADP _ 4 VMOD 6 the _ DT DT DET _ 8 NMOD 7 Big _ NN NNP NOUN _ 8 NMOD 8 Board _ NN NNP NOUN _ 13 NMOD 9 and _ CC CC CONJ _ 13 NMOD 10 the _ DT DT DET _ 13 NMOD 11 Chicago _ NN NNP NOUN _ 13 NMOD 12 Mercantile _ NN NNP NOUN _ 13 NMOD 13 Exchange _ NN NNP NOUN _ 5 PMOD 14 , _ , , . _ 16 P 15 trading _ NN NN NOUN _ 16 SUB 16 was _ VB VBD VERB _ 0 ROOT 17 temporarily _ RB RB ADV _ 16 VMOD 18 halted _ VB VBN VERB _ 16 VC 19 in _ IN IN ADP _ 18 VMOD 20 Chicago _ NN NNP NOUN _ 19 PMOD 21 . _ . . . 
_ 16 P 1 After _ IN IN ADP _ 28 VMOD 2 the _ DT DT DET _ 4 NMOD 3 trading _ NN NN NOUN _ 4 NMOD 4 halt _ NN NN NOUN _ 1 PMOD 5 in _ IN IN ADP _ 4 NMOD 6 the _ DT DT DET _ 9 NMOD 7 S&P _ NN NNP NOUN _ 9 NMOD 8 500 _ CD CD NUM _ 9 NMOD 9 pit _ NN NN NOUN _ 5 PMOD 10 in _ IN IN ADP _ 9 NMOD 11 Chicago _ NN NNP NOUN _ 10 PMOD 12 , _ , , . _ 28 P 13 waves _ NN NNS NOUN _ 16 SUB 14 of _ IN IN ADP _ 13 NMOD 15 selling _ NN NN NOUN _ 14 PMOD 16 continued _ VB VBD VERB _ 28 VMOD 17 to _ TO TO PRT _ 18 VMOD 18 hit _ VB VB VERB _ 16 VMOD 19 stocks _ NN NNS NOUN _ 20 NMOD 20 themselves _ PR PRP PRON _ 18 OBJ 21 on _ IN IN ADP _ 18 VMOD 22 the _ DT DT DET _ 24 NMOD 23 Big _ NN NNP NOUN _ 24 NMOD 24 Board _ NN NNP NOUN _ 21 PMOD 25 , _ , , . _ 28 P 26 and _ CC CC CONJ _ 28 VMOD 27 specialists _ NN NNS NOUN _ 28 SUB 28 continued _ VB VBD VERB _ 0 ROOT 29 to _ TO TO PRT _ 30 VMOD 30 notch _ VB VB VERB _ 28 VMOD 31 prices _ NN NNS NOUN _ 30 OBJ 32 down _ RP RP PRT _ 30 VMOD 33 . _ . . . _ 28 P 1 As _ IN IN ADP _ 13 VMOD 2 a _ DT DT DET _ 3 NMOD 3 result _ NN NN NOUN _ 1 PMOD 4 , _ , , . _ 13 P 5 the _ DT DT DET _ 6 NMOD 6 link _ NN NN NOUN _ 13 SUB 7 between _ IN IN ADP _ 6 NMOD 8 the _ DT DT DET _ 12 NMOD 9 futures _ NN NNS NOUN _ 12 NMOD 10 and _ CC CC CONJ _ 12 NMOD 11 stock _ NN NN NOUN _ 12 NMOD 12 markets _ NN NNS NOUN _ 7 PMOD 13 ripped _ VB VBD VERB _ 0 ROOT 14 apart _ RB RB ADV _ 13 VMOD 15 . _ . . . _ 13 P vowpal-wabbit-8.6.1.dfsg1/demo/dependencyparsing/wsj_train_subset000066400000000000000000002542701332666127000252650ustar00rootroot000000000000001 In _ IN IN ADP _ 43 VMOD 2 an _ DT DT DET _ 5 NMOD 3 Oct. _ NN NNP NOUN _ 5 NMOD 4 19 _ CD CD NUM _ 5 NMOD 5 review _ NN NN NOUN _ 1 PMOD 6 of _ IN IN ADP _ 5 NMOD 7 `` _ `` `` . _ 9 P 8 The _ DT DT DET _ 9 NMOD 9 Misanthrope _ NN NN NOUN _ 6 PMOD 10 '' _ '' '' . 
_ 9 P 11 at _ IN IN ADP _ 9 NMOD 12 Chicago _ NN NNP NOUN _ 13 NMOD 13 's _ PO POS PRT _ 15 NMOD 14 Goodman _ NN NNP NOUN _ 15 NMOD 15 Theatre _ NN NNP NOUN _ 11 PMOD 16 ( _ -L -LRB- . _ 30 P 17 `` _ `` `` . _ 30 P 18 Revitalized _ VB VBN VERB _ 19 NMOD 19 Classics _ NN NNS NOUN _ 20 SUB 20 Take _ VB VBP VERB _ 30 DEP 21 the _ DT DT DET _ 22 NMOD 22 Stage _ NN NN NOUN _ 20 OBJ 23 in _ IN IN ADP _ 20 VMOD 24 Windy _ NN NNP NOUN _ 25 NMOD 25 City _ NN NNP NOUN _ 23 PMOD 26 , _ , , . _ 30 P 27 '' _ '' '' . _ 30 P 28 Leisure _ NN NN NOUN _ 30 NMOD 29 & _ CC CC CONJ _ 30 NMOD 30 Arts _ NN NNS NOUN _ 5 NMOD 31 ) _ -R -RRB- . _ 30 P 32 , _ , , . _ 43 P 33 the _ DT DT DET _ 34 NMOD 34 role _ NN NN NOUN _ 43 SUB 35 of _ IN IN ADP _ 34 NMOD 36 Celimene _ NN NNP NOUN _ 35 PMOD 37 , _ , , . _ 34 P 38 played _ VB VBN VERB _ 34 NMOD 39 by _ IN IN ADP _ 38 VMOD 40 Kim _ NN NNP NOUN _ 41 NMOD 41 Cattrall _ NN NNP NOUN _ 39 PMOD 42 , _ , , . _ 34 P 43 was _ VB VBD VERB _ 0 ROOT 44 mistakenly _ RB RB ADV _ 45 VMOD 45 attributed _ VB VBN VERB _ 43 VC 46 to _ TO TO PRT _ 45 VMOD 47 Christina _ NN NNP NOUN _ 48 NMOD 48 Haag _ NN NNP NOUN _ 46 PMOD 49 . _ . . . _ 43 P 1 Ms. _ NN NNP NOUN _ 2 NMOD 2 Haag _ NN NNP NOUN _ 3 SUB 3 plays _ VB VBZ VERB _ 0 ROOT 4 Elianti _ NN NNP NOUN _ 3 OBJ 5 . _ . . . _ 3 P 1 Rolls-Royce _ NN NNP NOUN _ 4 NMOD 2 Motor _ NN NNP NOUN _ 4 NMOD 3 Cars _ NN NNPS NOUN _ 4 NMOD 4 Inc. _ NN NNP NOUN _ 5 SUB 5 said _ VB VBD VERB _ 0 ROOT 6 it _ PR PRP PRON _ 7 SUB 7 expects _ VB VBZ VERB _ 5 VMOD 8 its _ PR PRP$ PRON _ 10 NMOD 9 U.S. _ NN NNP NOUN _ 10 NMOD 10 sales _ NN NNS NOUN _ 12 SUB 11 to _ TO TO PRT _ 12 VMOD 12 remain _ VB VB VERB _ 7 VMOD 13 steady _ JJ JJ ADJ _ 12 PRD 14 at _ IN IN ADP _ 12 VMOD 15 about _ IN IN ADP _ 17 NMOD 16 1,200 _ CD CD NUM _ 15 AMOD 17 cars _ NN NNS NOUN _ 14 PMOD 18 in _ IN IN ADP _ 12 VMOD 19 1990 _ CD CD NUM _ 18 PMOD 20 . _ . . . 
_ 5 P 1 The _ DT DT DET _ 4 NMOD 2 luxury _ NN NN NOUN _ 4 NMOD 3 auto _ NN NN NOUN _ 4 NMOD 4 maker _ NN NN NOUN _ 7 SUB 5 last _ JJ JJ ADJ _ 6 NMOD 6 year _ NN NN NOUN _ 7 VMOD 7 sold _ VB VBD VERB _ 0 ROOT 8 1,214 _ CD CD NUM _ 9 NMOD 9 cars _ NN NNS NOUN _ 7 OBJ 10 in _ IN IN ADP _ 7 VMOD 11 the _ DT DT DET _ 12 NMOD 12 U.S. _ NN NNP NOUN _ 10 PMOD 1 Howard _ NN NNP NOUN _ 2 NMOD 2 Mosher _ NN NNP NOUN _ 8 NMOD 3 , _ , , . _ 8 P 4 president _ NN NN NOUN _ 8 NMOD 5 and _ CC CC CONJ _ 8 NMOD 6 chief _ JJ JJ ADJ _ 8 NMOD 7 executive _ NN NN NOUN _ 8 NMOD 8 officer _ NN NN NOUN _ 10 SUB 9 , _ , , . _ 8 P 10 said _ VB VBD VERB _ 0 ROOT 11 he _ PR PRP PRON _ 12 SUB 12 anticipates _ VB VBZ VERB _ 10 VMOD 13 growth _ NN NN NOUN _ 12 OBJ 14 for _ IN IN ADP _ 13 NMOD 15 the _ DT DT DET _ 18 NMOD 16 luxury _ NN NN NOUN _ 18 NMOD 17 auto _ NN NN NOUN _ 18 NMOD 18 maker _ NN NN NOUN _ 14 PMOD 19 in _ IN IN ADP _ 13 NMOD 20 Britain _ NN NNP NOUN _ 22 NMOD 21 and _ CC CC CONJ _ 22 NMOD 22 Europe _ NN NNP NOUN _ 19 PMOD 23 , _ , , . _ 19 P 24 and _ CC CC CONJ _ 19 PMOD 25 in _ IN IN ADP _ 19 PMOD 26 Far _ JJ JJ ADJ _ 27 AMOD 27 Eastern _ JJ JJ ADJ _ 28 NMOD 28 markets _ NN NNS NOUN _ 25 PMOD 29 . _ . . . _ 10 P 1 BELL _ NN NNP NOUN _ 3 NMOD 2 INDUSTRIES _ NN NNP NOUN _ 3 NMOD 3 Inc. _ NN NNP NOUN _ 4 SUB 4 increased _ VB VBD VERB _ 0 ROOT 5 its _ PR PRP$ PRON _ 6 NMOD 6 quarterly _ NN NN NOUN _ 4 OBJ 7 to _ TO TO PRT _ 4 VMOD 8 10 _ CD CD NUM _ 9 NMOD 9 cents _ NN NNS NOUN _ 7 PMOD 10 from _ IN IN ADP _ 4 VMOD 11 seven _ CD CD NUM _ 12 NMOD 12 cents _ NN NNS NOUN _ 14 NMOD 13 a _ DT DT DET _ 14 NMOD 14 share _ NN NN NOUN _ 10 PMOD 15 . _ . . . _ 4 P 1 The _ DT DT DET _ 3 NMOD 2 new _ JJ JJ ADJ _ 3 NMOD 3 rate _ NN NN NOUN _ 4 SUB 4 will _ MD MD VERB _ 0 ROOT 5 be _ VB VB VERB _ 4 VC 6 payable _ JJ JJ ADJ _ 5 PRD 7 Feb. _ NN NNP NOUN _ 6 AMOD 8 15 _ CD CD NUM _ 7 NMOD 9 . _ . . . 
_ 4 P 1 A _ DT DT DET _ 3 NMOD 2 record _ NN NN NOUN _ 3 NMOD 3 date _ NN NN NOUN _ 4 SUB 4 has _ VB VBZ VERB _ 0 ROOT 5 n't _ RB RB ADV _ 4 VMOD 6 been _ VB VBN VERB _ 4 VC 7 set _ VB VBN VERB _ 6 VC 8 . _ . . . _ 4 P 1 Bell _ NN NNP NOUN _ 8 SUB 2 , _ , , . _ 1 P 3 based _ VB VBN VERB _ 1 NMOD 4 in _ IN IN ADP _ 3 VMOD 5 Los _ NN NNP NOUN _ 6 NMOD 6 Angeles _ NN NNP NOUN _ 4 PMOD 7 , _ , , . _ 1 P 8 makes _ VB VBZ VERB _ 0 ROOT 9 and _ CC CC CONJ _ 8 VMOD 10 distributes _ VB VBZ VERB _ 8 VMOD 11 electronic _ JJ JJ ADJ _ 16 NMOD 12 , _ , , . _ 11 P 13 computer _ NN NN NOUN _ 11 DEP 14 and _ CC CC CONJ _ 11 DEP 15 building _ NN NN NOUN _ 11 DEP 16 products _ NN NNS NOUN _ 8 OBJ 17 . _ . . . _ 8 P 1 Investors _ NN NNS NOUN _ 2 SUB 2 are _ VB VBP VERB _ 0 ROOT 3 appealing _ VB VBG VERB _ 2 VC 4 to _ TO TO PRT _ 3 VMOD 5 the _ DT DT DET _ 9 NMOD 6 Securities _ NN NNPS NOUN _ 9 NMOD 7 and _ CC CC CONJ _ 9 NMOD 8 Exchange _ NN NNP NOUN _ 9 NMOD 9 Commission _ NN NNP NOUN _ 4 PMOD 10 not _ RB RB ADV _ 12 VMOD 11 to _ TO TO PRT _ 12 VMOD 12 limit _ VB VB VERB _ 3 VMOD 13 their _ PR PRP$ PRON _ 14 NMOD 14 access _ NN NN NOUN _ 12 OBJ 15 to _ TO TO PRT _ 14 NMOD 16 information _ NN NN NOUN _ 15 PMOD 17 about _ IN IN ADP _ 16 NMOD 18 stock _ NN NN NOUN _ 21 NMOD 19 purchases _ NN NNS NOUN _ 21 NMOD 20 and _ CC CC CONJ _ 21 NMOD 21 sales _ NN NNS NOUN _ 17 PMOD 22 by _ IN IN ADP _ 21 NMOD 23 corporate _ JJ JJ ADJ _ 24 NMOD 24 insiders _ NN NNS NOUN _ 22 PMOD 25 . _ . . . 
_ 2 P 1 A _ DT DT DET _ 3 NMOD 2 SEC _ NN NNP NOUN _ 3 NMOD 3 proposal _ NN NN NOUN _ 12 SUB 4 to _ TO TO PRT _ 5 VMOD 5 ease _ VB VB VERB _ 3 NMOD 6 reporting _ NN NN NOUN _ 7 NMOD 7 requirements _ NN NNS NOUN _ 5 OBJ 8 for _ IN IN ADP _ 7 NMOD 9 some _ DT DT DET _ 11 NMOD 10 company _ NN NN NOUN _ 11 NMOD 11 executives _ NN NNS NOUN _ 8 PMOD 12 would _ MD MD VERB _ 32 VMOD 13 undermine _ VB VB VERB _ 12 VC 14 the _ DT DT DET _ 15 NMOD 15 usefulness _ NN NN NOUN _ 13 OBJ 16 of _ IN IN ADP _ 15 NMOD 17 information _ NN NN NOUN _ 16 PMOD 18 on _ IN IN ADP _ 17 NMOD 19 insider _ NN NN NOUN _ 20 NMOD 20 trades _ NN NNS NOUN _ 18 PMOD 21 as _ IN IN ADP _ 15 NMOD 22 a _ DT DT DET _ 24 NMOD 23 stock-picking _ JJ JJ ADJ _ 24 NMOD 24 tool _ NN NN NOUN _ 21 PMOD 25 , _ , , . _ 32 P 26 individual _ JJ JJ ADJ _ 27 NMOD 27 investors _ NN NNS NOUN _ 31 NMOD 28 and _ CC CC CONJ _ 31 NMOD 29 professional _ JJ JJ ADJ _ 31 NMOD 30 money _ NN NN NOUN _ 31 NMOD 31 managers _ NN NNS NOUN _ 32 SUB 32 contend _ VB VBP VERB _ 0 ROOT 33 . _ . . . _ 32 P 1 They _ PR PRP PRON _ 2 SUB 2 make _ VB VBP VERB _ 0 ROOT 3 the _ DT DT DET _ 4 NMOD 4 argument _ NN NN NOUN _ 2 OBJ 5 in _ IN IN ADP _ 2 VMOD 6 letters _ NN NNS NOUN _ 5 PMOD 7 to _ TO TO PRT _ 6 NMOD 8 the _ DT DT DET _ 9 NMOD 9 agency _ NN NN NOUN _ 7 PMOD 10 about _ IN IN ADP _ 6 NMOD 11 rule _ NN NN NOUN _ 12 NMOD 12 changes _ NN NNS NOUN _ 10 PMOD 13 proposed _ VB VBD VERB _ 12 NMOD 14 this _ DT DT DET _ 16 NMOD 15 past _ JJ JJ ADJ _ 16 NMOD 16 summer _ NN NN NOUN _ 13 VMOD 17 that _ IN IN ADP _ 12 NMOD 18 , _ , , . _ 17 P 19 among _ IN IN ADP _ 23 VMOD 20 other _ JJ JJ ADJ _ 21 NMOD 21 things _ NN NNS NOUN _ 19 PMOD 22 , _ , , . 
_ 23 P 23 would _ MD MD VERB _ 17 SBAR 24 exempt _ VB VB VERB _ 23 VC 25 many _ JJ JJ ADJ _ 27 NMOD 26 middle-management _ JJ JJ ADJ _ 27 NMOD 27 executives _ NN NNS NOUN _ 24 OBJ 28 from _ IN IN ADP _ 24 VMOD 29 reporting _ VB VBG VERB _ 28 PMOD 30 trades _ NN NNS NOUN _ 29 OBJ 31 in _ IN IN ADP _ 30 NMOD 32 their _ PR PRP$ PRON _ 35 NMOD 33 own _ JJ JJ ADJ _ 35 NMOD 34 companies _ NN NNS NOUN _ 35 NMOD 35 ' _ PO POS PRT _ 36 NMOD 36 shares _ NN NNS NOUN _ 31 PMOD 37 . _ . . . _ 2 P 1 The _ DT DT DET _ 3 NMOD 2 proposed _ VB VBN VERB _ 3 NMOD 3 changes _ NN NNS NOUN _ 5 SUB 4 also _ RB RB ADV _ 5 VMOD 5 would _ MD MD VERB _ 0 ROOT 6 allow _ VB VB VERB _ 5 VC 7 executives _ NN NNS NOUN _ 9 SUB 8 to _ TO TO PRT _ 9 VMOD 9 report _ VB VB VERB _ 6 VMOD 10 exercises _ NN NNS NOUN _ 9 OBJ 11 of _ IN IN ADP _ 10 NMOD 12 options _ NN NNS NOUN _ 11 PMOD 13 later _ RB RBR ADV _ 9 VMOD 14 and _ CC CC CONJ _ 13 AMOD 15 less _ RB RBR ADV _ 16 AMOD 16 often _ RB RB ADV _ 13 AMOD 17 . _ . . . _ 5 P 1 Many _ JJ JJ ADJ _ 5 SUB 2 of _ IN IN ADP _ 1 NMOD 3 the _ DT DT DET _ 4 NMOD 4 letters _ NN NNS NOUN _ 2 PMOD 5 maintain _ VB VBP VERB _ 0 ROOT 6 that _ IN IN ADP _ 5 VMOD 7 investor _ NN NN NOUN _ 8 NMOD 8 confidence _ NN NN NOUN _ 9 SUB 9 has _ VB VBZ VERB _ 25 VMOD 10 been _ VB VBN VERB _ 9 VC 11 so _ RB RB ADV _ 12 VMOD 12 shaken _ VB VBN VERB _ 10 VC 13 by _ IN IN ADP _ 12 VMOD 14 the _ DT DT DET _ 18 NMOD 15 1987 _ CD CD NUM _ 18 NMOD 16 stock _ NN NN NOUN _ 18 NMOD 17 market _ NN NN NOUN _ 18 NMOD 18 crash _ NN NN NOUN _ 13 PMOD 19 -- _ : : . _ 25 P 20 and _ CC CC CONJ _ 25 VMOD 21 the _ DT DT DET _ 22 NMOD 22 markets _ NN NNS NOUN _ 25 SUB 23 already _ RB RB ADV _ 25 VMOD 24 so _ RB RB ADV _ 25 VMOD 25 stacked _ VB VBN VERB _ 6 SBAR 26 against _ IN IN ADP _ 25 VMOD 27 the _ DT DT DET _ 29 NMOD 28 little _ JJ JJ ADJ _ 29 NMOD 29 guy _ NN NN NOUN _ 26 PMOD 30 -- _ : : . 
_ 25 P 31 that _ IN IN ADP _ 25 VMOD 32 any _ DT DT DET _ 33 NMOD 33 decrease _ NN NN NOUN _ 39 SUB 34 in _ IN IN ADP _ 33 NMOD 35 information _ NN NN NOUN _ 34 PMOD 36 on _ IN IN ADP _ 35 NMOD 37 insider-trading _ NN NN NOUN _ 38 NMOD 38 patterns _ NN NNS NOUN _ 36 PMOD 39 might _ MD MD VERB _ 31 SBAR 40 prompt _ VB VB VERB _ 39 VC 41 individuals _ NN NNS NOUN _ 43 SUB 42 to _ TO TO PRT _ 43 VMOD 43 get _ VB VB VERB _ 40 VMOD 44 out _ RB RB ADV _ 43 VMOD 45 of _ IN IN ADP _ 44 AMOD 46 stocks _ NN NNS NOUN _ 45 PMOD 47 altogether _ RB RB ADV _ 43 VMOD 48 . _ . . . _ 5 P 1 `` _ `` `` . _ 18 P 2 The _ DT DT DET _ 3 NMOD 3 SEC _ NN NNP NOUN _ 4 SUB 4 has _ VB VBZ VERB _ 18 VMOD 5 historically _ RB RB ADV _ 4 VMOD 6 paid _ VB VBN VERB _ 4 VC 7 obeisance _ NN NN NOUN _ 6 OBJ 8 to _ TO TO PRT _ 6 VMOD 9 the _ DT DT DET _ 10 NMOD 10 ideal _ NN NN NOUN _ 8 PMOD 11 of _ IN IN ADP _ 10 NMOD 12 a _ DT DT DET _ 15 NMOD 13 level _ JJ JJ ADJ _ 15 NMOD 14 playing _ NN NN NOUN _ 15 NMOD 15 field _ NN NN NOUN _ 11 PMOD 16 , _ , , . _ 18 P 17 '' _ '' '' . _ 18 P 18 wrote _ VB VBD VERB _ 0 ROOT 19 Clyde _ NN NNP NOUN _ 21 NMOD 20 S. _ NN NNP NOUN _ 21 NMOD 21 McGregor _ NN NNP NOUN _ 18 SUB 22 of _ IN IN ADP _ 21 NMOD 23 Winnetka _ NN NNP NOUN _ 25 NMOD 24 , _ , , . _ 25 P 25 Ill. _ NN NNP NOUN _ 22 PMOD 26 , _ , , . _ 25 P 27 in _ IN IN ADP _ 18 VMOD 28 one _ CD CD NUM _ 27 PMOD 29 of _ IN IN ADP _ 28 NMOD 30 the _ DT DT DET _ 32 NMOD 31 92 _ CD CD NUM _ 32 NMOD 32 letters _ NN NNS NOUN _ 29 PMOD 33 the _ DT DT DET _ 34 NMOD 34 agency _ NN NN NOUN _ 35 SUB 35 has _ VB VBZ VERB _ 32 NMOD 36 received _ VB VBN VERB _ 35 VC 37 since _ IN IN ADP _ 36 VMOD 38 the _ DT DT DET _ 39 NMOD 39 changes _ NN NNS NOUN _ 40 SUB 40 were _ VB VBD VERB _ 37 SBAR 41 proposed _ VB VBN VERB _ 40 VC 42 Aug. _ NN NNP NOUN _ 41 VMOD 43 17 _ CD CD NUM _ 42 NMOD 44 . _ . . . _ 18 P 1 `` _ `` `` . 
_ 5 P 2 Apparently _ RB RB ADV _ 5 VMOD 3 the _ DT DT DET _ 4 NMOD 4 commission _ NN NN NOUN _ 5 SUB 5 did _ VB VBD VERB _ 0 ROOT 6 not _ RB RB ADV _ 5 VMOD 7 really _ RB RB ADV _ 5 VMOD 8 believe _ VB VB VERB _ 5 VC 9 in _ IN IN ADP _ 8 VMOD 10 this _ DT DT DET _ 11 NMOD 11 ideal _ NN NN NOUN _ 9 PMOD 12 . _ . . . _ 5 P 13 '' _ '' '' . _ 5 P 1 Currently _ RB RB ADV _ 5 VMOD 2 , _ , , . _ 5 P 3 the _ DT DT DET _ 4 NMOD 4 rules _ NN NNS NOUN _ 5 SUB 5 force _ VB VBP VERB _ 0 ROOT 6 executives _ NN NNS NOUN _ 12 NMOD 7 , _ , , . _ 12 P 8 directors _ NN NNS NOUN _ 12 NMOD 9 and _ CC CC CONJ _ 12 NMOD 10 other _ JJ JJ ADJ _ 12 NMOD 11 corporate _ JJ JJ ADJ _ 12 NMOD 12 insiders _ NN NNS NOUN _ 14 SUB 13 to _ TO TO PRT _ 14 VMOD 14 report _ VB VB VERB _ 5 VMOD 15 purchases _ NN NNS NOUN _ 17 NMOD 16 and _ CC CC CONJ _ 17 NMOD 17 sales _ NN NNS NOUN _ 14 OBJ 18 of _ IN IN ADP _ 17 NMOD 19 their _ PR PRP$ PRON _ 21 NMOD 20 companies _ NN NNS NOUN _ 21 NMOD 21 ' _ PO POS PRT _ 22 NMOD 22 shares _ NN NNS NOUN _ 18 PMOD 23 within _ IN IN ADP _ 14 VMOD 24 about _ IN IN ADP _ 26 NMOD 25 a _ DT DT DET _ 24 AMOD 26 month _ NN NN NOUN _ 23 PMOD 27 after _ IN IN ADP _ 26 NMOD 28 the _ DT DT DET _ 29 NMOD 29 transaction _ NN NN NOUN _ 27 PMOD 30 . _ . . . _ 5 P 1 But _ CC CC CONJ _ 14 VMOD 2 about _ IN IN ADP _ 4 NMOD 3 25 _ CD CD NUM _ 2 AMOD 4 % _ NN NN NOUN _ 14 SUB 5 of _ IN IN ADP _ 4 NMOD 6 the _ DT DT DET _ 7 NMOD 7 insiders _ NN NNS NOUN _ 5 PMOD 8 , _ , , . _ 14 P 9 according _ VB VBG VERB _ 14 VMOD 10 to _ TO TO PRT _ 9 PMOD 11 SEC _ NN NNP NOUN _ 12 NMOD 12 figures _ NN NNS NOUN _ 10 PMOD 13 , _ , , . _ 14 P 14 file _ VB VBP VERB _ 0 ROOT 15 their _ PR PRP$ PRON _ 16 NMOD 16 reports _ NN NNS NOUN _ 14 OBJ 17 late _ RB RB ADV _ 14 VMOD 18 . _ . . . 
_ 14 P 1 The _ DT DT DET _ 2 NMOD 2 changes _ NN NNS NOUN _ 3 SUB 3 were _ VB VBD VERB _ 27 VMOD 4 proposed _ VB VBN VERB _ 3 VC 5 in _ IN IN ADP _ 4 VMOD 6 an _ DT DT DET _ 7 NMOD 7 effort _ NN NN NOUN _ 5 PMOD 8 to _ TO TO PRT _ 9 VMOD 9 streamline _ VB VB VERB _ 7 NMOD 10 federal _ JJ JJ ADJ _ 11 NMOD 11 bureaucracy _ NN NN NOUN _ 9 OBJ 12 and _ CC CC CONJ _ 9 VMOD 13 boost _ VB VB VERB _ 9 VMOD 14 compliance _ NN NN NOUN _ 13 OBJ 15 by _ IN IN ADP _ 14 NMOD 16 the _ DT DT DET _ 17 NMOD 17 executives _ NN NNS NOUN _ 15 PMOD 18 `` _ `` `` . _ 17 P 19 who _ WP WP PRON _ 17 NMOD 20 are _ VB VBP VERB _ 19 SBAR 21 really _ RB RB ADV _ 20 VMOD 22 calling _ VB VBG VERB _ 20 VC 23 the _ DT DT DET _ 24 NMOD 24 shots _ NN NNS NOUN _ 22 OBJ 25 , _ , , . _ 27 P 26 '' _ '' '' . _ 27 P 27 said _ VB VBD VERB _ 0 ROOT 28 Brian _ NN NNP NOUN _ 29 NMOD 29 Lane _ NN NNP NOUN _ 32 NMOD 30 , _ , , . _ 32 P 31 special _ JJ JJ ADJ _ 32 NMOD 32 counsel _ NN NN NOUN _ 27 SUB 33 at _ IN IN ADP _ 32 NMOD 34 the _ DT DT DET _ 36 NMOD 35 SEC _ NN NNP NOUN _ 36 NMOD 36 's _ PO POS PRT _ 37 NMOD 37 office _ NN NN NOUN _ 33 PMOD 38 of _ IN IN ADP _ 37 NMOD 39 disclosure _ NN NN NOUN _ 40 NMOD 40 policy _ NN NN NOUN _ 38 PMOD 41 , _ , , . _ 37 P 42 which _ WD WDT DET _ 37 NMOD 43 proposed _ VB VBD VERB _ 42 SBAR 44 the _ DT DT DET _ 45 NMOD 45 changes _ NN NNS NOUN _ 43 OBJ 46 . _ . . . _ 27 P 1 Investors _ NN NNS NOUN _ 7 NMOD 2 , _ , , . _ 7 P 3 money _ NN NN NOUN _ 4 NMOD 4 managers _ NN NNS NOUN _ 7 NMOD 5 and _ CC CC CONJ _ 7 NMOD 6 corporate _ JJ JJ ADJ _ 7 NMOD 7 officials _ NN NNS NOUN _ 8 SUB 8 had _ VB VBD VERB _ 20 VMOD 9 until _ IN IN ADP _ 8 VMOD 10 today _ NN NN NOUN _ 9 PMOD 11 to _ TO TO PRT _ 12 VMOD 12 comment _ VB VB VERB _ 8 VMOD 13 on _ IN IN ADP _ 12 VMOD 14 the _ DT DT DET _ 15 NMOD 15 proposals _ NN NNS NOUN _ 13 PMOD 16 , _ , , . 
_ 20 P 17 and _ CC CC CONJ _ 20 VMOD 18 the _ DT DT DET _ 19 NMOD 19 issue _ NN NN NOUN _ 20 SUB 20 has _ VB VBZ VERB _ 34 VMOD 21 produced _ VB VBN VERB _ 20 VC 22 more _ JJ JJR ADJ _ 23 NMOD 23 mail _ NN NN NOUN _ 21 OBJ 24 than _ IN IN ADP _ 23 NMOD 25 almost _ RB RB ADV _ 26 AMOD 26 any _ DT DT DET _ 28 NMOD 27 other _ JJ JJ ADJ _ 28 NMOD 28 issue _ NN NN NOUN _ 24 PMOD 29 in _ IN IN ADP _ 28 NMOD 30 memory _ NN NN NOUN _ 29 PMOD 31 , _ , , . _ 34 P 32 Mr. _ NN NNP NOUN _ 33 NMOD 33 Lane _ NN NNP NOUN _ 34 SUB 34 said _ VB VBD VERB _ 0 ROOT 35 . _ . . . _ 34 P 1 The _ DT DT DET _ 2 NMOD 2 SEC _ NN NNP NOUN _ 3 SUB 3 will _ MD MD VERB _ 14 VMOD 4 probably _ RB RB ADV _ 3 VMOD 5 vote _ VB VB VERB _ 3 VC 6 on _ IN IN ADP _ 5 VMOD 7 the _ DT DT DET _ 8 NMOD 8 proposal _ NN NN NOUN _ 6 PMOD 9 early _ RB RB ADV _ 11 NMOD 10 next _ JJ JJ ADJ _ 11 NMOD 11 year _ NN NN NOUN _ 5 VMOD 12 , _ , , . _ 14 P 13 he _ PR PRP PRON _ 14 SUB 14 said _ VB VBD VERB _ 0 ROOT 15 . _ . . . _ 14 P 1 Not _ RB RB ADV _ 6 SUB 2 all _ PD PDT DET _ 1 NMOD 3 those _ DT DT DET _ 1 NMOD 4 who _ WP WP PRON _ 1 NMOD 5 wrote _ VB VBD VERB _ 4 SBAR 6 oppose _ VB VBP VERB _ 0 ROOT 7 the _ DT DT DET _ 8 NMOD 8 changes _ NN NNS NOUN _ 6 OBJ 9 . _ . . . _ 6 P 1 The _ DT DT DET _ 2 NMOD 2 Committee _ NN NNP NOUN _ 13 SUB 3 on _ IN IN ADP _ 2 NMOD 4 Federal _ NN NNP NOUN _ 5 NMOD 5 Regulation _ NN NNP NOUN _ 3 PMOD 6 of _ IN IN ADP _ 5 NMOD 7 Securities _ NN NNPS NOUN _ 6 PMOD 8 for _ IN IN ADP _ 2 NMOD 9 the _ DT DT DET _ 12 NMOD 10 American _ NN NNP NOUN _ 12 NMOD 11 Bar _ NN NNP NOUN _ 12 NMOD 12 Association _ NN NNP NOUN _ 8 PMOD 13 argues _ VB VBZ VERB _ 0 ROOT 14 , _ , , . _ 13 P 15 for _ IN IN ADP _ 13 VMOD 16 example _ NN NN NOUN _ 15 PMOD 17 , _ , , . _ 13 P 18 in _ IN IN ADP _ 13 VMOD 19 its _ PR PRP$ PRON _ 21 NMOD 20 lengthy _ JJ JJ ADJ _ 21 NMOD 21 letter _ NN NN NOUN _ 18 PMOD 22 to _ TO TO PRT _ 21 NMOD 23 the _ DT DT DET _ 24 NMOD 24 SEC _ NN NNP NOUN _ 22 PMOD 25 , _ , , . 
_ 13 P 26 that _ IN IN ADP _ 13 VMOD 27 the _ DT DT DET _ 29 NMOD 28 proposed _ VB VBN VERB _ 29 NMOD 29 changes _ NN NNS NOUN _ 31 SUB 30 `` _ `` `` . _ 31 P 31 would _ MD MD VERB _ 26 SBAR 32 substantially _ RB RB ADV _ 33 VMOD 33 improve _ VB VB VERB _ 31 VC 34 the _ DT DT DET _ 36 NMOD 35 { _ -L -LRB- . _ 36 P 36 law _ NN NN NOUN _ 33 OBJ 37 } _ -R -RRB- . _ 36 P 38 by _ IN IN ADP _ 33 VMOD 39 conforming _ VB VBG VERB _ 38 PMOD 40 it _ PR PRP PRON _ 39 OBJ 41 more _ RB RBR ADV _ 42 AMOD 42 closely _ RB RB ADV _ 39 VMOD 43 to _ TO TO PRT _ 39 VMOD 44 contemporary _ JJ JJ ADJ _ 46 NMOD 45 business _ NN NN NOUN _ 46 NMOD 46 realities _ NN NNS NOUN _ 43 PMOD 47 . _ . . . _ 13 P 48 '' _ '' '' . _ 13 P 1 What _ WP WP PRON _ 12 VMOD 2 the _ DT DT DET _ 3 NMOD 3 investors _ NN NNS NOUN _ 9 SUB 4 who _ WP WP PRON _ 3 NMOD 5 oppose _ VB VBP VERB _ 4 SBAR 6 the _ DT DT DET _ 8 NMOD 7 proposed _ VB VBN VERB _ 8 NMOD 8 changes _ NN NNS NOUN _ 5 OBJ 9 object _ VB VBP VERB _ 1 SBAR 10 to _ TO TO PRT _ 9 VMOD 11 most _ RB RBS ADV _ 9 VMOD 12 is _ VB VBZ VERB _ 0 ROOT 13 the _ DT DT DET _ 14 NMOD 14 effect _ NN NN NOUN _ 12 PRD 15 they _ PR PRP PRON _ 16 SUB 16 say _ VB VBP VERB _ 14 NMOD 17 the _ DT DT DET _ 18 NMOD 18 proposal _ NN NN NOUN _ 19 SUB 19 would _ MD MD VERB _ 16 VMOD 20 have _ VB VB VERB _ 19 VC 21 on _ IN IN ADP _ 20 VMOD 22 their _ PR PRP$ PRON _ 23 NMOD 23 ability _ NN NN NOUN _ 21 PMOD 24 to _ TO TO PRT _ 25 VMOD 25 spot _ VB VB VERB _ 23 NMOD 26 telltale _ JJ JJ ADJ _ 28 NMOD 27 `` _ `` `` . _ 28 P 28 clusters _ NN NNS NOUN _ 36 NMOD 29 '' _ '' '' . _ 28 P 30 of _ IN IN ADP _ 28 NMOD 31 trading _ NN NN NOUN _ 32 NMOD 32 activity _ NN NN NOUN _ 30 PMOD 33 -- _ : : . 
_ 36 P 34 buying _ NN NN NOUN _ 36 NMOD 35 or _ CC CC CONJ _ 36 NMOD 36 selling _ NN NN NOUN _ 25 OBJ 37 by _ IN IN ADP _ 36 NMOD 38 more _ JJ JJR ADJ _ 39 AMOD 39 than _ IN IN ADP _ 43 NMOD 40 one _ CD CD NUM _ 39 AMOD 41 officer _ NN NN NOUN _ 43 NMOD 42 or _ CC CC CONJ _ 43 NMOD 43 director _ NN NN NOUN _ 37 PMOD 44 within _ IN IN ADP _ 36 NMOD 45 a _ DT DT DET _ 47 NMOD 46 short _ JJ JJ ADJ _ 47 NMOD 47 period _ NN NN NOUN _ 44 PMOD 48 of _ IN IN ADP _ 47 NMOD 49 time _ NN NN NOUN _ 48 PMOD 50 . _ . . . _ 12 P 1 According _ VB VBG VERB _ 9 VMOD 2 to _ TO TO PRT _ 1 PMOD 3 some _ DT DT DET _ 4 NMOD 4 estimates _ NN NNS NOUN _ 2 PMOD 5 , _ , , . _ 9 P 6 the _ DT DT DET _ 8 NMOD 7 rule _ NN NN NOUN _ 8 NMOD 8 changes _ NN NNS NOUN _ 9 SUB 9 would _ MD MD VERB _ 0 ROOT 10 cut _ VB VB VERB _ 9 VC 11 insider _ NN NN NOUN _ 12 NMOD 12 filings _ NN NNS NOUN _ 10 OBJ 13 by _ IN IN ADP _ 10 VMOD 14 more _ JJ JJR ADJ _ 13 PMOD 15 than _ IN IN ADP _ 14 NMOD 16 a _ DT DT DET _ 17 NMOD 17 third _ NN NN NOUN _ 15 PMOD 18 . _ . . . _ 9 P 1 The _ DT DT DET _ 3 NMOD 2 SEC _ NN NNP NOUN _ 3 NMOD 3 's _ PO POS PRT _ 5 NMOD 4 Mr. _ NN NNP NOUN _ 5 NMOD 5 Lane _ NN NNP NOUN _ 7 SUB 6 vehemently _ RB RB ADV _ 7 VMOD 7 disputed _ VB VBD VERB _ 0 ROOT 8 those _ DT DT DET _ 9 NMOD 9 estimates _ NN NNS NOUN _ 7 OBJ 10 . _ . . . _ 7 P 1 The _ DT DT DET _ 2 NMOD 2 rules _ NN NNS NOUN _ 3 SUB 3 will _ MD MD VERB _ 23 VMOD 4 eliminate _ VB VB VERB _ 3 VC 5 filings _ NN NNS NOUN _ 7 NMOD 6 policy-making _ VB VBG VERB _ 7 NMOD 7 divisions _ NN NNS NOUN _ 4 OBJ 8 , _ , , . _ 7 P 9 such _ JJ JJ ADJ _ 10 PMOD 10 as _ IN IN ADP _ 7 NMOD 11 sales _ NN NNS NOUN _ 19 NMOD 12 , _ , , . _ 19 P 13 marketing _ NN NN NOUN _ 19 NMOD 14 , _ , , . _ 19 P 15 finance _ NN NN NOUN _ 19 NMOD 16 and _ CC CC CONJ _ 19 NMOD 17 research _ NN NN NOUN _ 19 NMOD 18 and _ CC CC CONJ _ 19 NMOD 19 development _ NN NN NOUN _ 10 PMOD 20 , _ , , . _ 23 P 21 Mr. 
_ NN NNP NOUN _ 22 NMOD 22 Lane _ NN NNP NOUN _ 23 SUB 23 said _ VB VBD VERB _ 0 ROOT 24 . _ . . . _ 23 P 1 The _ DT DT DET _ 3 NMOD 2 proposed _ VB VBN VERB _ 3 NMOD 3 rules _ NN NNS NOUN _ 5 SUB 4 also _ RB RB ADV _ 5 VMOD 5 would _ MD MD VERB _ 18 VMOD 6 be _ VB VB VERB _ 5 VC 7 tougher _ JJ JJR ADJ _ 6 PRD 8 on _ IN IN ADP _ 7 AMOD 9 the _ DT DT DET _ 10 NMOD 10 insiders _ NN NNS NOUN _ 8 PMOD 11 still _ RB RB ADV _ 10 NMOD 12 required _ JJ JJ ADJ _ 11 VMOD 13 to _ TO TO PRT _ 14 VMOD 14 file _ VB VB VERB _ 11 VMOD 15 reports _ NN NNS NOUN _ 14 OBJ 16 , _ , , . _ 18 P 17 he _ PR PRP PRON _ 18 SUB 18 said _ VB VBD VERB _ 0 ROOT 19 . _ . . . _ 18 P 1 Companies _ NN NNS NOUN _ 2 SUB 2 would _ MD MD VERB _ 0 ROOT 3 be _ VB VB VERB _ 2 VC 4 compelled _ VB VBN VERB _ 3 VC 5 to _ TO TO PRT _ 6 VMOD 6 publish _ VB VB VERB _ 4 VMOD 7 in _ IN IN ADP _ 6 VMOD 8 annual _ JJ JJ ADJ _ 10 NMOD 9 proxy _ NN NN NOUN _ 10 NMOD 10 statements _ NN NNS NOUN _ 7 PMOD 11 the _ DT DT DET _ 12 NMOD 12 names _ NN NNS NOUN _ 6 OBJ 13 of _ IN IN ADP _ 12 NMOD 14 insiders _ NN NNS NOUN _ 13 PMOD 15 who _ WP WP PRON _ 14 NMOD 16 fail _ VB VBP VERB _ 15 SBAR 17 to _ TO TO PRT _ 18 VMOD 18 file _ VB VB VERB _ 16 VMOD 19 reports _ NN NNS NOUN _ 18 OBJ 20 on _ IN IN ADP _ 18 VMOD 21 time _ NN NN NOUN _ 20 PMOD 22 . _ . . . _ 2 P 1 Considered _ VB VBN VERB _ 18 VMOD 2 as _ IN IN ADP _ 1 VMOD 3 a _ DT DT DET _ 4 NMOD 4 whole _ NN NN NOUN _ 2 PMOD 5 , _ , , . _ 8 P 6 Mr. _ NN NNP NOUN _ 7 NMOD 7 Lane _ NN NNP NOUN _ 8 SUB 8 said _ VB VBD VERB _ 18 VMOD 9 , _ , , . _ 8 P 10 the _ DT DT DET _ 11 NMOD 11 filings _ NN NNS NOUN _ 18 SUB 12 required _ VB VBN VERB _ 11 NMOD 13 under _ IN IN ADP _ 12 VMOD 14 the _ DT DT DET _ 16 NMOD 15 proposed _ VB VBN VERB _ 16 NMOD 16 rules _ NN NNS NOUN _ 13 PMOD 17 `` _ `` `` . 
_ 18 P 18 will _ MD MD VERB _ 0 ROOT 19 be _ VB VB VERB _ 18 VC 20 at _ IN IN ADP _ 21 AMOD 21 least _ RB RBS ADV _ 19 PRD 22 as _ IN IN ADP _ 21 AMOD 23 effective _ JJ JJ ADJ _ 21 AMOD 24 , _ , , . _ 25 P 25 if _ IN IN ADP _ 21 AMOD 26 not _ RB RB ADV _ 25 DEP 27 more _ RB RBR ADV _ 28 AMOD 28 so _ RB RB ADV _ 26 DEP 29 , _ , , . _ 25 P 30 for _ IN IN ADP _ 19 VMOD 31 investors _ NN NNS NOUN _ 30 PMOD 32 following _ VB VBG VERB _ 31 NMOD 33 transactions _ NN NNS NOUN _ 32 OBJ 34 . _ . . . _ 18 P 35 '' _ '' '' . _ 18 P 1 But _ CC CC CONJ _ 24 VMOD 2 Robert _ NN NNP NOUN _ 3 NMOD 3 Gabele _ NN NNP NOUN _ 5 NMOD 4 , _ , , . _ 5 P 5 president _ NN NN NOUN _ 24 SUB 6 of _ IN IN ADP _ 5 NMOD 7 Invest\/Net _ NN NNP NOUN _ 15 NMOD 8 , _ , , . _ 15 P 9 a _ DT DT DET _ 15 NMOD 10 North _ NN NNP NOUN _ 13 NMOD 11 Miami _ NN NNP NOUN _ 13 NMOD 12 , _ , , . _ 13 P 13 Fla. _ NN NNP NOUN _ 15 NMOD 14 , _ , , . _ 13 P 15 company _ NN NN NOUN _ 6 PMOD 16 that _ WD WDT DET _ 15 NMOD 17 packages _ VB VBZ VERB _ 16 SBAR 18 and _ CC CC CONJ _ 17 VMOD 19 sells _ VB VBZ VERB _ 17 VMOD 20 the _ DT DT DET _ 22 NMOD 21 insider-trading _ NN NN NOUN _ 22 NMOD 22 data _ NN NNS NOUN _ 17 OBJ 23 , _ , , . _ 5 P 24 said _ VB VBD VERB _ 0 ROOT 25 the _ DT DT DET _ 26 NMOD 26 proposal _ NN NN NOUN _ 27 SUB 27 is _ VB VBZ VERB _ 24 VMOD 28 worded _ VB VBN VERB _ 27 VC 29 so _ RB RB ADV _ 28 VMOD 30 vaguely _ RB RB ADV _ 29 AMOD 31 that _ IN IN ADP _ 29 AMOD 32 key _ JJ JJ ADJ _ 33 NMOD 33 officials _ NN NNS NOUN _ 34 SUB 34 may _ MD MD VERB _ 31 SBAR 35 fail _ VB VB VERB _ 34 VC 36 to _ TO TO PRT _ 37 VMOD 37 file _ VB VB VERB _ 35 VMOD 38 the _ DT DT DET _ 39 NMOD 39 reports _ NN NNS NOUN _ 37 OBJ 40 . _ . . . 
_ 24 P 1 Many _ JJ JJ ADJ _ 2 NMOD 2 investors _ NN NNS NOUN _ 3 SUB 3 wrote _ VB VBD VERB _ 0 ROOT 4 asking _ VB VBG VERB _ 3 VMOD 5 the _ DT DT DET _ 6 NMOD 6 SEC _ NN NNP NOUN _ 4 OBJ 7 to _ TO TO PRT _ 8 VMOD 8 require _ VB VB VERB _ 4 VMOD 9 insiders _ NN NNS NOUN _ 11 SUB 10 to _ TO TO PRT _ 11 VMOD 11 report _ VB VB VERB _ 8 VMOD 12 their _ PR PRP$ PRON _ 15 NMOD 13 purchases _ NN NNS NOUN _ 15 NMOD 14 and _ CC CC CONJ _ 15 NMOD 15 sales _ NN NNS NOUN _ 11 OBJ 16 immediately _ RB RB ADV _ 18 AMOD 17 , _ , , . _ 18 P 18 not _ RB RB ADV _ 11 VMOD 19 a _ DT DT DET _ 20 NMOD 20 month _ NN NN NOUN _ 21 AMOD 21 later _ RB RB ADV _ 18 AMOD 22 . _ . . . _ 3 P 1 But _ CC CC CONJ _ 4 VMOD 2 Mr. _ NN NNP NOUN _ 3 NMOD 3 Lane _ NN NNP NOUN _ 4 SUB 4 said _ VB VBD VERB _ 0 ROOT 5 that _ IN IN ADP _ 4 VMOD 6 while _ IN IN ADP _ 15 VMOD 7 the _ DT DT DET _ 8 NMOD 8 SEC _ NN NNP NOUN _ 9 SUB 9 regulates _ VB VBZ VERB _ 6 SBAR 10 who _ WP WP PRON _ 9 VMOD 11 files _ VB VBZ VERB _ 10 SBAR 12 , _ , , . _ 15 P 13 the _ DT DT DET _ 14 NMOD 14 law _ NN NN NOUN _ 15 SUB 15 tells _ VB VBZ VERB _ 5 SBAR 16 them _ PR PRP PRON _ 15 OBJ 17 when _ WR WRB ADV _ 15 VMOD 18 to _ TO TO PRT _ 19 VMOD 19 do _ VB VB VERB _ 17 SBAR 20 so _ RB RB ADV _ 19 PRD 21 . _ . . . _ 4 P 1 Investors _ NN NNS NOUN _ 9 SUB 2 who _ WP WP PRON _ 1 NMOD 3 want _ VB VBP VERB _ 2 SBAR 4 to _ TO TO PRT _ 5 VMOD 5 change _ VB VB VERB _ 3 VMOD 6 the _ DT DT DET _ 8 NMOD 7 required _ JJ JJ ADJ _ 8 NMOD 8 timing _ NN NN NOUN _ 5 OBJ 9 should _ MD MD VERB _ 17 VMOD 10 write _ VB VB VERB _ 9 VC 11 their _ PR PRP$ PRON _ 12 NMOD 12 representatives _ NN NNS NOUN _ 10 OBJ 13 in _ IN IN ADP _ 12 NMOD 14 Congress _ NN NNP NOUN _ 13 PMOD 15 , _ , , . _ 17 P 16 he _ PR PRP PRON _ 17 SUB 17 added _ VB VBD VERB _ 0 ROOT 18 . _ . . . 
_ 17 P 1 The _ DT DT DET _ 2 NMOD 2 SEC _ NN NNP NOUN _ 3 SUB 3 would _ MD MD VERB _ 22 VMOD 4 likely _ RB RB ADV _ 3 VMOD 5 be _ VB VB VERB _ 3 VC 6 amenable _ JJ JJ ADJ _ 5 PRD 7 to _ TO TO PRT _ 6 AMOD 8 legislation _ NN NN NOUN _ 7 PMOD 9 that _ WD WDT DET _ 8 NMOD 10 required _ VB VBD VERB _ 9 SBAR 11 insiders _ NN NNS NOUN _ 13 SUB 12 to _ TO TO PRT _ 13 VMOD 13 file _ VB VB VERB _ 10 VMOD 14 transactions _ NN NNS NOUN _ 13 OBJ 15 on _ IN IN ADP _ 13 VMOD 16 a _ DT DT DET _ 19 NMOD 17 more _ RB RBR ADV _ 18 AMOD 18 timely _ JJ JJ ADJ _ 19 NMOD 19 basis _ NN NN NOUN _ 15 PMOD 20 , _ , , . _ 22 P 21 he _ PR PRP PRON _ 22 SUB 22 said _ VB VBD VERB _ 0 ROOT 23 . _ . . . _ 22 P 1 The _ DT DT DET _ 3 NMOD 2 nation _ NN NN NOUN _ 3 NMOD 3 's _ PO POS PRT _ 6 NMOD 4 largest _ JJ JJS ADJ _ 6 NMOD 5 pension _ NN NN NOUN _ 6 NMOD 6 fund _ NN NN NOUN _ 17 SUB 7 , _ , , . _ 6 P 8 which _ WD WDT DET _ 6 NMOD 9 oversees _ VB VBZ VERB _ 8 SBAR 10 $ _ $ $ . _ 9 P 11 80 _ CD CD NUM _ 10 AMOD 12 billion _ CD CD NUM _ 10 AMOD 13 for _ IN IN ADP _ 9 VMOD 14 college _ NN NN NOUN _ 15 NMOD 15 employees _ NN NNS NOUN _ 13 PMOD 16 , _ , , . _ 6 P 17 plans _ VB VBZ VERB _ 0 ROOT 18 to _ TO TO PRT _ 19 VMOD 19 offer _ VB VB VERB _ 17 VMOD 20 two _ CD CD NUM _ 23 NMOD 21 new _ JJ JJ ADJ _ 23 NMOD 22 investment _ NN NN NOUN _ 23 NMOD 23 options _ NN NNS NOUN _ 19 OBJ 24 to _ TO TO PRT _ 19 VMOD 25 its _ PR PRP$ PRON _ 28 NMOD 26 1.2 _ CD CD NUM _ 27 AMOD 27 million _ CD CD NUM _ 28 NMOD 28 participants _ NN NNS NOUN _ 24 PMOD 29 . _ . . . 
_ 17 P 1 The _ DT DT DET _ 9 NMOD 2 Teachers _ NN NNPS NOUN _ 9 NMOD 3 Insurance _ NN NNP NOUN _ 9 NMOD 4 and _ CC CC CONJ _ 9 NMOD 5 Annuity _ NN NNP NOUN _ 9 NMOD 6 Association-College _ NN NNP NOUN _ 9 NMOD 7 Retirement _ NN NNP NOUN _ 9 NMOD 8 Equities _ NN NNPS NOUN _ 9 NMOD 9 Fund _ NN NNP NOUN _ 10 SUB 10 said _ VB VBD VERB _ 0 ROOT 11 it _ PR PRP PRON _ 12 SUB 12 will _ MD MD VERB _ 10 VMOD 13 introduce _ VB VB VERB _ 12 VC 14 a _ DT DT DET _ 18 NMOD 15 stock _ NN NN NOUN _ 18 NMOD 16 and _ CC CC CONJ _ 18 NMOD 17 bond _ NN NN NOUN _ 18 NMOD 18 fund _ NN NN NOUN _ 32 NMOD 19 that _ WD WDT DET _ 18 NMOD 20 will _ MD MD VERB _ 19 SBAR 21 invest _ VB VB VERB _ 20 VC 22 in _ IN IN ADP _ 21 VMOD 23 `` _ `` `` . _ 27 P 24 socially _ RB RB ADV _ 25 AMOD 25 responsible _ JJ JJ ADJ _ 27 NMOD 26 '' _ '' '' . _ 27 P 27 companies _ NN NNS NOUN _ 22 PMOD 28 , _ , , . _ 32 P 29 and _ CC CC CONJ _ 32 NMOD 30 a _ DT DT DET _ 32 NMOD 31 bond _ NN NN NOUN _ 32 NMOD 32 fund _ NN NN NOUN _ 13 OBJ 33 . _ . . . _ 10 P 1 Both _ DT DT DET _ 2 NMOD 2 funds _ NN NNS NOUN _ 3 SUB 3 are _ VB VBP VERB _ 0 ROOT 4 expected _ VB VBN VERB _ 3 VC 5 to _ TO TO PRT _ 6 VMOD 6 begin _ VB VB VERB _ 4 VMOD 7 operation _ NN NN NOUN _ 6 OBJ 8 around _ IN IN ADP _ 6 VMOD 9 March _ NN NNP NOUN _ 8 PMOD 10 1 _ CD CD NUM _ 9 NMOD 11 , _ , , . _ 6 P 12 subject _ JJ JJ ADJ _ 6 VMOD 13 to _ TO TO PRT _ 12 AMOD 14 Securities _ NN NNPS NOUN _ 18 NMOD 15 and _ CC CC CONJ _ 18 NMOD 16 Exchange _ NN NNP NOUN _ 18 NMOD 17 Commission _ NN NNP NOUN _ 18 NMOD 18 approval _ NN NN NOUN _ 13 PMOD 19 . _ . . . _ 3 P 1 For _ IN IN ADP _ 14 VMOD 2 its _ PR PRP$ PRON _ 3 NMOD 3 employees _ NN NNS NOUN _ 5 SUB 4 to _ TO TO PRT _ 5 VMOD 5 sign _ VB VB VERB _ 1 SBAR 6 up _ RP RP PRT _ 5 VMOD 7 for _ IN IN ADP _ 5 VMOD 8 the _ DT DT DET _ 9 NMOD 9 options _ NN NNS NOUN _ 7 PMOD 10 , _ , , . 
_ 14 P 11 a _ DT DT DET _ 12 NMOD 12 college _ NN NN NOUN _ 14 SUB 13 also _ RB RB ADV _ 14 VMOD 14 must _ MD MD VERB _ 0 ROOT 15 approve _ VB VB VERB _ 14 VC 16 the _ DT DT DET _ 17 NMOD 17 plan _ NN NN NOUN _ 15 OBJ 18 . _ . . . _ 14 P 1 Some _ DT DT DET _ 3 NMOD 2 4,300 _ CD CD NUM _ 1 AMOD 3 institutions _ NN NNS NOUN _ 4 SUB 4 are _ VB VBP VERB _ 0 ROOT 5 part _ NN NN NOUN _ 4 PRD 6 of _ IN IN ADP _ 5 NMOD 7 the _ DT DT DET _ 9 NMOD 8 pension _ NN NN NOUN _ 9 NMOD 9 fund _ NN NN NOUN _ 6 PMOD 10 . _ . . . _ 4 P 1 The _ DT DT DET _ 3 NMOD 2 new _ JJ JJ ADJ _ 3 NMOD 3 options _ NN NNS NOUN _ 4 SUB 4 carry _ VB VBP VERB _ 0 ROOT 5 out _ RP RP PRT _ 4 VMOD 6 part _ NN NN NOUN _ 4 OBJ 7 of _ IN IN ADP _ 6 NMOD 8 an _ DT DT DET _ 9 NMOD 9 agreement _ NN NN NOUN _ 7 PMOD 10 that _ IN IN ADP _ 9 NMOD 11 the _ DT DT DET _ 13 NMOD 12 pension _ NN NN NOUN _ 13 NMOD 13 fund _ NN NN NOUN _ 30 SUB 14 , _ , , . _ 30 P 15 under _ IN IN ADP _ 30 VMOD 16 pressure _ NN NN NOUN _ 15 PMOD 17 to _ TO TO PRT _ 18 VMOD 18 relax _ VB VB VERB _ 16 NMOD 19 its _ PR PRP$ PRON _ 22 NMOD 20 strict _ JJ JJ ADJ _ 22 NMOD 21 participation _ NN NN NOUN _ 22 NMOD 22 rules _ NN NNS NOUN _ 18 OBJ 23 and _ CC CC CONJ _ 18 VMOD 24 to _ TO TO PRT _ 25 VMOD 25 provide _ VB VB VERB _ 18 VMOD 26 more _ JJ JJR ADJ _ 28 NMOD 27 investment _ NN NN NOUN _ 28 NMOD 28 options _ NN NNS NOUN _ 25 OBJ 29 , _ , , . _ 30 P 30 reached _ VB VBN VERB _ 10 SBAR 31 with _ IN IN ADP _ 30 VMOD 32 the _ DT DT DET _ 33 NMOD 33 SEC _ NN NNP NOUN _ 31 PMOD 34 in _ IN IN ADP _ 30 VMOD 35 December _ NN NNP NOUN _ 34 PMOD 36 . _ . . . _ 4 P 1 The _ DT DT DET _ 7 NMOD 2 new _ JJ JJ ADJ _ 7 NMOD 3 `` _ `` `` . _ 7 P 4 social _ JJ JJ ADJ _ 7 NMOD 5 choice _ NN NN NOUN _ 7 NMOD 6 '' _ '' '' . 
_ 7 P 7 fund _ NN NN NOUN _ 8 SUB 8 will _ MD MD VERB _ 0 ROOT 9 shun _ VB VB VERB _ 8 VC 10 securities _ NN NNS NOUN _ 9 OBJ 11 of _ IN IN ADP _ 10 NMOD 12 companies _ NN NNS NOUN _ 11 PMOD 13 linked _ VB VBN VERB _ 12 NMOD 14 to _ TO TO PRT _ 13 VMOD 15 South _ NN NNP NOUN _ 16 NMOD 16 Africa _ NN NNP NOUN _ 26 NMOD 17 , _ , , . _ 26 P 18 nuclear _ JJ JJ ADJ _ 19 NMOD 19 power _ NN NN NOUN _ 26 NMOD 20 and _ CC CC CONJ _ 26 NMOD 21 in _ IN IN ADP _ 26 NMOD 22 some _ DT DT DET _ 23 NMOD 23 cases _ NN NNS NOUN _ 21 PMOD 24 , _ , , . _ 26 P 25 Northern _ NN NNP NOUN _ 26 NMOD 26 Ireland _ NN NNP NOUN _ 14 PMOD 27 . _ . . . _ 8 P 1 Also _ RB RB ADV _ 3 VMOD 2 excluded _ VB VBN VERB _ 3 VMOD 3 will _ MD MD VERB _ 0 ROOT 4 be _ VB VB VERB _ 3 VC 5 investments _ NN NNS NOUN _ 3 SUB 6 in _ IN IN ADP _ 5 NMOD 7 companies _ NN NNS NOUN _ 6 PMOD 8 with _ IN IN ADP _ 7 NMOD 9 `` _ `` `` . _ 12 P 10 significant _ JJ JJ ADJ _ 12 NMOD 11 '' _ '' '' . _ 12 P 12 business _ NN NN NOUN _ 8 PMOD 13 stemming _ VB VBG VERB _ 12 NMOD 14 from _ IN IN ADP _ 13 VMOD 15 weapons _ NN NNS NOUN _ 16 NMOD 16 manufacture _ NN NN NOUN _ 21 NMOD 17 , _ , , . _ 21 P 18 alcoholic _ JJ JJ ADJ _ 19 NMOD 19 beverages _ NN NNS NOUN _ 21 NMOD 20 or _ CC CC CONJ _ 21 NMOD 21 tobacco _ NN NN NOUN _ 14 PMOD 22 . _ . . . _ 3 P 1 Sixty _ CD CD NUM _ 2 NMOD 2 percent _ NN NN NOUN _ 6 SUB 3 of _ IN IN ADP _ 2 NMOD 4 the _ DT DT DET _ 5 NMOD 5 fund _ NN NN NOUN _ 3 PMOD 6 will _ MD MD VERB _ 0 ROOT 7 be _ VB VB VERB _ 6 VC 8 invested _ VB VBN VERB _ 7 VC 9 in _ IN IN ADP _ 8 VMOD 10 stocks _ NN NNS NOUN _ 9 PMOD 11 , _ , , . _ 8 P 12 with _ IN IN ADP _ 8 VMOD 13 the _ DT DT DET _ 14 NMOD 14 rest _ NN NN NOUN _ 15 SUB 15 going _ VB VBG VERB _ 12 PMOD 16 into _ IN IN ADP _ 15 VMOD 17 bonds _ NN NNS NOUN _ 20 NMOD 18 or _ CC CC CONJ _ 20 NMOD 19 short-term _ JJ JJ ADJ _ 20 NMOD 20 investments _ NN NNS NOUN _ 16 PMOD 21 . _ . . . 
_ 6 P 1 The _ DT DT DET _ 3 NMOD 2 bond _ NN NN NOUN _ 3 NMOD 3 fund _ NN NN NOUN _ 4 SUB 4 will _ MD MD VERB _ 0 ROOT 5 invest _ VB VB VERB _ 4 VC 6 in _ IN IN ADP _ 5 VMOD 7 high-grade _ JJ JJ ADJ _ 9 AMOD 8 or _ CC CC CONJ _ 9 AMOD 9 medium-grade _ JJ JJ ADJ _ 10 NMOD 10 bonds _ NN NNS NOUN _ 15 NMOD 11 , _ , , . _ 15 P 12 mortgages _ NN NNS NOUN _ 15 NMOD 13 or _ CC CC CONJ _ 15 NMOD 14 asset-backed _ JJ JJ ADJ _ 15 NMOD 15 securities _ NN NNS NOUN _ 6 PMOD 16 , _ , , . _ 15 P 17 including _ VB VBG VERB _ 15 NMOD 18 as _ JJ JJ ADJ _ 20 AMOD 19 much _ RB RB ADV _ 20 AMOD 20 as _ IN IN ADP _ 22 NMOD 21 15 _ CD CD NUM _ 20 AMOD 22 % _ NN NN NOUN _ 17 PMOD 23 in _ IN IN ADP _ 22 NMOD 24 foreign _ JJ JJ ADJ _ 25 NMOD 25 securities _ NN NNS NOUN _ 23 PMOD 26 . _ . . . _ 4 P 1 The _ DT DT DET _ 2 NMOD 2 fund _ NN NN NOUN _ 4 SUB 3 also _ RB RB ADV _ 4 VMOD 4 might _ MD MD VERB _ 0 ROOT 5 buy _ VB VB VERB _ 4 VC 6 and _ CC CC CONJ _ 5 VMOD 7 sell _ VB VB VERB _ 5 VMOD 8 futures _ NN NNS NOUN _ 11 NMOD 9 and _ CC CC CONJ _ 11 NMOD 10 options _ NN NNS NOUN _ 11 NMOD 11 contracts _ NN NNS NOUN _ 5 OBJ 12 , _ , , . _ 5 P 13 subject _ JJ JJ ADJ _ 5 VMOD 14 to _ TO TO PRT _ 13 AMOD 15 approval _ NN NN NOUN _ 14 PMOD 16 by _ IN IN ADP _ 15 NMOD 17 the _ DT DT DET _ 22 NMOD 18 New _ NN NNP NOUN _ 22 NMOD 19 York _ NN NNP NOUN _ 22 NMOD 20 State _ NN NNP NOUN _ 22 NMOD 21 Insurance _ NN NNP NOUN _ 22 NMOD 22 Department _ NN NNP NOUN _ 16 PMOD 23 . _ . . . _ 4 P 1 Under _ IN IN ADP _ 7 VMOD 2 two _ CD CD NUM _ 4 NMOD 3 new _ JJ JJ ADJ _ 4 NMOD 4 features _ NN NNS NOUN _ 1 PMOD 5 , _ , , . 
_ 7 P 6 participants _ NN NNS NOUN _ 7 SUB 7 will _ MD MD VERB _ 0 ROOT 8 be _ VB VB VERB _ 7 VC 9 able _ JJ JJ ADJ _ 8 VMOD 10 to _ TO TO PRT _ 11 VMOD 11 transfer _ VB VB VERB _ 9 AMOD 12 money _ NN NN NOUN _ 11 OBJ 13 from _ IN IN ADP _ 11 VMOD 14 the _ DT DT DET _ 16 NMOD 15 new _ JJ JJ ADJ _ 16 NMOD 16 funds _ NN NNS NOUN _ 13 PMOD 17 to _ TO TO PRT _ 11 VMOD 18 other _ JJ JJ ADJ _ 20 NMOD 19 investment _ NN NN NOUN _ 20 NMOD 20 funds _ NN NNS NOUN _ 17 PMOD 21 or _ CC CC CONJ _ 11 VMOD 22 , _ , , . _ 23 P 23 if _ IN IN ADP _ 29 VMOD 24 their _ PR PRP$ PRON _ 25 NMOD 25 jobs _ NN NNS NOUN _ 26 SUB 26 are _ VB VBP VERB _ 23 SBAR 27 terminated _ VB VBN VERB _ 26 VC 28 , _ , , . _ 23 P 29 receive _ VB VB VERB _ 11 VMOD 30 cash _ NN NN NOUN _ 29 OBJ 31 from _ IN IN ADP _ 29 VMOD 32 the _ DT DT DET _ 33 NMOD 33 funds _ NN NNS NOUN _ 31 PMOD 34 . _ . . . _ 7 P 1 The _ DT DT DET _ 3 NMOD 2 investment _ NN NN NOUN _ 3 NMOD 3 choices _ NN NNS NOUN _ 10 SUB 4 offered _ VB VBN VERB _ 3 NMOD 5 by _ IN IN ADP _ 4 VMOD 6 the _ DT DT DET _ 8 NMOD 7 pension _ NN NN NOUN _ 8 NMOD 8 fund _ NN NN NOUN _ 5 PMOD 9 currently _ RB RB ADV _ 10 VMOD 10 are _ VB VBP VERB _ 0 ROOT 11 limited _ VB VBN VERB _ 10 VC 12 to _ TO TO PRT _ 11 VMOD 13 a _ DT DT DET _ 15 NMOD 14 stock _ NN NN NOUN _ 15 NMOD 15 fund _ NN NN NOUN _ 22 NMOD 16 , _ , , . _ 22 P 17 an _ DT DT DET _ 18 NMOD 18 annuity _ NN NN NOUN _ 22 NMOD 19 and _ CC CC CONJ _ 22 NMOD 20 a _ DT DT DET _ 22 NMOD 21 money-market _ NN NN NOUN _ 22 NMOD 22 fund _ NN NN NOUN _ 12 PMOD 23 . _ . . . _ 10 P 1 New _ NN NNP NOUN _ 4 NMOD 2 Brunswick _ NN NNP NOUN _ 4 NMOD 3 Scientific _ NN NNP NOUN _ 4 NMOD 4 Co. _ NN NNP NOUN _ 7 NMOD 5 , _ , , . _ 7 P 6 a _ DT DT DET _ 7 NMOD 7 maker _ NN NN NOUN _ 14 SUB 8 of _ IN IN ADP _ 7 NMOD 9 biotechnology _ NN NN NOUN _ 12 NMOD 10 instrumentation _ NN NN NOUN _ 12 NMOD 11 and _ CC CC CONJ _ 12 NMOD 12 equipment _ NN NN NOUN _ 8 PMOD 13 , _ , , . 
_ 7 P 14 said _ VB VBD VERB _ 0 ROOT 15 it _ PR PRP PRON _ 16 SUB 16 adopted _ VB VBD VERB _ 14 VMOD 17 an _ DT DT DET _ 19 NMOD 18 anti-takeover _ JJ JJ ADJ _ 19 NMOD 19 plan _ NN NN NOUN _ 16 OBJ 20 giving _ VB VBG VERB _ 19 NMOD 21 shareholders _ NN NNS NOUN _ 20 OBJ 22 the _ DT DT DET _ 23 NMOD 23 right _ NN NN NOUN _ 20 OBJ 24 to _ TO TO PRT _ 25 VMOD 25 purchase _ VB VB VERB _ 23 NMOD 26 shares _ NN NNS NOUN _ 25 OBJ 27 at _ IN IN ADP _ 25 VMOD 28 half _ JJ JJ ADJ _ 29 NMOD 29 price _ NN NN NOUN _ 27 PMOD 30 under _ IN IN ADP _ 25 VMOD 31 certain _ JJ JJ ADJ _ 32 NMOD 32 conditions _ NN NNS NOUN _ 30 PMOD 33 . _ . . . _ 14 P 1 The _ DT DT DET _ 2 NMOD 2 company _ NN NN NOUN _ 3 SUB 3 said _ VB VBD VERB _ 0 ROOT 4 the _ DT DT DET _ 5 NMOD 5 plan _ NN NN NOUN _ 13 SUB 6 , _ , , . _ 13 P 7 under _ IN IN ADP _ 9 PRD 8 review _ NN NN NOUN _ 7 PMOD 9 for _ IN IN ADP _ 13 VMOD 10 some _ DT DT DET _ 11 NMOD 11 time _ NN NN NOUN _ 9 PMOD 12 , _ , , . _ 13 P 13 will _ MD MD VERB _ 3 VMOD 14 protect _ VB VB VERB _ 13 VC 15 shareholders _ NN NNS NOUN _ 14 OBJ 16 against _ IN IN ADP _ 14 VMOD 17 `` _ `` `` . _ 16 P 18 abusive _ JJ JJ ADJ _ 20 NMOD 19 takeover _ NN NN NOUN _ 20 NMOD 20 tactics _ NN NNS NOUN _ 16 PMOD 21 . _ . . . _ 3 P 1 W. _ NN NNP NOUN _ 3 NMOD 2 Ed _ NN NNP NOUN _ 3 NMOD 3 Tyler _ NN NNP NOUN _ 12 NMOD 4 , _ , , . _ 12 P 5 37 _ CD CD NUM _ 6 NMOD 6 years _ NN NNS NOUN _ 7 AMOD 7 old _ JJ JJ ADJ _ 12 NMOD 8 , _ , , . _ 12 P 9 a _ DT DT DET _ 12 NMOD 10 senior _ JJ JJ ADJ _ 12 NMOD 11 vice _ NN NN NOUN _ 12 NMOD 12 president _ NN NN NOUN _ 18 SUB 13 at _ IN IN ADP _ 12 NMOD 14 this _ DT DT DET _ 16 NMOD 15 printing _ VB VBG VERB _ 16 NMOD 16 concern _ NN NN NOUN _ 13 PMOD 17 , _ , , . _ 12 P 18 was _ VB VBD VERB _ 0 ROOT 19 elected _ VB VBN VERB _ 18 VC 20 president _ NN NN NOUN _ 28 NMOD 21 of _ IN IN ADP _ 20 NMOD 22 its _ PR PRP$ PRON _ 24 NMOD 23 technology _ NN NN NOUN _ 24 NMOD 24 group _ NN NN NOUN _ 21 PMOD 25 , _ , , . 
_ 28 P 26 a _ DT DT DET _ 28 NMOD 27 new _ JJ JJ ADJ _ 28 NMOD 28 position _ NN NN NOUN _ 19 VMOD 29 . _ . . . _ 18 P 1 Solo _ JJ JJ ADJ _ 3 NMOD 2 woodwind _ NN NN NOUN _ 3 NMOD 3 players _ NN NNS NOUN _ 4 SUB 4 have _ VB VBP VERB _ 0 ROOT 5 to _ TO TO PRT _ 6 VMOD 6 be _ VB VB VERB _ 4 VMOD 7 creative _ JJ JJ ADJ _ 6 PRD 8 if _ IN IN ADP _ 4 VMOD 9 they _ PR PRP PRON _ 10 SUB 10 want _ VB VBP VERB _ 8 SBAR 11 to _ TO TO PRT _ 12 VMOD 12 work _ VB VB VERB _ 10 VMOD 13 a _ DT DT DET _ 14 NMOD 14 lot _ NN NN NOUN _ 12 VMOD 15 , _ , , . _ 4 P 16 because _ IN IN ADP _ 4 VMOD 17 their _ PR PRP$ PRON _ 21 NMOD 18 repertoire _ NN NN NOUN _ 21 NMOD 19 and _ CC CC CONJ _ 21 NMOD 20 audience _ NN NN NOUN _ 21 NMOD 21 appeal _ NN NN NOUN _ 22 SUB 22 are _ VB VBP VERB _ 16 SBAR 23 limited _ JJ JJ ADJ _ 22 PRD 24 . _ . . . _ 4 P 1 The _ DT DT DET _ 4 NMOD 2 oboist _ NN NN NOUN _ 4 NMOD 3 Heinz _ NN NNP NOUN _ 4 NMOD 4 Holliger _ NN NNP NOUN _ 5 SUB 5 has _ VB VBZ VERB _ 15 VMOD 6 taken _ VB VBN VERB _ 5 VC 7 a _ DT DT DET _ 9 NMOD 8 hard _ JJ JJ ADJ _ 9 NMOD 9 line _ NN NN NOUN _ 6 OBJ 10 about _ IN IN ADP _ 6 VMOD 11 the _ DT DT DET _ 12 NMOD 12 problem _ NN NN NOUN _ 10 PMOD 13 : _ : : . _ 15 P 14 He _ PR PRP PRON _ 15 SUB 15 commissions _ VB VBZ VERB _ 0 ROOT 16 and _ CC CC CONJ _ 15 VMOD 17 splendidly _ RB RB ADV _ 18 VMOD 18 interprets _ VB VBZ VERB _ 15 VMOD 19 fearsome _ JJ JJ ADJ _ 21 NMOD 20 contemporary _ JJ JJ ADJ _ 21 NMOD 21 scores _ NN NNS NOUN _ 15 VMOD 22 and _ CC CC CONJ _ 15 VMOD 23 does _ VB VBZ VERB _ 15 VMOD 24 some _ DT DT DET _ 25 NMOD 25 conducting _ NN NN NOUN _ 23 OBJ 26 , _ , , . 
_ 15 P 27 so _ IN IN ADP _ 15 VMOD 28 he _ PR PRP PRON _ 29 SUB 29 does _ VB VBZ VERB _ 27 SBAR 30 n't _ RB RB ADV _ 29 VMOD 31 have _ VB VB VERB _ 29 VC 32 to _ TO TO PRT _ 33 VMOD 33 play _ VB VB VERB _ 31 VMOD 34 the _ DT DT DET _ 39 NMOD 35 same _ JJ JJ ADJ _ 39 NMOD 36 Mozart _ NN NNP NOUN _ 39 NMOD 37 and _ CC CC CONJ _ 39 NMOD 38 Strauss _ NN NNP NOUN _ 39 NMOD 39 concertos _ NN NNS NOUN _ 33 OBJ 40 over _ RB RB ADV _ 33 VMOD 41 and _ CC CC CONJ _ 40 AMOD 42 over _ RB RB ADV _ 40 AMOD 43 again _ RB RB ADV _ 33 VMOD 44 . _ . . . _ 15 P 1 Richard _ NN NNP NOUN _ 2 NMOD 2 Stoltzman _ NN NNP NOUN _ 3 SUB 3 has _ VB VBZ VERB _ 0 ROOT 4 taken _ VB VBN VERB _ 3 VC 5 a _ DT DT DET _ 10 NMOD 6 gentler _ JJ JJR ADJ _ 10 NMOD 7 , _ , , . _ 10 P 8 more _ RB RBR ADV _ 9 AMOD 9 audience-friendly _ JJ JJ ADJ _ 10 NMOD 10 approach _ NN NN NOUN _ 4 OBJ 11 . _ . . . _ 3 P 1 Years _ NN NNS NOUN _ 2 AMOD 2 ago _ RB RB ADV _ 5 VMOD 3 , _ , , . _ 5 P 4 he _ PR PRP PRON _ 5 SUB 5 collaborated _ VB VBD VERB _ 0 ROOT 6 with _ IN IN ADP _ 5 VMOD 7 the _ DT DT DET _ 10 NMOD 8 new _ JJ JJ ADJ _ 10 NMOD 9 music _ NN NN NOUN _ 10 NMOD 10 gurus _ NN NNS NOUN _ 15 NMOD 11 Peter _ NN NNP NOUN _ 12 NMOD 12 Serkin _ NN NNP NOUN _ 15 NMOD 13 and _ CC CC CONJ _ 15 NMOD 14 Fred _ NN NNP NOUN _ 15 NMOD 15 Sherry _ NN NNP NOUN _ 6 PMOD 16 in _ IN IN ADP _ 5 VMOD 17 the _ DT DT DET _ 21 NMOD 18 very _ RB RB ADV _ 19 AMOD 19 countercultural _ JJ JJ ADJ _ 21 NMOD 20 chamber _ NN NN NOUN _ 21 NMOD 21 group _ NN NN NOUN _ 22 NMOD 22 Tashi _ NN NNP NOUN _ 16 PMOD 23 , _ , , . _ 22 P 24 which _ WD WDT DET _ 22 NMOD 25 won _ VB VBD VERB _ 24 SBAR 26 audiences _ NN NNS NOUN _ 25 OBJ 27 over _ RP RP PRT _ 25 VMOD 28 to _ TO TO PRT _ 27 AMOD 29 dreaded _ JJ JJ ADJ _ 31 NMOD 30 contemporary _ JJ JJ ADJ _ 31 NMOD 31 scores _ NN NNS NOUN _ 28 PMOD 32 like _ IN IN ADP _ 31 NMOD 33 Messiaen _ NN NNP NOUN _ 34 NMOD 34 's _ PO POS PRT _ 36 NMOD 35 `` _ `` `` . 
_ 36 P 36 Quartet _ NN NN NOUN _ 32 PMOD 37 for _ IN IN ADP _ 36 NMOD 38 the _ DT DT DET _ 39 NMOD 39 End _ NN NN NOUN _ 37 PMOD 40 of _ IN IN ADP _ 39 NMOD 41 Time _ NN NN NOUN _ 40 PMOD 42 . _ . . . _ 5 P 43 '' _ '' '' . _ 5 P 1 Today _ NN NN NOUN _ 6 VMOD 2 , _ , , . _ 6 P 3 the _ DT DT DET _ 5 NMOD 4 pixie-like _ JJ JJ ADJ _ 5 NMOD 5 clarinetist _ NN NN NOUN _ 6 SUB 6 has _ VB VBZ VERB _ 0 ROOT 7 mostly _ RB RB ADV _ 6 VMOD 8 dropped _ VB VBN VERB _ 6 VC 9 the _ DT DT DET _ 11 NMOD 10 missionary _ JJ JJ ADJ _ 11 NMOD 11 work _ NN NN NOUN _ 8 OBJ 12 ( _ -L -LRB- . _ 13 P 13 though _ IN IN ADP _ 8 VMOD 14 a _ DT DT DET _ 15 NMOD 15 touch _ NN NN NOUN _ 21 SUB 16 of _ IN IN ADP _ 15 NMOD 17 the _ DT DT DET _ 19 NMOD 18 old _ JJ JJ ADJ _ 19 NMOD 19 Tashi _ NN NNP NOUN _ 16 PMOD 20 still _ RB RB ADV _ 21 VMOD 21 survives _ VB VBZ VERB _ 13 SBAR 22 ) _ -R -RRB- . _ 13 P 23 and _ CC CC CONJ _ 6 VMOD 24 now _ RB RB ADV _ 6 VMOD 25 goes _ VB VBZ VERB _ 6 VMOD 26 on _ IN IN ADP _ 25 VMOD 27 the _ DT DT DET _ 28 NMOD 28 road _ NN NN NOUN _ 26 PMOD 29 with _ IN IN ADP _ 25 VMOD 30 piano _ NN NN NOUN _ 40 NMOD 31 , _ , , . _ 40 P 32 bass _ NN NN NOUN _ 40 NMOD 33 , _ , , . _ 40 P 34 a _ DT DT DET _ 36 NMOD 35 slide _ NN NN NOUN _ 36 NMOD 36 show _ NN NN NOUN _ 40 NMOD 37 , _ , , . _ 40 P 38 and _ CC CC CONJ _ 40 NMOD 39 a _ DT DT DET _ 40 NMOD 40 repertoire _ NN NN NOUN _ 29 PMOD 41 that _ WD WDT DET _ 40 NMOD 42 ranges _ VB VBZ VERB _ 41 SBAR 43 from _ IN IN ADP _ 42 VMOD 44 light _ JJ JJ ADJ _ 45 NMOD 45 classical _ JJ JJ ADJ _ 43 PMOD 46 to _ TO TO PRT _ 43 PMOD 47 light _ JJ JJ ADJ _ 48 NMOD 48 jazz _ NN NN NOUN _ 46 PMOD 49 to _ TO TO PRT _ 43 PMOD 50 light _ JJ JJ ADJ _ 51 NMOD 51 pop _ NN NN NOUN _ 49 PMOD 52 , _ , , . _ 42 P 53 with _ IN IN ADP _ 42 VMOD 54 a _ DT DT DET _ 57 NMOD 55 few _ JJ JJ ADJ _ 57 NMOD 56 notable _ JJ JJ ADJ _ 57 NMOD 57 exceptions _ NN NNS NOUN _ 53 PMOD 58 . _ . . . 
_ 6 P 1 Just _ RB RB ADV _ 3 NMOD 2 the _ DT DT DET _ 3 NMOD 3 thing _ NN NN NOUN _ 0 ROOT 4 for _ IN IN ADP _ 3 DEP 5 the _ DT DT DET _ 7 NMOD 6 Vivaldi-at-brunch _ JJ JJ ADJ _ 7 NMOD 7 set _ NN NN NOUN _ 11 NMOD 8 , _ , , . _ 11 P 9 the _ DT DT DET _ 11 NMOD 10 yuppie _ NN NN NOUN _ 11 NMOD 11 audience _ NN NN NOUN _ 4 PMOD 12 that _ WD WDT DET _ 11 NMOD 13 has _ VB VBZ VERB _ 12 SBAR 14 embraced _ VB VBN VERB _ 13 VC 15 New _ NN NNP NOUN _ 16 NMOD 16 Age _ NN NNP NOUN _ 14 OBJ 17 as _ IN IN ADP _ 14 VMOD 18 its _ PR PRP$ PRON _ 22 NMOD 19 very _ RB RB ADV _ 20 AMOD 20 own _ JJ JJ ADJ _ 22 NMOD 21 easy _ JJ JJ ADJ _ 22 NMOD 22 listening _ NN NN NOUN _ 17 PMOD 23 . _ . . . _ 3 P 1 But _ CC CC CONJ _ 3 VMOD 2 you _ PR PRP PRON _ 3 SUB 3 ca _ MD MD VERB _ 0 ROOT 4 n't _ RB RB ADV _ 3 VMOD 5 dismiss _ VB VB VERB _ 3 VC 6 Mr. _ NN NNP NOUN _ 8 NMOD 7 Stoltzman _ NN NNP NOUN _ 8 NMOD 8 's _ PO POS PRT _ 9 NMOD 9 music _ NN NN NOUN _ 12 NMOD 10 or _ CC CC CONJ _ 12 NMOD 11 his _ PR PRP$ PRON _ 12 NMOD 12 motives _ NN NNS NOUN _ 5 OBJ 13 as _ RB RB ADV _ 5 VMOD 14 merely _ RB RB ADV _ 17 AMOD 15 commercial _ JJ JJ ADJ _ 17 AMOD 16 and _ CC CC CONJ _ 17 AMOD 17 lightweight _ JJ JJ ADJ _ 13 PMOD 18 . _ . . . _ 3 P 1 He _ PR PRP PRON _ 2 SUB 2 believes _ VB VBZ VERB _ 10 VMOD 3 in _ IN IN ADP _ 2 VMOD 4 what _ WP WP PRON _ 3 PMOD 5 he _ PR PRP PRON _ 6 SUB 6 plays _ VB VBZ VERB _ 4 SBAR 7 , _ , , . _ 10 P 8 and _ CC CC CONJ _ 10 VMOD 9 he _ PR PRP PRON _ 10 SUB 10 plays _ VB VBZ VERB _ 0 ROOT 11 superbly _ RB RB ADV _ 10 VMOD 12 . _ . . . _ 10 P 1 His _ PR PRP$ PRON _ 3 NMOD 2 recent _ JJ JJ ADJ _ 3 NMOD 3 appearance _ NN NN NOUN _ 16 SUB 4 at _ IN IN ADP _ 3 NMOD 5 the _ DT DT DET _ 7 NMOD 6 Metropolitan _ NN NNP NOUN _ 7 NMOD 7 Museum _ NN NNP NOUN _ 4 PMOD 8 , _ , , . _ 3 P 9 dubbed _ VB VBN VERB _ 3 NMOD 10 `` _ `` `` . _ 13 P 11 A _ DT DT DET _ 13 NMOD 12 Musical _ JJ JJ ADJ _ 13 NMOD 13 Odyssey _ NN NN NOUN _ 9 VMOD 14 , _ , , . _ 13 P 15 '' _ '' '' . 
_ 13 P 16 was _ VB VBD VERB _ 0 ROOT 17 a _ DT DT DET _ 18 NMOD 18 case _ NN NN NOUN _ 16 PRD 19 in _ IN IN ADP _ 18 NMOD 20 point _ NN NN NOUN _ 19 PMOD 21 . _ . . . _ 16 P 1 It _ PR PRP PRON _ 2 SUB 2 felt _ VB VBD VERB _ 0 ROOT 3 more _ RB RBR ADV _ 4 PMOD 4 like _ IN IN ADP _ 2 VMOD 5 a _ DT DT DET _ 6 NMOD 6 party _ NN NN NOUN _ 13 NMOD 7 , _ , , . _ 13 P 8 or _ CC CC CONJ _ 13 NMOD 9 a _ DT DT DET _ 13 NMOD 10 highly _ RB RB ADV _ 11 AMOD 11 polished _ JJ JJ ADJ _ 13 NMOD 12 jam _ NN NN NOUN _ 13 NMOD 13 session _ NN NN NOUN _ 4 PMOD 14 with _ IN IN ADP _ 13 NMOD 15 a _ DT DT DET _ 17 NMOD 16 few _ JJ JJ ADJ _ 17 NMOD 17 friends _ NN NNS NOUN _ 14 PMOD 18 , _ , , . _ 13 P 19 than _ IN IN ADP _ 4 PMOD 20 a _ DT DT DET _ 22 NMOD 21 classical _ JJ JJ ADJ _ 22 NMOD 22 concert _ NN NN NOUN _ 19 PMOD 23 . _ . . . _ 2 P 1 Clad _ VB VBN VERB _ 12 VMOD 2 in _ IN IN ADP _ 1 VMOD 3 his _ PR PRP$ PRON _ 7 NMOD 4 trademark _ NN NN NOUN _ 7 NMOD 5 black _ JJ JJ ADJ _ 7 NMOD 6 velvet _ NN NN NOUN _ 7 NMOD 7 suit _ NN NN NOUN _ 2 PMOD 8 , _ , , . _ 12 P 9 the _ DT DT DET _ 11 NMOD 10 soft-spoken _ JJ JJ ADJ _ 11 NMOD 11 clarinetist _ NN NN NOUN _ 12 SUB 12 announced _ VB VBD VERB _ 0 ROOT 13 that _ IN IN ADP _ 38 DEP 14 his _ PR PRP$ PRON _ 16 NMOD 15 new _ JJ JJ ADJ _ 16 NMOD 16 album _ NN NN NOUN _ 20 NMOD 17 , _ , , . _ 20 P 18 `` _ `` `` . _ 20 P 19 Inner _ JJ JJ ADJ _ 20 NMOD 20 Voices _ NN NNS NOUN _ 23 SUB 21 , _ , , . _ 20 P 22 '' _ '' '' . _ 20 P 23 had _ VB VBD VERB _ 13 SBAR 24 just _ RB RB ADV _ 23 VMOD 25 been _ VB VBN VERB _ 23 VC 26 released _ VB VBN VERB _ 25 VC 27 , _ , , . _ 38 P 28 that _ IN IN ADP _ 38 DEP 29 his _ PR PRP$ PRON _ 30 NMOD 30 family _ NN NN NOUN _ 31 SUB 31 was _ VB VBD VERB _ 28 SBAR 32 in _ IN IN ADP _ 31 VMOD 33 the _ DT DT DET _ 35 NMOD 34 front _ NN NN NOUN _ 35 NMOD 35 row _ NN NN NOUN _ 32 PMOD 36 , _ , , . 
_ 38 P 37 and _ CC CC CONJ _ 38 DEP 38 that _ IN IN ADP _ 12 VMOD 39 it _ PR PRP PRON _ 40 SUB 40 was _ VB VBD VERB _ 48 VMOD 41 his _ PR PRP$ PRON _ 43 NMOD 42 mother _ NN NN NOUN _ 43 NMOD 43 's _ PO POS PRT _ 44 NMOD 44 birthday _ NN NN NOUN _ 40 PRD 45 , _ , , . _ 48 P 46 so _ RB RB ADV _ 48 VMOD 47 he _ PR PRP PRON _ 48 SUB 48 was _ VB VBD VERB _ 38 SBAR 49 going _ VB VBG VERB _ 48 VC 50 to _ TO TO PRT _ 51 VMOD 51 play _ VB VB VERB _ 49 VMOD 52 her _ PR PRP$ PRON _ 54 NMOD 53 favorite _ JJ JJ ADJ _ 54 NMOD 54 tune _ NN NN NOUN _ 51 OBJ 55 from _ IN IN ADP _ 54 NMOD 56 the _ DT DT DET _ 57 NMOD 57 record _ NN NN NOUN _ 55 PMOD 58 . _ . . . _ 12 P 1 He _ PR PRP PRON _ 2 SUB 2 launched _ VB VBD VERB _ 0 ROOT 3 into _ IN IN ADP _ 2 VMOD 4 Saint-Saens _ NN NNP NOUN _ 5 NMOD 5 's _ PO POS PRT _ 8 NMOD 6 `` _ `` `` . _ 8 P 7 The _ DT DT DET _ 8 NMOD 8 Swan _ NN NN NOUN _ 21 NMOD 9 '' _ '' '' . _ 8 P 10 from _ IN IN ADP _ 8 NMOD 11 `` _ `` `` . _ 10 P 12 Carnival _ NN NN NOUN _ 10 PMOD 13 of _ IN IN ADP _ 12 NMOD 14 the _ DT DT DET _ 15 NMOD 15 Animals _ NN NNS NOUN _ 13 PMOD 16 , _ , , . _ 21 P 17 '' _ '' '' . _ 21 P 18 a _ DT DT DET _ 21 NMOD 19 favorite _ JJ JJ ADJ _ 21 NMOD 20 encore _ NN NN NOUN _ 21 NMOD 21 piece _ NN NN NOUN _ 3 PMOD 22 for _ IN IN ADP _ 21 NMOD 23 cellists _ NN NNS NOUN _ 22 PMOD 24 , _ , , . _ 21 P 25 with _ IN IN ADP _ 2 VMOD 26 lovely _ JJ JJ ADJ _ 29 NMOD 27 , _ , , . _ 29 P 28 glossy _ JJ JJ ADJ _ 29 NMOD 29 tone _ NN NN NOUN _ 32 NMOD 30 and _ CC CC CONJ _ 32 NMOD 31 no _ DT DT DET _ 32 NMOD 32 bite _ NN NN NOUN _ 25 PMOD 33 . _ . . . _ 2 P 1 Then _ RB RB ADV _ 16 VMOD 2 , _ , , . _ 16 P 3 as _ IN IN ADP _ 4 DEP 4 if _ IN IN ADP _ 16 VMOD 5 to _ TO TO PRT _ 6 VMOD 6 show _ VB VB VERB _ 4 SBAR 7 that _ IN IN ADP _ 6 VMOD 8 he _ PR PRP PRON _ 9 SUB 9 could _ MD MD VERB _ 7 SBAR 10 play _ VB VB VERB _ 9 VC 11 fast _ RB RB ADV _ 10 VMOD 12 as _ IN IN ADP _ 13 AMOD 13 well _ RB RB ADV _ 10 VMOD 14 , _ , , . 
_ 16 P 15 he _ PR PRP PRON _ 16 SUB 16 offered _ VB VBD VERB _ 0 ROOT 17 the _ DT DT DET _ 19 NMOD 18 second _ JJ JJ ADJ _ 19 NMOD 19 movement _ NN NN NOUN _ 31 NMOD 20 from _ IN IN ADP _ 19 NMOD 21 Saint-Saens _ NN NNP NOUN _ 22 NMOD 22 's _ PO POS PRT _ 23 NMOD 23 Sonata _ NN NN NOUN _ 20 PMOD 24 for _ IN IN ADP _ 23 NMOD 25 Clarinet _ NN NN NOUN _ 24 PMOD 26 , _ , , . _ 31 P 27 a _ DT DT DET _ 31 NMOD 28 whimsical _ JJ JJ ADJ _ 31 NMOD 29 , _ , , . _ 31 P 30 puckish _ JJ JJ ADJ _ 31 NMOD 31 tidbit _ NN NN NOUN _ 16 OBJ 32 that _ WD WDT DET _ 31 NMOD 33 reflected _ VB VBD VERB _ 32 SBAR 34 the _ DT DT DET _ 36 NMOD 35 flip _ JJ JJ ADJ _ 36 NMOD 36 side _ NN NN NOUN _ 33 OBJ 37 of _ IN IN ADP _ 36 NMOD 38 the _ DT DT DET _ 40 NMOD 39 Stoltzman _ NN NNP NOUN _ 40 NMOD 40 personality _ NN NN NOUN _ 37 PMOD 41 . _ . . . _ 16 P 1 And _ CC CC CONJ _ 4 VMOD 2 so _ RB RB ADV _ 4 VMOD 3 it _ PR PRP PRON _ 4 SUB 4 went _ VB VBD VERB _ 0 ROOT 5 through _ IN IN ADP _ 4 VMOD 6 the _ DT DT DET _ 8 NMOD 7 first _ JJ JJ ADJ _ 8 NMOD 8 half _ NN NN NOUN _ 5 PMOD 9 : _ : : . _ 4 P 10 an _ DT DT DET _ 13 NMOD 11 ingeniously _ RB RB ADV _ 12 AMOD 12 chosen _ VB VBN VERB _ 13 NMOD 13 potpourri _ NN NN NOUN _ 4 DEP 14 of _ IN IN ADP _ 13 NMOD 15 pieces _ NN NNS NOUN _ 23 NMOD 16 , _ , , . _ 23 P 17 none _ NN NN NOUN _ 23 NMOD 18 longer _ JJ JJR ADJ _ 17 NMOD 19 than _ IN IN ADP _ 18 AMOD 20 five _ CD CD NUM _ 21 NMOD 21 minutes _ NN NNS NOUN _ 19 PMOD 22 , _ , , . _ 23 P 23 none _ NN NN NOUN _ 14 PMOD 24 that _ WD WDT DET _ 23 NMOD 25 would _ MD MD VERB _ 24 SBAR 26 disturb _ VB VB VERB _ 25 VC 27 or _ CC CC CONJ _ 26 VMOD 28 challenge _ VB VB VERB _ 26 VMOD 29 a _ DT DT DET _ 30 NMOD 30 listener _ NN NN NOUN _ 26 OBJ 31 . _ . . . _ 4 P 1 Mr. _ NN NNP NOUN _ 2 NMOD 2 Stoltzman _ NN NNP NOUN _ 3 SUB 3 introduced _ VB VBD VERB _ 0 ROOT 4 his _ PR PRP$ PRON _ 5 NMOD 5 colleagues _ NN NNS NOUN _ 22 NMOD 6 : _ : : . 
_ 22 P 7 Bill _ NN NNP NOUN _ 8 NMOD 8 Douglas _ NN NNP NOUN _ 14 NMOD 9 , _ , , . _ 14 P 10 pianist\/bassoonist\/composer _ NN NN NOUN _ 14 NMOD 11 and _ CC CC CONJ _ 14 NMOD 12 an _ DT DT DET _ 14 NMOD 13 old _ JJ JJ ADJ _ 14 NMOD 14 buddy _ NN NN NOUN _ 22 NMOD 15 from _ IN IN ADP _ 14 NMOD 16 Yale _ NN NNP NOUN _ 15 PMOD 17 , _ , , . _ 22 P 18 and _ CC CC CONJ _ 22 NMOD 19 jazz _ NN NN NOUN _ 22 NMOD 20 bassist _ NN NN NOUN _ 22 NMOD 21 Eddie _ NN NNP NOUN _ 22 NMOD 22 Gomez _ NN NNP NOUN _ 3 OBJ 23 . _ . . . _ 3 P 1 An _ DT DT DET _ 3 NMOD 2 improvisational _ JJ JJ ADJ _ 3 NMOD 3 section _ NN NN NOUN _ 4 SUB 4 was _ VB VBD VERB _ 0 ROOT 5 built _ VB VBN VERB _ 4 VC 6 around _ IN IN ADP _ 5 VMOD 7 pieces _ NN NNS NOUN _ 6 PMOD 8 by _ IN IN ADP _ 7 NMOD 9 Mr. _ NN NNP NOUN _ 10 NMOD 10 Douglas _ NN NNP NOUN _ 8 PMOD 11 , _ , , . _ 5 P 12 beginning _ NN NN NOUN _ 5 VMOD 13 with _ IN IN ADP _ 12 PMOD 14 `` _ `` `` . _ 23 P 15 Golden _ JJ JJ ADJ _ 16 NMOD 16 Rain _ NN NN NOUN _ 23 NMOD 17 , _ , , . _ 23 P 18 '' _ '' '' . _ 23 P 19 a _ DT DT DET _ 23 NMOD 20 lilting _ JJ JJ ADJ _ 23 NMOD 21 , _ , , . _ 23 P 22 laid-back _ JJ JJ ADJ _ 23 NMOD 23 lead _ NN NN NOUN _ 13 PMOD 24 in _ IN IN ADP _ 23 NMOD 25 to _ TO TO PRT _ 24 PMOD 26 the _ DT DT DET _ 29 NMOD 27 uptempo _ JJ JJ ADJ _ 29 NMOD 28 `` _ `` `` . _ 29 P 29 Sky _ NN NN NOUN _ 25 PMOD 30 , _ , , . _ 29 P 31 '' _ '' '' . _ 29 P 32 which _ WD WDT DET _ 29 NMOD 33 gave _ VB VBD VERB _ 32 SBAR 34 Mr. _ NN NNP NOUN _ 35 NMOD 35 Stoltzman _ NN NNP NOUN _ 33 OBJ 36 the _ DT DT DET _ 37 NMOD 37 opportunity _ NN NN NOUN _ 33 OBJ 38 to _ TO TO PRT _ 39 VMOD 39 wail _ VB VB VERB _ 37 NMOD 40 in _ IN IN ADP _ 39 VMOD 41 a _ DT DT DET _ 43 NMOD 42 high _ JJ JJ ADJ _ 43 NMOD 43 register _ NN NN NOUN _ 40 PMOD 44 and _ CC CC CONJ _ 39 VMOD 45 show _ VB VB VERB _ 39 VMOD 46 off _ RP RP PRT _ 45 VMOD 47 his _ PR PRP$ PRON _ 49 NMOD 48 fleet _ JJ JJ ADJ _ 49 NMOD 49 fingers _ NN NNS NOUN _ 45 OBJ 50 . _ . . . 
_ 4 P 1 Bach _ NN NNP NOUN _ 2 NMOD 2 's _ PO POS PRT _ 4 NMOD 3 `` _ `` `` . _ 4 P 4 Air _ NN NN NOUN _ 6 SUB 5 '' _ '' '' . _ 4 P 6 followed _ VB VBD VERB _ 0 ROOT 7 . _ . . . _ 6 P 1 Mr. _ NN NNP NOUN _ 2 NMOD 2 Stoltzman _ NN NNP NOUN _ 3 SUB 3 tied _ VB VBD VERB _ 0 ROOT 4 the _ DT DT DET _ 5 NMOD 5 composer _ NN NN NOUN _ 3 OBJ 6 in _ RP RP PRT _ 3 VMOD 7 by _ IN IN ADP _ 3 VMOD 8 proclaiming _ VB VBG VERB _ 7 PMOD 9 him _ PR PRP PRON _ 13 SUB 10 `` _ `` `` . _ 13 P 11 the _ DT DT DET _ 13 NMOD 12 great _ JJ JJ ADJ _ 13 NMOD 13 improviser _ NN NN NOUN _ 8 VMOD 14 of _ IN IN ADP _ 13 NMOD 15 the _ DT DT DET _ 17 NMOD 16 18th _ JJ JJ ADJ _ 17 NMOD 17 century _ NN NN NOUN _ 14 PMOD 18 , _ , , . _ 3 P 19 '' _ '' '' . _ 3 P 20 and _ CC CC CONJ _ 3 VMOD 21 then _ RB RB ADV _ 3 VMOD 22 built _ VB VBD VERB _ 3 VMOD 23 on _ IN IN ADP _ 22 VMOD 24 the _ DT DT DET _ 25 NMOD 25 image _ NN NN NOUN _ 23 PMOD 26 by _ IN IN ADP _ 22 VMOD 27 joining _ VB VBG VERB _ 26 PMOD 28 with _ IN IN ADP _ 27 VMOD 29 Mr. _ NN NNP NOUN _ 30 NMOD 30 Douglas _ NN NNP NOUN _ 28 PMOD 31 in _ IN IN ADP _ 27 VMOD 32 some _ DT DT DET _ 35 NMOD 33 Bach _ NN NNP NOUN _ 35 NMOD 34 two-part _ JJ JJ ADJ _ 35 NMOD 35 inventions _ NN NNS NOUN _ 31 PMOD 36 , _ , , . _ 35 P 37 cleverly _ RB RB ADV _ 38 VMOD 38 arranged _ VB VBN VERB _ 35 NMOD 39 for _ IN IN ADP _ 38 VMOD 40 clarinet _ NN NN NOUN _ 42 NMOD 41 and _ CC CC CONJ _ 42 NMOD 42 bassoon _ NN NN NOUN _ 39 PMOD 43 by _ IN IN ADP _ 38 VMOD 44 Mr. _ NN NNP NOUN _ 45 NMOD 45 Douglas _ NN NNP NOUN _ 43 PMOD 46 . _ . . . _ 3 P 1 Keeping _ VB VBG VERB _ 9 VMOD 2 the _ DT DT DET _ 3 NMOD 3 mood _ NN NN NOUN _ 4 SUB 4 light _ JJ JJ ADJ _ 1 VMOD 5 , _ , , . 
_ 9 P 6 the _ DT DT DET _ 7 NMOD 7 two _ CD CD NUM _ 9 SUB 8 then _ RB RB ADV _ 9 VMOD 9 chanted _ VB VBD VERB _ 0 ROOT 10 and _ CC CC CONJ _ 9 VMOD 11 chortled _ VB VBD VERB _ 9 VMOD 12 their _ PR PRP$ PRON _ 13 NMOD 13 way _ NN NN NOUN _ 9 OBJ 14 through _ IN IN ADP _ 9 VMOD 15 some _ DT DT DET _ 17 NMOD 16 murderous _ JJ JJ ADJ _ 17 NMOD 17 polyrhythms _ NN NNS NOUN _ 14 PMOD 18 , _ , , . _ 17 P 19 devised _ VB VBN VERB _ 17 NMOD 20 by _ IN IN ADP _ 19 VMOD 21 Mr. _ NN NNP NOUN _ 22 NMOD 22 Douglas _ NN NNP NOUN _ 20 PMOD 23 as _ IN IN ADP _ 19 VMOD 24 an _ DT DT DET _ 25 NMOD 25 alternative _ NN NN NOUN _ 23 PMOD 26 to _ TO TO PRT _ 25 NMOD 27 Hindemith _ NN NNP NOUN _ 28 NMOD 28 's _ PO POS PRT _ 31 NMOD 29 dry _ JJ JJ ADJ _ 31 NMOD 30 theory-teaching _ VB VBG VERB _ 31 NMOD 31 techniques _ NN NNS NOUN _ 26 PMOD 32 , _ , , . _ 9 P 33 and _ CC CC CONJ _ 9 VMOD 34 then _ RB RB ADV _ 9 VMOD 35 , _ , , . _ 9 P 36 with _ IN IN ADP _ 40 VMOD 37 Mr. _ NN NNP NOUN _ 38 NMOD 38 Gomez _ NN NNP NOUN _ 36 PMOD 39 , _ , , . _ 40 P 40 soared _ VB VBD VERB _ 9 VMOD 41 and _ CC CC CONJ _ 40 VMOD 42 improvised _ VB VBD VERB _ 40 VMOD 43 on _ IN IN ADP _ 42 VMOD 44 the _ DT DT DET _ 46 NMOD 45 composer _ NN NN NOUN _ 46 NMOD 46 's _ PO POS PRT _ 50 NMOD 47 tight _ JJ JJ ADJ _ 50 NMOD 48 `` _ `` `` . _ 50 P 49 Bebop _ NN NNP NOUN _ 50 NMOD 50 Etudes _ NN NNP NOUN _ 43 PMOD 51 . _ . . . _ 9 P 52 '' _ '' '' . _ 9 P 1 The _ DT DT DET _ 2 NMOD 2 end _ NN NN NOUN _ 10 SUB 3 of _ IN IN ADP _ 2 NMOD 4 the _ DT DT DET _ 6 NMOD 5 first _ JJ JJ ADJ _ 6 NMOD 6 half _ NN NN NOUN _ 3 PMOD 7 , _ , , . _ 10 P 8 however _ RB RB ADV _ 10 VMOD 9 , _ , , . _ 10 P 10 brought _ VB VBD VERB _ 0 ROOT 11 what _ WP WP PRON _ 25 NMOD 12 the _ DT DT DET _ 14 NMOD 13 standing-room-only _ JJ JJ ADJ _ 14 NMOD 14 crowd _ NN NN NOUN _ 15 SUB 15 seemed _ VB VBD VERB _ 11 SBAR 16 to _ TO TO PRT _ 17 VMOD 17 be _ VB VB VERB _ 15 VMOD 18 waiting _ VB VBG VERB _ 17 VC 19 for _ IN IN ADP _ 18 VMOD 20 : _ : : . 
_ 25 P 21 the _ DT DT DET _ 23 NMOD 22 pop _ NN NN NOUN _ 23 NMOD 23 singer _ NN NN NOUN _ 25 NMOD 24 Judy _ NN NNP NOUN _ 25 NMOD 25 Collins _ NN NNP NOUN _ 10 OBJ 26 , _ , , . _ 25 P 27 who _ WP WP PRON _ 25 NMOD 28 appears _ VB VBZ VERB _ 27 SBAR 29 on _ IN IN ADP _ 28 VMOD 30 `` _ `` `` . _ 29 P 31 Inner _ JJ JJ ADJ _ 32 NMOD 32 Voices _ NN NNS NOUN _ 29 PMOD 33 . _ . . . _ 10 P 34 '' _ '' '' . _ 10 P vowpal-wabbit-8.6.1.dfsg1/demo/dna/000077500000000000000000000000001332666127000170035ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/dna/.gitignore000066400000000000000000000000721332666127000207720ustar00rootroot00000000000000*.gz *.bz2 quaddna2vw *.model *cache* *.predictions *.pid vowpal-wabbit-8.6.1.dfsg1/demo/dna/Makefile000066400000000000000000000012541332666127000204450ustar00rootroot00000000000000CXXFLAGS=-O3 .SECONDARY: all: @cat README clean: rm -f $(wildcard dna*.test.predictions dna*.model) %.check: @test -x "$$(which $*)" || { \ echo "ERROR: you need to install $*" 1>&2; \ exit 1; \ } dna_train.%.bz2: wget.check wget ftp://largescale.ml.tu-berlin.de/largescale/dna/dna_train.$*.bz2 quaddna2vw: quaddna2vw.cpp %.model: dna_train.lab.bz2 dna_train.dat.bz2 quaddna2vw paste.check bzcat.check zsh.check ./do-$*-multicore-train %.test.predictions: %.model dna_train.lab.bz2 dna_train.dat.bz2 quaddna2vw paste.check bzcat.check zsh.check ./do-test $< $@ %.perf: %.test.predictions perf.check perl.check zsh.check ./do-perf $< .PHONY: all clean vowpal-wabbit-8.6.1.dfsg1/demo/dna/README000066400000000000000000000035641332666127000176730ustar00rootroot00000000000000This is the Splice Site recognition dataset from the 2008 Pascal Large Scale Learning Challenge (http://largescale.ml.tu-berlin.de/summary/). 
=== WARNINGS === * you need a beefy machine to comfortably run these demos 4 cores or more SSD or lots of RAM because the demo is I/O bound * the demos are intolerably slow under cygwin (pipes do not work well) === INSTRUCTIONS === * make dna.perf downloads the dna dataset, trains a 4-gram logistic regression in parallel on 4 cores and computes test set statistics disk space requirements: about 3 gigabytes training time requirements: about 7 cores for about 15 minutes ultimately I/O bound: bzcat is the limiting factor memory requirements: less than 512 megabytes results in APR of 0.512 * make dnann.perf same as dna.perf, but with additionally 1 neural network hidden node slower (by circa 60 seconds) but better results in APR of 0.532 * make dnasmash.perf as above but builds a better model uses 10 iteratons of parallel sgd with 4 neural network nodes disk space requirements: about 10 gigabytes additional 7 gigabytes of vw cache on top of original data training time requirements: 16 minutes to build cache over first (ever) pass subsequently, 6 minute per pass if you have SSD or enough RAM cache 10 passes = 60 minutes (x 6 cores) results in APR of 0.545 * make dnahogwild.perf same as dna.perf, but trained via lock-free multicore sgd ("hogwild") rather than parallel sgd + averaging nondeterministic, but a typical result is APR of 0.516 * make dnahogwildnn.perf same as dnann.perf, but trained via lock-free multicore sgd ("hogwild") rather than parallel sgd + averaging nondeterministic, but a typical result is APR of 0.536 vowpal-wabbit-8.6.1.dfsg1/demo/dna/do-dna-multicore-train000077500000000000000000000015771332666127000232210ustar00rootroot00000000000000#! 
/bin/zsh learner() { ./quaddna2vw | \ ../../vowpalwabbit/vw -f dna.model.$1 \ --loss_function logistic \ -b 18 -l 0.25 --adaptive --invariant \ --total $2 --node $1 --unique_id 0 --span_server localhost \ } set -e ../../cluster/spanning_tree dna_spanning_tree.pid paste -d' ' \ <(bzcat dna_train.lab.bz2) \ <(bzcat dna_train.dat.bz2) | \ tail -n +1000000 | \ ./map \ >(learner 0 4) \ >(learner 1 4 >/dev/null 2>/dev/null) \ >(learner 2 4 >/dev/null 2>/dev/null) \ >(learner 3 4 >/dev/null 2>/dev/null) kill $(cat dna_spanning_tree.pid) wait rm dna_spanning_tree.pid mv dna.model.0 dna.model rm -f dna.model.* vowpal-wabbit-8.6.1.dfsg1/demo/dna/do-dnahogwild-multicore-train000077500000000000000000000020701332666127000245640ustar00rootroot00000000000000#! /bin/zsh rm -f dnahogwild.model set -e nukeem() { \ trap - INT QUIT TERM pkill -9 -f 'vw.*--port 26543' } learner() { ./quaddna2vw | \ netcat localhost 26543 > /dev/null } { ../../vowpalwabbit/vw -f dnahogwild.model \ --loss_function logistic \ -b 18 -l 0.0625 --adaptive --invariant \ --daemon --num_children 4 --port 26543 2>&1 | \ perl -lane 'print $_ unless $c{$F[2]}++;' } & trap 'nukeem; exit 1' INT QUIT TERM while ! netcat -z localhost 26543 do sleep 1 done paste -d' ' \ <(bzcat dna_train.lab.bz2) \ <(bzcat dna_train.dat.bz2) | \ tail -n +1000000 | \ ./map \ >(learner) \ >(learner) \ >(learner) \ >(learner) pkill -f 'vw.*--port 26543' while test ! -s dnahogwild.model do sleep 1 done vowpal-wabbit-8.6.1.dfsg1/demo/dna/do-dnahogwildnn-multicore-train000077500000000000000000000020741332666127000251240ustar00rootroot00000000000000#! 
/bin/zsh rm -f dnahogwildnn.model set -e nukeem() { \ trap - INT QUIT TERM pkill -9 -f 'vw.*--port 26544' } learner() { ./quaddna2vw | \ netcat localhost 26544 > /dev/null } { ../../vowpalwabbit/vw -f dnahogwildnn.model \ --loss_function logistic --nn 1 --inpass \ -b 18 -l 0.015 --adaptive --invariant \ --daemon --num_children 4 --port 26544 2>&1 | \ perl -lane 'print $_ unless $c{$F[2]}++;' } & trap 'nukeem; exit 1' INT QUIT TERM while ! netcat -z localhost 26544 do sleep 1 done paste -d' ' \ <(bzcat dna_train.lab.bz2) \ <(bzcat dna_train.dat.bz2) | \ tail -n +1000000 | \ ./map \ >(learner) \ >(learner) \ >(learner) \ >(learner) pkill -f 'vw.*--port 26544' while test ! -s dnahogwildnn.model do sleep 1 done vowpal-wabbit-8.6.1.dfsg1/demo/dna/do-dnann-multicore-train000077500000000000000000000016321332666127000235450ustar00rootroot00000000000000#! /bin/zsh learner() { ./quaddna2vw | \ ../../vowpalwabbit/vw -f dnann.model.$1 \ --loss_function logistic \ -b 18 -l 0.06 --adaptive --invariant \ --total $2 --node $1 --unique_id 0 --span_server localhost \ --nn 1 --inpass } set -e ../../cluster/spanning_tree dna_spanning_tree.pid paste -d' ' \ <(bzcat dna_train.lab.bz2) \ <(bzcat dna_train.dat.bz2) | \ tail -n +1000000 | \ ./map \ >(learner 0 4) \ >(learner 1 4 >/dev/null 2>/dev/null) \ >(learner 2 4 >/dev/null 2>/dev/null) \ >(learner 3 4 >/dev/null 2>/dev/null) kill $(cat dna_spanning_tree.pid) wait rm dna_spanning_tree.pid mv dnann.model.0 dnann.model rm -f dnann.model.* vowpal-wabbit-8.6.1.dfsg1/demo/dna/do-dnasmash-multicore-train000077500000000000000000000017641332666127000242530ustar00rootroot00000000000000#! 
/bin/zsh learner() { ./quaddna2vw | \ ../../vowpalwabbit/vw -f dnasmash.model.$1 \ --loss_function logistic \ --compressed --cache_file dnacache.$1 \ -b 20 -l 0.06 --adaptive --invariant --passes 10 \ --total $2 --node $1 --unique_id 0 --span_server localhost \ --nn 4 --inpass } ../../cluster/spanning_tree dnasmash_spanning_tree.pid paste -d' ' \ <(bzcat dna_train.lab.bz2) \ <(bzcat dna_train.dat.bz2) | \ tail -n +1000000 | \ ./map \ >(learner 0 4) \ >(learner 1 4 >/dev/null 2>/dev/null) \ >(learner 2 4 >/dev/null 2>/dev/null) \ >(learner 3 4 >/dev/null 2>/dev/null) kill $(cat dnasmash_spanning_tree.pid) wait rm dnasmash_spanning_tree.pid mv dnasmash.model.0 dnasmash.model rm -f dnasmash.model.* vowpal-wabbit-8.6.1.dfsg1/demo/dna/do-perf000077500000000000000000000003051332666127000202630ustar00rootroot00000000000000#! /bin/zsh perf -APR -ACC -ROC -t 0 \ -files <(bzcat dna_train.lab.bz2 | head -n 1000000) $1 || { echo "ERROR: you probably need to recompile perf to handle larger data sets" 1>&2 exit 1 } vowpal-wabbit-8.6.1.dfsg1/demo/dna/do-test000077500000000000000000000003721332666127000203120ustar00rootroot00000000000000#! /bin/zsh paste -d' ' \ <(bzcat dna_train.lab.bz2) \ <(bzcat dna_train.dat.bz2) | \ head -n +1000000 | \ ./quaddna2vw | \ ../../vowpalwabbit/vw -t --loss_function logistic -i $1 -p $2 vowpal-wabbit-8.6.1.dfsg1/demo/dna/map000077500000000000000000000004371332666127000175120ustar00rootroot00000000000000#! /usr/bin/env perl use warnings; use strict; use IO::File; my @mappers = map { $_->autoflush (0); $_ } map { new IO::File $_, "w" or die "$_: $!" } @ARGV; while (defined ($_ = )) { my $fh = $mappers[$. % @mappers]; print $fh $_; } vowpal-wabbit-8.6.1.dfsg1/demo/dna/quaddna2vw.cpp000066400000000000000000000020311332666127000215570ustar00rootroot00000000000000#include #include namespace { using namespace std; unsigned int codec (const string::const_iterator& c) { return *c == 'A' ? 0 : *c == 'C' ? 1 : *c == 'G' ? 
2 : 3; } } int main (void) { using namespace std; while (! cin.eof ()) { string line; string label; getline (cin, line); if (line.length ()) { string::iterator s = line.begin (); while (*s != ' ') { cout << *s; ++s; } string::const_iterator ppp = s + 1; string::const_iterator pp = ppp + 1; string::const_iterator p = pp + 1; unsigned int offset = 1; cout << " |f"; for (string::const_iterator c = p + 1; c != line.end (); ++ppp, ++pp, ++p, ++c) { unsigned int val = 64 * codec (ppp) + 16 * codec (pp) + 4 * codec (p) + codec (c); cout << " " << offset + val << ":1"; offset += 256; } cout << endl; } } return 0; } vowpal-wabbit-8.6.1.dfsg1/demo/entityrelation/000077500000000000000000000000001332666127000213135ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/entityrelation/README.md000077500000000000000000000006711332666127000226010ustar00rootroot00000000000000Searn for Entity Relation Recognition ------------------------------------- This demo shows the performance of Searn on an entity-relation recognition task. For more details on the data set see http://cogcomp.cs.illinois.edu/page/resource_view/43 ### Instructions ### - `make er.perf`: downloads the preprocessed entity-relation dataset, trains a joint model for predicting entity and relation types and computes test set statistics. 
vowpal-wabbit-8.6.1.dfsg1/demo/mnist/000077500000000000000000000000001332666127000173735ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/mnist/.gitignore000066400000000000000000000001231332666127000213570ustar00rootroot00000000000000*.gz *.model *.predictions *.swp extractfeatures extractpixels pixelngrams *cache* vowpal-wabbit-8.6.1.dfsg1/demo/mnist/Makefile000066400000000000000000000052311332666127000210340ustar00rootroot00000000000000.SECONDARY: CXXFLAGS=-O3 -Wall help: @cat README RAW = mnist mnisthogwild mnist8m PNG = mnistpng mnist11png mnist8mpng mnist8m11png raw : $(addsuffix .test.confusion, $(RAW)) png : $(addsuffix .test.confusion, $(PNG)) MNIST = mnist mnisthogwild mnistpng mnist11png 8M = mnist8m mnist8mpng mnist8m11png mnist : $(addsuffix .test.confusion, $(MNIST)) mnist8m : $(addsuffix .test.confusion, $(8M)) .PHONY : clean testclean %.test.confusion mnist mnist8m raw png %.check: @test -x "$$(which $*)" || { \ echo "ERROR: you need to install $*" 1>&2; \ exit 1; \ } testclean: rm -f $(wildcard *.test.predictions) clean: rm -f $(wildcard mnist8mpng.*) $(wildcard mnist8m11png.*) $(wildcard mnist11png.*) $(wildcard mnistpng.*) $(wildcard mnist8m.*) $(wildcard mnist.*) # http://leon.bottou.org/projects/infimnist infimnist.tar.gz : wget.check wget http://leon.bottou.org/_media/projects/$@ infimnist : infimnist.tar.gz tar xvfz $< make -C $@ train8m-labels-idx1-ubyte.gz: infimnist (cd infimnist && ./infimnist lab 10000 8109999) | gzip > $@ train8m-images-idx3-ubyte.gz: infimnist (cd infimnist && ./infimnist pat 10000 8109999) | gzip > $@ train-%.gz: wget.check wget http://yann.lecun.com/exdb/mnist/train-$*.gz t10k-%.gz: wget.check wget http://yann.lecun.com/exdb/mnist/t10k-$*.gz pixelngrams: pixelngrams.cpp extractpixels: extractpixels.cpp extractfeatures: extractfeatures.cpp %png.model: train-labels-idx1-ubyte.gz train-images-idx3-ubyte.gz extractpixels pixelngrams zsh.check perl.check paste.check ./do-$*png-train mnist.model: 
train-labels-idx1-ubyte.gz train-images-idx3-ubyte.gz extractfeatures zsh.check perl.check paste.check ./do-mnist-train mnisthogwild.model: train-labels-idx1-ubyte.gz train-images-idx3-ubyte.gz extractfeatures zsh.check perl.check paste.check ./do-mnist-hogwild-train mnist8m.model: train8m-labels-idx1-ubyte.gz train8m-images-idx3-ubyte.gz extractfeatures zsh.check perl.check paste.check ./do-mnist8m-train %png.test.predictions: %png.model t10k-labels-idx1-ubyte.gz t10k-images-idx3-ubyte.gz zsh.check perl.check paste.check ./do-pixelngram-test $*png %.test.predictions: %.model t10k-labels-idx1-ubyte.gz t10k-images-idx3-ubyte.gz zsh.check perl.check paste.check ./do-test $* CONFUSION='++$$n; \ ++$$c if $$F[0] != $$F[1]; \ ++$$m{"@{[int($$F[0])]}:$$F[1]"}; } { \ print "$* test errors: $$c out of $$n"; \ print "confusion matrix (rows = truth, columns = prediction):"; \ foreach $$true (1 .. 10) { \ print join "\t", map { $$m{"$$true:$$_"} || 0 } (1 .. 10); \ }' %.test.confusion: %.test.predictions perl.check @perl -lane $(CONFUSION) $*.test.predictions vowpal-wabbit-8.6.1.dfsg1/demo/mnist/README000066400000000000000000000061061332666127000202560ustar00rootroot00000000000000mnist (http://yann.lecun.com/exdb/mnist/) is the traditional set for testing neural network implementations. mnist8m (http://leon.bottou.org/papers/loosli-canu-bottou-2006) is a variant of the original mnist training set augmented with deformations. see the dna demo directory for an example of distributed neural network training. 
=== INSTRUCTIONS === --- starting from raw pixels --- * make mnist.test.confusion this will download the mnist training and test sets train a neural network model on mnist from raw pixels evaluate on the original mnist test set and report a confusion matrix results in test errors: 219 out of 10000 disk requirements: about 10 megabytes training time requirements: about 5 minutes on 1 core memory requirements: less than 256 megabytes * make mnisthogwild.test.confusion same as above but "hogwild" training (uses all cores) results in test errors: 203 out of 10000 (typically, but not deterministic) disk requirements: about 10 megabytes training time requirements: about 3 minutes on 4 cores memory requirements: less than 256 megabytes * make mnist8m.test.confusion this will download the mnist test set and the mnist8m training set train a neural network model on mnist8m from raw pixels evaluate on the original mnist test set and report a confusion matrix results in test errors: 152 out of 10000 disk requirements: about 2 gigabytes training time requirements: about an hour on one core memory requirements: about 1.6g (to run infinimist) --- pixel n-grams --- * make mnistpng.test.confusion this will download the mnist training and test sets train a model on mnist linear in pixel n-grams evaluate on the original mnist test set and report a confusion matrix results in test errors: 175 out of 10000 disk requirements: about 10 megabytes training time requirements: about a minute on 3 cores memory requirements: less than 256 megabytes * make mnist11png.test.confusion ibid with 2 neural network hidden units ("goes to eleven") results in test errors: 166 out of 10000 disk requirements: about 10 megabytes training time requirements: about 3 minutes on 3 cores memory requirements: less than 256 megabytes * make mnist8mpng.test.confusion this will download the mnist test set and the mnist8m training set train a model on mnist8m linear in pixel n-grams evaluate on the original mnist 
test set and report a confusion matrix results in test errors: 140 out of 10000 disk requirements: about 2 gigabytes training time requirements: about an hour on 4 cores memory requirements: less than 256 megabytes * make mnist8m11png.test.confusion ibid with 5 neural network hidden units ("goes to eleven") results in test errors: 107 out of 10000 disk requirements: about 2 gigabytes training time requirements: about two hours on 4 cores memory requirements: less than 256 megabytes vowpal-wabbit-8.6.1.dfsg1/demo/mnist/do-mnist-hogwild-train000077500000000000000000000032351332666127000236240ustar00rootroot00000000000000#! /bin/zsh test -e train-images-idx3-ubyte.gz || { echo "ERROR: you need to download train-image-idx3-ubyte.gz" 1>&2 echo "ERROR: from http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz" 1>&2 exit 1 } test -e train-labels-idx1-ubyte.gz || { echo "ERROR: you need to download train-labels-idx1-ubyte.gz" 1>&2 echo "ERROR: from http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz" 1>&2 exit 1 } set -e nukeem() { \ trap - INT QUIT TERM pkill -9 -f 'vw.*--port 26544' } learner() { netcat localhost 26544 > /dev/null } { ../../vowpalwabbit/vw --oaa 10 -f mnisthogwild.model \ -b 24 --adaptive --invariant --holdout_off \ -l 0.08 --nn 40 \ --daemon --num_children 4 --port 26544 2>&1 | \ perl -lane 'print $_ unless $c{$F[2]}++;' } & trap 'nukeem; exit 1' INT QUIT TERM while ! netcat -z localhost 26544 do sleep 1 done SHUFFLE='BEGIN { srand 69; }; $i = int rand 1000; print $b[$i] if $b[$i]; $b[$i] = $_; } { print grep { defined $_ } @b;' ( for pass in $(seq 1 24) do paste -d' ' \ <(gunzip -c train-labels-idx1-ubyte.gz | ./extract-labels) \ <(gunzip -c train-images-idx3-ubyte.gz | ./extractfeatures) done ) | \ perl -ne ${SHUFFLE} | \ time ./map >(learner) >(learner) >(learner) >(learner) pkill -f 'vw.*--port 26544' while test ! 
-s mnisthogwild.model do sleep 1 done vowpal-wabbit-8.6.1.dfsg1/demo/mnist/do-mnist-train000077500000000000000000000021321332666127000221640ustar00rootroot00000000000000#! /bin/zsh test -e train-images-idx3-ubyte.gz || { echo "ERROR: you need to download train-image-idx3-ubyte.gz" 1>&2 echo "ERROR: from http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz" 1>&2 exit 1 } test -e train-labels-idx1-ubyte.gz || { echo "ERROR: you need to download train-labels-idx1-ubyte.gz" 1>&2 echo "ERROR: from http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz" 1>&2 exit 1 } SHUFFLE='BEGIN { srand 69; }; $i = int rand 1000; print $b[$i] if $b[$i]; $b[$i] = $_; } { print grep { defined $_ } @b;' paste -d' ' \ <(gunzip -c train-labels-idx1-ubyte.gz | ./extract-labels) \ <(gunzip -c train-images-idx3-ubyte.gz | ./extractfeatures) | \ perl -ne ${SHUFFLE} | \ time ../../vowpalwabbit/vw --oaa 10 -f mnist.model \ -b 24 --adaptive --invariant --holdout_off \ -l 0.1 --nn 40 --passes 24 -k --compressed --cache_file mnist.cache && \ rm -f mnist.cache vowpal-wabbit-8.6.1.dfsg1/demo/mnist/do-mnist11png-train000077500000000000000000000023641332666127000230420ustar00rootroot00000000000000#! 
/bin/zsh test -e train-images-idx3-ubyte.gz || { echo "ERROR: you need to download train-image-idx3-ubyte.gz" 1>&2 echo "ERROR: from http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz" 1>&2 exit 1 } test -e train-labels-idx1-ubyte.gz || { echo "ERROR: you need to download train-labels-idx1-ubyte.gz" 1>&2 echo "ERROR: from http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz" 1>&2 exit 1 } SHUFFLE='BEGIN { srand 69; }; $i = int rand 1000; print $b[$i] if $b[$i]; $b[$i] = $_; } { print grep { defined $_ } @b;' paste -d' ' \ <(gunzip -c train-labels-idx1-ubyte.gz | ./extract-labels) \ <(gunzip -c train-images-idx3-ubyte.gz | ./extractpixels) | \ perl -ne ${SHUFFLE} | \ ./roundrobin ./pixelngrams 2 | \ time ../../vowpalwabbit/vw --oaa 10 -f mnist11png.model \ -b 20 --adaptive --invariant \ --nn 2 --inpass --holdout_off \ -l 0.02 --passes 15 -k --compressed --cache_file mnist11png.cache && \ rm -f mnist11png.cache vowpal-wabbit-8.6.1.dfsg1/demo/mnist/do-mnist8m-train000077500000000000000000000015341332666127000224360ustar00rootroot00000000000000#! /bin/zsh test -e train8m-images-idx3-ubyte.gz || { echo "ERROR: you need to download train8m-image-idx3-ubyte.gz" 1>&2 echo "ERROR: from http://ml.nec-labs.com/download/data/mnist8m/train8m-images-idx3-ubyte.gz" 1>&2 exit 1 } test -e train8m-labels-idx1-ubyte.gz || { echo "ERROR: you need to download train8m-labels-idx1-ubyte.gz" 1>&2 echo "ERROR: from http://ml.nec-labs.com/download/data/mnist8m/train8m-labels-idx1-ubyte.gz" 1>&2 exit 1 } paste -d' ' \ <(gunzip -dc train8m-labels-idx1-ubyte.gz | ./extract-labels) \ <(gunzip -dc train8m-images-idx3-ubyte.gz | ./extractfeatures) | \ time ../../vowpalwabbit/vw --oaa 10 -f mnist8m.model \ -b 24 --adaptive --invariant --holdout_off \ -l 0.1 --nn 40 vowpal-wabbit-8.6.1.dfsg1/demo/mnist/do-mnist8m11png-train000077500000000000000000000022771332666127000233120ustar00rootroot00000000000000#! 
/bin/zsh test -e train8m-images-idx3-ubyte.gz || { echo "ERROR: you need to download train8m-image-idx3-ubyte.gz" 1>&2 echo "ERROR: from http://ml.nec-labs.com/download/data/mnist8m/train8m-images-idx3-ubyte.gz" 1>&2 exit 1 } test -e train8m-labels-idx1-ubyte.gz || { echo "ERROR: you need to download train8m-labels-idx1-ubyte.gz" 1>&2 echo "ERROR: from http://ml.nec-labs.com/download/data/mnist8m/train8m-labels-idx1-ubyte.gz" 1>&2 exit 1 } SHUFFLE='BEGIN { srand 69; }; $i = int rand 1000; print $b[$i] if $b[$i]; $b[$i] = $_; } { print grep { defined $_ } @b;' paste -d' ' \ <(gunzip -c train8m-labels-idx1-ubyte.gz | ./extract-labels) \ <(gunzip -c train8m-images-idx3-ubyte.gz | ./extractpixels) | \ perl -ne ${SHUFFLE} | \ ./roundrobin ./pixelngrams 3 | \ time ../../vowpalwabbit/vw --oaa 10 -f mnist8m11png.model \ -b 20 --adaptive --invariant --holdout_off \ --nn 5 --inpass \ -l 0.05 vowpal-wabbit-8.6.1.dfsg1/demo/mnist/do-mnist8mpng-train000077500000000000000000000021651332666127000231440ustar00rootroot00000000000000#! 
/bin/zsh test -e train8m-images-idx3-ubyte.gz || { echo "ERROR: you need to download train8m-image-idx3-ubyte.gz" 1>&2 echo "ERROR: from http://ml.nec-labs.com/download/data/mnist8m/train8m-images-idx3-ubyte.gz" 1>&2 exit 1 } test -e train8m-labels-idx1-ubyte.gz || { echo "ERROR: you need to download train8m-labels-idx1-ubyte.gz" 1>&2 echo "ERROR: from http://ml.nec-labs.com/download/data/mnist8m/train8m-labels-idx1-ubyte.gz" 1>&2 exit 1 } SHUFFLE='BEGIN { srand 69; }; $i = int rand 1000; print $b[$i] if $b[$i]; $b[$i] = $_; } { print grep { defined $_ } @b;' paste -d' ' \ <(gunzip -c train8m-labels-idx1-ubyte.gz | ./extract-labels) \ <(gunzip -c train8m-images-idx3-ubyte.gz | ./extractpixels) | \ perl -ne ${SHUFFLE} | \ ./roundrobin ./pixelngrams 3 | \ time ../../vowpalwabbit/vw --oaa 10 -f mnist8mpng.model \ -b 20 --adaptive --invariant --holdout_off \ -l 0.05 vowpal-wabbit-8.6.1.dfsg1/demo/mnist/do-mnistpng-train000077500000000000000000000022501332666127000226720ustar00rootroot00000000000000#! 
/bin/zsh test -e train-images-idx3-ubyte.gz || { echo "ERROR: you need to download train-image-idx3-ubyte.gz" 1>&2 echo "ERROR: from http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz" 1>&2 exit 1 } test -e train-labels-idx1-ubyte.gz || { echo "ERROR: you need to download train-labels-idx1-ubyte.gz" 1>&2 echo "ERROR: from http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz" 1>&2 exit 1 } SHUFFLE='BEGIN { srand 69; }; $i = int rand 1000; print $b[$i] if $b[$i]; $b[$i] = $_; } { print grep { defined $_ } @b;' paste -d' ' \ <(gunzip -c train-labels-idx1-ubyte.gz | ./extract-labels) \ <(gunzip -c train-images-idx3-ubyte.gz | ./extractpixels) | \ perl -ne ${SHUFFLE} | \ ./roundrobin ./pixelngrams 2 | \ time ../../vowpalwabbit/vw --oaa 10 -f mnistpng.model \ -b 20 --adaptive --invariant --holdout_off \ -l 0.05 --passes 15 -k --compressed --cache_file mnistpng.cache && \ rm -f mnistpng.cache vowpal-wabbit-8.6.1.dfsg1/demo/mnist/do-pixelngram-test000077500000000000000000000015631332666127000230510ustar00rootroot00000000000000#! /bin/zsh test -e $1.model || { echo "ERROR: you need to train a model" 1>&2 echo "ERROR: run do-${1}-train in this directory" 1>&2 exit 1 } test -e t10k-labels-idx1-ubyte.gz || { echo "ERROR: you need to download t10k-labels-idx1-ubyte.gz" 1>&2 echo "ERROR: from http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz" 1>&2 exit 1 } test -e t10k-images-idx3-ubyte.gz || { echo "ERROR: you need to download t10k-images-idx3-ubyte.gz" 1>&2 echo "ERROR: from http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz" 1>&2 exit 1 } paste -d' ' \ <(gunzip -c t10k-labels-idx1-ubyte.gz | ./extract-labels) \ <(gunzip -c t10k-images-idx3-ubyte.gz | \ ./extractpixels | ./pixelngrams) | \ ../../vowpalwabbit/vw -t -i $1.model -p $1.test.predictions vowpal-wabbit-8.6.1.dfsg1/demo/mnist/do-test000077500000000000000000000014731332666127000207050ustar00rootroot00000000000000#! 
/bin/zsh test -e $1.model || { echo "ERROR: you need to train a model" 1>&2 echo "ERROR: run do-${1}-train in this directory" 1>&2 exit 1 } test -e t10k-labels-idx1-ubyte.gz || { echo "ERROR: you need to download t10k-labels-idx1-ubyte.gz" 1>&2 echo "ERROR: from http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz" 1>&2 exit 1 } test -e t10k-images-idx3-ubyte.gz || { echo "ERROR: you need to download t10k-images-idx3-ubyte.gz" 1>&2 echo "ERROR: from http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz" 1>&2 exit 1 } paste -d' ' \ <(gunzip -c t10k-labels-idx1-ubyte.gz | ./extract-labels) \ <(gunzip -c t10k-images-idx3-ubyte.gz | ./extractfeatures) | \ ../../vowpalwabbit/vw -t -i $1.model -p $1.test.predictions vowpal-wabbit-8.6.1.dfsg1/demo/mnist/extract-labels000077500000000000000000000010571332666127000222360ustar00rootroot00000000000000#! /usr/bin/env perl use warnings; use strict; use IO::File; local $SIG{__WARN__} = sub {}; #my $fname = shift @ARGV or die; #my $fh = new IO::File $fname, "r" or die "$fname: $!"; my $fh = \*STDIN; binmode ($fh, ':raw'); $/ = \4; my $magic = unpack ("N", <$fh>); die "wtf $magic" unless $magic == 2049; my $n_items = unpack ("N", <$fh>); warn "n_items = $n_items"; $/ = \1; while (defined ($_ = <$fh>)) { die "wtf @{[length($_)]}" unless length ($_) == 1; my ($label) = unpack ("C", $_); ++$label; print "$label 1 $label|\n"; } vowpal-wabbit-8.6.1.dfsg1/demo/mnist/extractfeatures.cpp000066400000000000000000000021601332666127000233070ustar00rootroot00000000000000#include #include #include #include #include #include int main (void) { using std::cin; using std::cout; using std::endl; using std::setprecision; uint32_t magic; cin.read (reinterpret_cast (&magic), sizeof (uint32_t)); magic = ntohl (magic); assert (magic == 2051); uint32_t n_images; cin.read (reinterpret_cast (&n_images), sizeof (uint32_t)); n_images = ntohl (n_images); uint32_t n_rows; cin.read (reinterpret_cast (&n_rows), sizeof (uint32_t)); n_rows = ntohl 
(n_rows); uint32_t n_columns; cin.read (reinterpret_cast (&n_columns), sizeof (uint32_t)); n_columns = ntohl (n_columns); uint32_t rc = n_rows * n_columns; unsigned char buf[rc]; for (cin.read (reinterpret_cast (buf), rc); ! cin.eof (); cin.read (reinterpret_cast (buf), rc)) { cout << "|p"; for (unsigned int p = 0; p < n_rows * n_columns; ++p) { if (buf[p]) cout << " " << p << ":" << setprecision (8) << static_cast(buf[p])/256.0; } cout << endl; } return 0; } vowpal-wabbit-8.6.1.dfsg1/demo/mnist/extractpixels.cpp000066400000000000000000000021111332666127000227710ustar00rootroot00000000000000#include #include #include #include #include #include int main (void) { using std::cin; using std::cout; using std::endl; using std::setprecision; uint32_t magic; cin.read (reinterpret_cast (&magic), sizeof (uint32_t)); magic = ntohl (magic); assert (magic == 2051); uint32_t n_images; cin.read (reinterpret_cast (&n_images), sizeof (uint32_t)); n_images = ntohl (n_images); uint32_t n_rows; cin.read (reinterpret_cast (&n_rows), sizeof (uint32_t)); n_rows = ntohl (n_rows); uint32_t n_columns; cin.read (reinterpret_cast (&n_columns), sizeof (uint32_t)); n_columns = ntohl (n_columns); uint32_t rc = n_rows * n_columns; unsigned char buf[rc]; for (cin.read (reinterpret_cast (buf), rc); ! cin.eof (); cin.read (reinterpret_cast (buf), rc)) { for (unsigned int p = 0; p < n_rows * n_columns; ++p) { if (buf[p]) cout << " " << p << ":" << static_cast(buf[p]); } cout << endl; } return 0; } vowpal-wabbit-8.6.1.dfsg1/demo/mnist/map000077500000000000000000000004371332666127000201020ustar00rootroot00000000000000#! /usr/bin/env perl use warnings; use strict; use IO::File; my @mappers = map { $_->autoflush (0); $_ } map { new IO::File $_, "w" or die "$_: $!" } @ARGV; while (defined ($_ = )) { my $fh = $mappers[$. 
% @mappers]; print $fh $_; } vowpal-wabbit-8.6.1.dfsg1/demo/mnist/pixelngrams.cpp000066400000000000000000000065441332666127000224410ustar00rootroot00000000000000#include #include #include #include #include #include #include namespace { //====================================================================== // vhd_ngrams = // = // vertical, horizontal, and diagonal 2-skip-2 grams. = //======================================================================/ void vhd_ngrams (unsigned char*const buf, size_t p, size_t stride, size_t total, double* v) { double x = static_cast (buf[p]) / 256.0; double xp1 = static_cast (buf[(p + 1) % total]) / 256.0; double xp2 = static_cast (buf[(p + 2) % total]) / 256.0; double xp3 = static_cast (buf[(p + 3) % total]) / 256.0; double xd1 = static_cast (buf[(p + stride) % total]) / 256.0; double xd2 = static_cast (buf[(p + 2 * stride) % total]) / 256.0; double xd3 = static_cast (buf[(p + 3 * stride) % total]) / 256.0; double xpd1 = static_cast (buf[(p + stride + 1) % total]) / 256.0; double xpd2 = static_cast (buf[(p + 2 * (stride + 1)) % total]) / 256.0; double xpd3 = static_cast (buf[(p + 3 * (stride + 1)) % total]) / 256.0; double xmd1 = static_cast (buf[(p + stride - 1) % total]) / 256.0; double xmd2 = static_cast (buf[(p + 2 * (stride - 1)) % total]) / 256.0; double xmd3 = static_cast (buf[(p + 3 * (stride - 1)) % total]) / 256.0; v[0] = ::sqrt (x * xp1); v[1] = ::sqrt (x * xd1); v[2] = ::sqrt (x * xpd1); v[3] = ::sqrt (x * xmd1); v[4] = ::sqrt (x * xp2); v[5] = ::sqrt (x * xd2); v[6] = ::sqrt (x * xpd2); v[7] = ::sqrt (x * xmd2); v[8] = ::sqrt (x * xp3); v[9] = ::sqrt (x * xd3); v[10] = ::sqrt (x * xpd3); v[11] = ::sqrt (x * xmd3); } } int main (void) { using std::cin; using std::cout; using std::getline; using std::endl; using std::flush; using std::string; size_t n_rows = 28; size_t n_columns = 28; size_t rc = n_rows * n_columns; unsigned char buf[n_rows * n_columns]; while (! 
cin.eof ()) { string line; getline (cin, line); if (line.length ()) { unsigned int feature; unsigned int value; size_t start = 0; int increment; char* pipe = const_cast (strchr (line.c_str (), '|')); if (pipe) { *pipe++ = '\0'; cout << line.c_str (); start = pipe - line.c_str (); } ::memset (buf, 0, n_rows * n_columns * sizeof (buf[0])); while (::sscanf (line.c_str () + start, "%u:%u%n", &feature, &value, &increment) >= 2) { buf[feature] = value; start += increment; } size_t offset = 1; cout << "|p"; for (unsigned int p = 0; p < n_rows * n_columns; ++p) { if (buf[p]) { cout << " " << offset << ":" << static_cast(buf[p])/256.0; double v[12]; vhd_ngrams (buf, p, n_rows, rc, v); for (unsigned int j = 0; j < 12; ++j) { if (v[j]) { cout << " " << offset + 1 + j << ":" << v[j]; } } } offset += 13; } cout << endl << flush; } } return 0; } vowpal-wabbit-8.6.1.dfsg1/demo/mnist/roundrobin000077500000000000000000000033021332666127000215000ustar00rootroot00000000000000#! /usr/bin/env perl use warnings; use strict; use IO::File; use IO::Pipe; use IO::Select; sub setup_child ($$$) { my ($command, $n, $t) = @_; my $pipec2p = new IO::Pipe; my $pipep2c = new IO::Pipe; my $pid = fork (); if ($pid) # parent { $pipec2p->reader (); $pipep2c->writer (); return [ $pipec2p, $pipep2c ]; } else # child { $pipec2p->writer (); $pipep2c->reader (); my $writefno = $pipec2p->fileno (); my $readfno = $pipep2c->fileno (); open STDIN, "<&$readfno" or die "can't dup $readfno to STDIN"; open STDOUT, ">&$writefno" or die "can't dup $writefno to STDOUT"; exec $command, $n, $t or die "can't exec $command"; } } my $command = shift @ARGV or die; my $total = shift @ARGV or die; my @mappers = map { setup_child ($command, $_, $total) } 1 .. $total; my @read = map { my $s = new IO::Select (); $s->add ($_->[0]); $s } @mappers; my @write = map { my $s = new IO::Select (); $s->add ($_->[1]); $s } @mappers; my $in = 0; my $out = 0; while (defined ($_ = )) { while (! 
$write[$out % @write]->can_write (0.001)) { while ($read[$in % @read]->can_read (0)) { my $fh = $mappers[$in % @mappers]->[0]; print $fh->getline (); ++$in; } } my $fh = $mappers[$out % @write]->[1]; $fh->printflush ($_); ++$out; } foreach my $m (@mappers) { my $fh = $m->[1]; $fh->close (); } while ($in < $out) { my $fh = $mappers[$in % @mappers]->[0]; print $fh->getline (); ++$in; } vowpal-wabbit-8.6.1.dfsg1/demo/movielens/000077500000000000000000000000001332666127000202425ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/movielens/.gitignore000066400000000000000000000001531332666127000222310ustar00rootroot00000000000000*.results *.results.model *.results.model.txt ml-1m ml-1m.ratings.train.vw ml-1m.ratings.test.vw ml-1m.zip vowpal-wabbit-8.6.1.dfsg1/demo/movielens/Makefile000066400000000000000000000150261332666127000217060ustar00rootroot00000000000000SHELL=/bin/zsh VW=../../vowpalwabbit/vw .SECONDARY: all: @cat README.md shootout: $(foreach what,linear lrq lrqdropout lrqdropouthogwild,$(what).print) clean: rm -f $(wildcard *results*) $(wildcard *.vw) $(wildcard *.model.txt) $(wildcard *.pdf) ml-%.zip: @echo "downloading movielens $*" @wget http://files.grouplens.org/datasets/movielens/ml-$*.zip ml-%/ratings.dat: ml-%.zip @rm -rf ml-$* @unzip -qq $< @(test -d ml-10M100K && mv -f ml-10M100K ml-10m) || true @rm -rf __MACOSX @touch ml-$*/* ml-%.ratings.train.vw: ml-%/ratings.dat @echo -n "preprocessing movielens $* ..." 
@./ratings2vw ml-$*.ratings.pre.train.vw ml-$*.ratings.test.vw $< @perl -ne 'BEGIN { srand 8675309; }; \ 1; print join "\t", rand (), $$_;' \ ml-$*.ratings.pre.train.vw | sort -k1 | \ cut -f2- > ml-$*.ratings.train.vw @rm -f ml-$*.ratings.pre.train.vw @echo " complete" %.test.vw: %.train.vw @true %.print: %.results @printf "%s test MAE is %3.3f\n" $* $$(cat $*.results) #--------------------------------------------------------------------- # linear model (no interaction terms) #--------------------------------------------------------------------- linear.results: ml-1m.ratings.test.vw ml-1m.ratings.train.vw @echo "****************************************************" @echo "* training linear model (no interaction terms) *" @echo "****************************************************" @echo @${VW} --loss_function quantile -l 1 -b 24 --passes 100 \ -k --cache_file $@.cache -d $(word 2,$+) --holdout_off \ --adaptive --invariant -f $@.model @echo "****************************************************" @echo "* testing linear model (no interaction terms) *" @echo "****************************************************" @echo @${VW} --loss_function quantile -t -i $@.model \ -d $(word 1,$+) -p \ >(perl -lane '$$s+=abs(($$F[0]-$$F[1])); } { \ 1; print $$s/$$.;' > $@) @echo "****************************************************" @echo "* saving human readable model (--invert_hash) *" @echo "****************************************************" @echo @${VW} -i $@.model -t --invert_hash $@.model.txt -d $(word 2,$+) @rm -f $@.cache $@.model #--------------------------------------------------------------------- # low-rank interaction model (without dropout) #--------------------------------------------------------------------- lrq.results: ml-1m.ratings.test.vw ml-1m.ratings.train.vw @echo "*********************************************************" @echo "* training low-rank interaction model (without dropout) *" @echo "* *" @echo "* vw --lrq um7 ... 
*" @echo "*********************************************************" @echo @${VW} --loss_function quantile -l 0.1 -b 24 --passes 100 \ -k --cache_file $@.cache -d $(word 2,$+) --holdout_off \ --power_t 0.333 --l2 1.25e-7 --lrq um7 --adaptive --invariant -f $@.model @echo "********************************************************" @echo "* testing low-rank interaction model (without dropout) *" @echo "********************************************************" @echo @${VW} --loss_function quantile -t -i $@.model \ -d $(word 1,$+) -p \ >(perl -lane '$$s+=abs(($$F[0]-$$F[1])); } { \ 1; print $$s/$$.;' > $@) @echo "****************************************************" @echo "* saving human readable model (--invert_hash) *" @echo "****************************************************" @echo @${VW} -i $@.model -t --invert_hash $@.model.txt -d $(word 2,$+) @rm -f $@.cache $@.model #--------------------------------------------------------------------- # low-rank interaction model (with dropout) #--------------------------------------------------------------------- lrqdropout.results: ml-1m.ratings.test.vw ml-1m.ratings.train.vw @echo "******************************************************" @echo "* training low-rank interaction model (with dropout) *" @echo "* *" @echo "* vw --lrq um14 --lrqdropout ... 
*" @echo "******************************************************" @echo @${VW} --loss_function quantile -l 0.45 -b 24 --passes 100 \ -k --cache_file $@.cache -d $(word 2,$+) --holdout_off \ --lrq um14 --lrqdropout --adaptive --invariant -f $@.model @echo "*****************************************************" @echo "* testing low-rank interaction model (with dropout) *" @echo "*****************************************************" @echo @${VW} --loss_function quantile -t -i $@.model \ -d $(word 1,$+) -p \ >(perl -lane '$$s+=abs(($$F[0]-$$F[1])); } { \ 1; print $$s/$$.;' > $@) @echo "****************************************************" @echo "* saving human readable model (--invert_hash) *" @echo "****************************************************" @echo @${VW} -i $@.model -t --invert_hash $@.model.txt -d $(word 2,$+) @rm -f $@.cache $@.model movie_dendrogram.pdf: lrqdropout.results @echo "*******************************************************" @echo "* Generating movie clustering based on latent factors *" @echo "* (Requires an installation of R) *" @echo "*******************************************************" @grep ^lrq^m $<.model.txt | tr '^' ':' > $<.model.csv @Rscript visualize_factors.R @rm -f $@.model.csv #--------------------------------------------------------------------- # low-rank interaction model (with dropout) # (HOGWILD training mode) #--------------------------------------------------------------------- lrqdropouthogwild.results: ml-1m.ratings.test.vw ml-1m.ratings.train.vw do-lrq-hogwild @echo "******************************************************" @echo "* training low-rank interaction model (with dropout) *" @echo "* (HOGWILD training mode) *" @echo "* *" @echo "* vw --lrq um14 --lrqdropout ... 
*" @echo "******************************************************" @echo @./do-lrq-hogwild $@.model @echo "*****************************************************" @echo "* testing low-rank interaction model (with dropout) *" @echo "*****************************************************" @echo @${VW} --loss_function quantile -t -i $@.model \ -d $(word 1,$+) -p \ >(perl -lane '$$s+=abs(($$F[0]-$$F[1])); } { \ 1; print $$s/$$.;' > $@) .PHONY: all clean shootout vowpal-wabbit-8.6.1.dfsg1/demo/movielens/README.md000077500000000000000000000073331332666127000215320ustar00rootroot00000000000000Low rank quadratic demo ------------------------------- This demo shows a low-rank approximation to an interaction design matrix for the [movielens-1M](http://files.grouplens.org/papers/ml-10m-README.html) dataset. ### About low-rank interactions ### In movielens-1M, a user has at most one rating per movie, and therefore a full interaction design between these two variables (in `vw` syntax: `-q um`) fundamentally cannot generalize. Since this situation arises in recommendation systems, low-rank approximations to interaction terms rose to prominence in the recommendation community, under the moniker "matrix factorization". However, the technique is also appropriate in non-recommendation settings, e.g., when the interaction between two high cardinality categorical variables is desired but the available data is too sparse to learn a full interaction model. There is a great piece of software called [libfm](http://www.libfm.org/) whose raison d'etre is to fit low-rank approximations to interaction designs, and the main author [Steffen Rendle](http://www.kaggle.com/users/25112/steffen-rendle) does quite well on Kaggle. Imitation is the best form of flattery. ### How it works ### If you have two namespaces `a` and `b`, instead of the full interaction design enabled by specifying `-q ab`, you can have a rank-k interaction design by specifying `--lrq abk`. 
Additionally specifying `--lrqdropout` trains with dropout which sometimes works better. When using dropout the best performing rank tends to be about twice as big as without dropout. You might find a bit of `--l2` regularization improves generalization. ### Demo Instructions ### - `make shootout`: eventually produces four results indicating test MAE (mean absolute error) on movielens-1M for - linear: a model without any interactions. basically this creates a user bias and item bias fit. this is a surprisingly strong baseline in terms of MAE, but is useless for recommendation as it induces the same item ranking for all users. It achieves test MAE of 0.731. - lrq: the linear model augmented with rank-7 interactions between users and movies, aka, "seven latent factors". It achieves test MAE of 0.709. I determined that 7 was the best number to use through experimentation. The key additional `vw` command-line flags vs. the linear model are `--l2 1.25e-7 --lrq um7`. Performance is sensitive to the choice of `--l2` regularization strength. - lrqdropout: the linear model augmented with rank-14 interactions between users and movies, and trained with dropout. It achieves test MAE of 0.689. The key additional `vw` command-line flags vs. the linear model are `--lrq um14 --lrqdropout`. - lrqdropouthogwild: same as lrqdropout, but trained in parallel on multiple cores without locking, a la [Niu et. al.](http://www.eecs.berkeley.edu/~brecht/papers/hogwildTR.pdf). Test MAE is nondeterministic but typically equivalent to lrqdropout. The main purpose of this demo is to instruct on how to achieve lock-free parallel learning. (Note using the cache and a single training core can be faster than using multiple cores and parsing continuously. However in some cases data is generated dynamically in such volume that the cache is not practical, thus this technique is helpful.) - the first time you invoke `make shootout` there is a lot of other output. 
invoking it a second time will allow you to just see the cached results. - `make movie_dendrogram.pdf` will produce a couple of PDFs with hierarchical clustering of the movies based on the latent factors found by `--lrq`. It serves as an example on how to extract the latent factors from an `--invert_hash` file. You will need to zoom in in the large dendrogram to find the movie names. Details about how `vw` is invoked is in the `Makefile`. vowpal-wabbit-8.6.1.dfsg1/demo/movielens/do-lrq-hogwild000077500000000000000000000020111332666127000230130ustar00rootroot00000000000000#! /bin/zsh nukeem() { \ trap - INT QUIT TERM pkill -9 -f 'vw.*--port 26542' } if which netcat 2>/dev/null; then netcat=netcat elif which nc 2>/dev/null; then netcat=nc else echo "Install netcat or nc" >&2 exit 1 fi rm -f "$1" { ../../vowpalwabbit/vw --loss_function quantile -l 0.45 -b 24 --holdout_off --lrq um14 --lrqdropout --adaptive --invariant -f "$1" --daemon --num_children 4 --port 26542 2>&1 | perl -lane 'print $_ unless $c{$F[2]}++; ' } & trap 'nukeem; exit 1' INT QUIT TERM while ! $netcat -z localhost 26542 do sleep 1 done for pass in $(seq 1 100) do cat ml-1m.ratings.train.vw done | \ ./map \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) pkill -f 'vw.*--port 26542' while test ! -s "$1" do sleep 1 done vowpal-wabbit-8.6.1.dfsg1/demo/movielens/map000077500000000000000000000006411332666127000207460ustar00rootroot00000000000000#! /usr/bin/env perl use warnings; use strict; use IO::File; my @mappers = map { $_->autoflush (0); $_ } map { new IO::File $_, "w" or die "$_: $!" } @ARGV; my @buf = grep { defined } map { scalar } (0 .. 49); my $n = 0; while (@buf) { my $fh = $mappers[$n % @mappers]; print $fh @buf; ++$n; @buf = grep { defined } map { scalar } (0 .. 
49); } vowpal-wabbit-8.6.1.dfsg1/demo/movielens/ratings2vw000077500000000000000000000024171332666127000223020ustar00rootroot00000000000000#! /usr/bin/env perl use IO::File; use warnings; use strict; srand 69; sub output_user ($$@) { my ($trainfh, $testfh, @rows) = @_; return () unless @rows > 1; my @permrows = map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { [ rand (), $_ ] } @rows; my @testrows = splice @permrows, -1; print $trainfh @permrows; return @testrows; } my $trainfile = shift @ARGV or die; my $testfile = shift @ARGV or die; my $trainfh = new IO::File $trainfile, "w" or die; my $testfh = new IO::File $testfile, "w" or die; my $olduser; my @rows; my @save; my %seen; while (defined ($_ = <>)) { chomp; my ($user, $movie, $rating, undef) = split /::/, $_; if (defined ($olduser) && $user != $olduser) { push @save, output_user ($trainfh, $testfh, @rows); undef @rows; die "input file not collated" if $seen{$olduser}++; } push @rows, "$rating $rating|user $user |movie $movie\n"; $olduser = $user; } push @save, output_user ($trainfh, $testfh, @rows); my @permsave = map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { [ rand (), $_ ] } @save; my @test = splice @permsave, 0, 5000; print $trainfh @permsave; print $testfh @test; vowpal-wabbit-8.6.1.dfsg1/demo/movielens/visualize_factors.R000066400000000000000000000045661332666127000241340ustar00rootroot00000000000000# Create movie dendrogram based on latent factors library(reshape) ########################################################## # Utility functions ########################################################## # Load factors file (as preprocessed by shell script) and join with movie names # for better visualization loadMovieFactors <- function(name) { lrq_movies <- read.csv('lrqdropout.results.model.csv',header=F,sep=':') names(lrq_movies) <- c('lrq','namespace','movie','factor','hash','weight') movie_list <- read.csv('ml-1m/movies.dat',header=F,sep=':',fileEncoding='latin1') movies <- 
data.frame(movie=movie_list$V1,name=movie_list$V3,genre=movie_list$V5) movies$full_name <- paste(movies$name,movies$genre,sep=' / ') mm <- merge(lrq_movies,movies,by='movie') movie_factors <- cast(mm, full_name ~ factor, value='weight', sum) rownames(movie_factors) <- movie_factors$full_name subset(movie_factors, select = -c(full_name)) } # Calculate distances between movie latent factors using cosine similarity cosineSimilarity <- function(df){ x <- as.matrix(df) m <- 1 - x%*%t(x)/(sqrt(rowSums(x^2) %*% t(rowSums(x^2)))) rownames(m) <- rownames(df) colnames(m) <- rownames(df) m } # Generate a hierarchical clustering dendrogram, color the N cluster founds with alternating colors # The movie titles are going to be really small because of the number of movies, better to use # a dendrogram viewing tool. clusterMovies <- function(distances, N) { hc <- hclust(distances) clusMember <- cutree(hc,N) labelColors <- rep(c("#036564", "#EB6841"), N/2) colLab <- function(n) { if (is.leaf(n)) { a <- attributes(n) labCol <- labelColors[clusMember[which(names(clusMember) == a$label)]] attr(n, "nodePar") <- c(a$nodePar, lab.col = labCol) } n } hcd <- as.dendrogram(hc) dendrapply(hcd, colLab) } # Save the dendrogram in PDF so that it is possible to zoom into the movie names saveDendrogram <- function(dendrogram,fname,w=40,h=15,cex=1.0) { pdf(fname, width=w, height=h) par(cex=cex,mai=c(8,2,2,2)) plot(dendrogram) dev.off() } movie_factors <- loadMovieFactors('lrqdropout.results.model.csv') distances <- cosineSimilarity(movie_factors) dendrogram <- clusterMovies(as.dist(distances), 80) saveDendrogram(dendrogram,"movie_dendrogram.pdf",w=400,h=150,cex=0.5) saveDendrogram(cut(dendrogram,h=1.5)$lower[[1]],"movie_dendrogram_small.pdf",w=20) 
vowpal-wabbit-8.6.1.dfsg1/demo/normalized/000077500000000000000000000000001332666127000204055ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/normalized/.gitignore000066400000000000000000000000511332666127000223710ustar00rootroot00000000000000*.gz *.zip *.Z *.bz2 *.best *.nonormbest vowpal-wabbit-8.6.1.dfsg1/demo/normalized/Makefile000066400000000000000000000256331332666127000220560ustar00rootroot00000000000000SHELL=/bin/bash VW=../../vowpalwabbit/vw .SECONDARY: all: @cat README.md datasets := bank census covertype CTslice MSD shuttle all.results.pre: $(foreach what,$(datasets),$(what).best $(what).nonormbest) @printf "%20.20s\t%9s\t%9s\t%9s\t%9s\n" "Dataset" "NAG eta^*" "NAG loss" "AG eta^*" "AG loss" all.results: all.results.pre $(foreach what,$(datasets),$(what).resultsprint) SHUFFLE='BEGIN { srand 69; }; \ $$i = int rand 100000; \ print $$b[$$i] if $$b[$$i]; \ $$b[$$i] = $$_; } { print grep { defined $$_ } @b;' #--------------------------------------------------------------------- # bank marketing # # normalization really helps. The columns have units of euros, seconds, # days, and years; in addition there are categorical variables. #--------------------------------------------------------------------- bank.zip: @echo "downloading bank ..." 1>&2 @wget -q -O - \ http://archive.ics.uci.edu/ml/machine-learning-databases/00222/$@ \ > $@ bank.preprocess.data.gz: bank.zip @echo "preprocessing bank ..." 1>&2 @zcat $< 2>/dev/null | \ perl -MScalar::Util -F';' -lane '$$l = pop @F; $$p = ($$l =~ /no/) ? -1 : 1; print "$$p $$p|f ", join " ", map { $$isn = Scalar::Util::looks_like_number ($$F[$$_]); $$F[$$_] =~ s/^\s+//; $$F[$$_] =~ s/\s+$$//; $$F[$$_] =~ s/\W/_/g unless $$isn; $$isn ? "@{[$$_+1]}:$$F[$$_]" : "@{[$$_+1]}_$$F[$$_]" } grep { $$F[$$_] =~ /\w/ && ( ! Scalar::Util::looks_like_number ($$F[$$_]) || $$F[$$_] > 0 ) } (0 .. 
$$#F)' | \ perl -e 'BEGIN { srand 69; }; print map { $$_->[1] } sort { $$a->[0] <=> $$b->[0] } map { [ rand (), $$_ ] } <>;' | gzip \ > $@ bank.data: bank.preprocess.data.gz bank.%.nonormlearn: bank.preprocess.data.gz @${VW} --loss_function logistic -b 22 -q ff -l $* $< \ --adaptive --invariant \ -p >(perl -lane ' \ 1; ++$$n; $$l+=1.0 if $$F[0] * $$F[1] < 0; } \ 1; { printf "average loss = %f\t%u\t%u\n", $$l/$$n, $$l, $$n;') bank.%.learn: bank.preprocess.data.gz @${VW} --loss_function logistic -b 22 -q ff -l $* $< \ -p >(perl -lane ' \ 1; ++$$n; $$l+=1.0 if $$F[0] * $$F[1] < 0; } \ 1; { printf "average loss = %f\t%u\t%u\n", $$l/$$n, $$l, $$n;') bankbestmin=1e-2 bankbestmax=10 banknonormbestmin=1e-8 banknonormbestmax=1e-3 banktimeestimate=1 #--------------------------------------------------------------------- # covertype #--------------------------------------------------------------------- covtype.data.gz: @echo "downloading covertype ..." @wget -q -O - \ http://archive.ics.uci.edu/ml/machine-learning-databases/covtype/$@ \ > $@ covtype.preprocess.data.gz: covtype.data.gz @echo "preprocessing covertype ..." 1>&2 @perl -F',' \ -lane '$$l = pop @F; print "$$l $$l|f ", map { "$$_:$$F[$$_] "} grep { $$F[$$_] } (0 .. $$#F)' <(zcat $<) | \ perl -e 'BEGIN { srand 69; }; print map { $$_->[1] } sort { $$a->[0] <=> $$b->[0] } map { [ rand (), $$_ ] } <>;' | gzip \ > $@ covertype.data: covtype.preprocess.data.gz covertype.%.nonormlearn: covtype.preprocess.data.gz ${VW} --adaptive --invariant -b 22 --hash all -q ff --oaa 7 -l $* -d $< covertype.%.learn: covtype.preprocess.data.gz ${VW} -b 22 --hash all -q ff --oaa 7 -l $* -d $< covertypebestmin=1e-2 covertypebestmax=10 covertypenonormbestmin=1e-8 covertypenonormbestmax=1e-5 covertypetimeestimate=5 #--------------------------------------------------------------------- # million song database # # normalization is helpful. 
#--------------------------------------------------------------------- YearPredictionMSD.bz2: @echo "downloading MSD ..." 1>&2 @wget -q -O - \ http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/$@ \ > $@ year.preprocess.data.gz: YearPredictionMSD.bz2 @echo "preprocessing MSD ..." 1>&2 @perl -lane 'splice @F, 1, 0, "$$F[0]|f"; print join " ", @F' \ <(bzcat $<) | \ perl -ne $(SHUFFLE) \ | gzip > $@ MSD.data: year.preprocess.data.gz MSD.%.learn: year.preprocess.data.gz @zcat $< | \ perl -lane '$$F[0] -= 1998; print join " ", @F' | \ ${VW} -q ff -l $* \ -p >(perl -lane ' \ 1; BEGIN { $$max = 0; $$min = 3000; }; \ 1; ++$$n; $$l += ($$F[0] + 1998 - $$F[1])**2; \ 1; $$max = $$F[1] if $$max < $$F[1]; \ 1; $$min = $$F[1] if $$min > $$F[1]; \ 1; } { $$c = $$l / ($$max - $$min)**2; \ 1; printf "average loss = %f\t%.3f\t%.3f\t%.3f\t%.3f\n", $$c/$$n, $$l, $$n, $$min, $$max;') MSD.%.nonormlearn: year.preprocess.data.gz @zcat $< | \ perl -lane '$$F[0] -= 1998; print join " ", @F' | \ ${VW} --adaptive --invariant -q ff -l $* \ -p >(perl -lane ' \ 1; BEGIN { $$max = 0; $$min = 3000; }; \ 1; ++$$n; $$l += ($$F[0] + 1998 - $$F[1])**2; \ 1; $$max = $$F[1] if $$max < $$F[1]; \ 1; $$min = $$F[1] if $$min > $$F[1]; \ 1; } { $$c = $$l / ($$max - $$min)**2; \ 1; printf "average loss = %f\t%.3f\t%.3f\t%.3f\t%.3f\n", $$c/$$n, $$l, $$n, $$min, $$max;') MSDbestmin=1e-2 MSDbestmax=10 MSDnonormbestmin=1e-8 MSDnonormbestmax=1e-5 MSDtimeestimate=15 #--------------------------------------------------------------------- # census-income (KDD) #--------------------------------------------------------------------- census-income.data.gz: @echo "downloading census ..." 1>&2 @wget -q -O - \ http://archive.ics.uci.edu/ml/machine-learning-databases/census-income-mld/$@ \ > $@ census-income.preprocess.data.gz: census-income.data.gz @echo "preprocessing census ..." 1>&2 @zcat $< | \ perl -MScalar::Util -F',' -lane '$$l = pop @F; $$p = $$l =~ /-/ ? 
-1 : 1; print "$$p $$p|f ", join " ", map { $$isn = Scalar::Util::looks_like_number ($$F[$$_]); $$F[$$_] =~ s/^\s+//; $$F[$$_] =~ s/\s+$$//; $$F[$$_] =~ s/\W/_/g unless $$isn; $$isn ? "@{[$$_+1]}:$$F[$$_]" : "@{[$$_+1]}_$$F[$$_]" } grep { $$F[$$_] =~ /\w/ && ( ! Scalar::Util::looks_like_number ($$F[$$_]) || $$F[$$_] > 0 ) } (0 .. $$#F)' | \ perl -e 'BEGIN { srand 69; }; print map { $$_->[1] } sort { $$a->[0] <=> $$b->[0] } map { [ rand (), $$_ ] } <>;' | gzip \ > $@ census.data: census-income.preprocess.data.gz census.%.nonormlearn: census-income.preprocess.data.gz @${VW} --hash all --loss_function logistic -q ff -l $* \ --adaptive --invariant $< \ -p >(perl -lane ' \ 1; ++$$n; $$l+=1.0 if $$F[0] * $$F[1] < 0; } \ 1; { printf "average loss = %f\t%u\t%u\n", $$l/$$n, $$l, $$n;') census.%.learn: census-income.preprocess.data.gz @${VW} --hash all --loss_function logistic -q ff -l $* $< \ -p >(perl -lane ' \ 1; ++$$n; $$l+=1.0 if $$F[0] * $$F[1] < 0; } \ 1; { printf "average loss = %f\t%u\t%u\n", $$l/$$n, $$l, $$n;') censusbestmin=1e-2 censusbestmax=10 censusnonormbestmin=1e-8 censusnonormbestmax=1e-4 censustimeestimate=5 #--------------------------------------------------------------------- # Statlog (Shuttle) #--------------------------------------------------------------------- shuttle.trn.Z: @echo "downloading Shuttle ..." 1>&2 @wget -q -O - \ http://archive.ics.uci.edu/ml/machine-learning-databases/statlog/shuttle/$@ \ > $@ shuttle.preprocess.data.gz: shuttle.trn.Z @echo "preprocessing Shuttle ..." 1>&2 @zcat shuttle.trn.Z | \ perl -lane '$$l = pop @F; print "$$l |f ", join " ", map { "@{[$$_+1]}:$$F[$$_]" } grep { $$F[$$_] } (0 .. 
$$#F)' | \ perl -e 'BEGIN { srand 69; }; print map { $$_->[1] } sort { $$a->[0] <=> $$b->[0] } map { [ rand (), $$_ ] } <>;' | gzip \ > $@ shuttle.data: shuttle.preprocess.data.gz shuttle.%.nonormlearn: shuttle.preprocess.data.gz ${VW} --adaptive --invariant -b 22 --hash all -q ff --oaa 7 -l $* -d $< shuttle.%.learn: shuttle.preprocess.data.gz ${VW} -b 22 --hash all -q ff --oaa 7 -l $* -d $< shuttlebestmin=1e-2 shuttlebestmax=10 shuttlenonormbestmin=1e-8 shuttlenonormbestmax=1e-3 shuttletimeestimate=1 #--------------------------------------------------------------------- # CT slices # # normalization doesn't help much #--------------------------------------------------------------------- slice_localization_data.zip: @echo "downloading CTslice ..." 1>&2 @wget -q -O - \ http://archive.ics.uci.edu/ml/machine-learning-databases/00206/$@ \ > $@ slice_localization.preprocess.data.gz: slice_localization_data.zip @echo "preprocessing CTslice ..." 1>&2 @zcat $< | perl -F',' -lane 'BEGIN { scalar <>; }; shift @F; $$l = pop @F; $$l =~ s/\r//; $$l -= 47; print "$$l $$l|f ", join " ", map { ($$F[$$_] eq "-0.25") ? "$${_}_outside" : "$$_:$$F[$$_]" } grep { $$F[$$_] } (0 .. 
$$#F)' | perl -e 'BEGIN { srand 69; }; print map { $$_->[1] } sort { $$a->[0] <=> $$b->[0] } map { [ rand (), $$_ ] } <>;' | gzip \ > $@ CTslice.data: slice_localization.preprocess.data.gz CTslice.%.nonormlearn: slice_localization.preprocess.data.gz @${VW} --adaptive --invariant -b 22 --hash all -q ff -l $* $< \ -p >(perl -lane ' \ 1; BEGIN { $$max = -3000; $$min = 3000; }; \ 1; ++$$n; $$l += ($$F[0] - $$F[1])**2; \ 1; $$max = $$F[1] if $$max < $$F[1]; \ 1; $$min = $$F[1] if $$min > $$F[1]; \ 1; } { $$c = $$l / ($$max - $$min)**2; \ 1; printf "average loss = %f\t%.3f\t%.3f\t%.3f\t%.3f\n", $$c/$$n, $$l, $$n, $$min, $$max;') CTslice.%.learn: slice_localization.preprocess.data.gz @${VW} -b 22 --hash all -q ff -l $* $< \ -p >(perl -lane ' \ 1; BEGIN { $$max = -3000; $$min = 3000; }; \ 1; ++$$n; $$l += ($$F[0] - $$F[1])**2; \ 1; $$max = $$F[1] if $$max < $$F[1]; \ 1; $$min = $$F[1] if $$min > $$F[1]; \ 1; } { $$c = $$l / ($$max - $$min)**2; \ 1; printf "average loss = %f\t%.3f\t%.3f\t%.3f\t%.3f\n", $$c/$$n, $$l, $$n, $$min, $$max;') CTslicebestmin=1e-2 CTslicebestmax=10 CTslicenonormbestmin=1e-5 CTslicenonormbestmax=1 CTslicetimeestimate=15 #--------------------------------------------------------------------- # common routines #--------------------------------------------------------------------- %.best: %.data @echo "($*) searching for best in-hindsight learning rate for NAG" 1>&2 @printf "WARNING: this step takes about %s minutes\n" $($*timeestimate) @./hypersearch $($*bestmin) $($*bestmax) '$(MAKE)' '$*.%.learn' > $@ %.nonormbest: %.data @echo "($*) searching for best in-hindsight learning rate for AG" 1>&2 @printf "WARNING: this step takes about %s minutes\n" $($*timeestimate) @./hypersearch $($*nonormbestmin) $($*nonormbestmax) '$(MAKE)' '$*.%.nonormlearn' > $@ %.resultsprint: @printf "%20.20s\t%9.3g\t%9.3g\t%9.3g\t%9.3g\n" "$*" $$(cut -f1 $*.best) $$(cut -f2 $*.best) $$(cut -f1 $*.nonormbest) $$(cut -f2 $*.nonormbest) only.%: %.best %.nonormbest all.results.pre 
%.resultsprint @true .PHONY: all all.results all.results.pre vowpal-wabbit-8.6.1.dfsg1/demo/normalized/README.md000077500000000000000000000060231332666127000216700ustar00rootroot00000000000000normalized online learning demo ------------------------------- These demos show the ability of the normalized learning rule to adapt to varying feature scales, relative to an unnormalized learning rule. For more details on the normalized learning rule see [the paper](http://arxiv.org/abs/1305.6646). ### Instructions ### - `make all.results`: eventually produces a nice table outlining performance of normalized adaptive gradient (NAG) vs. unnormalized adaptive gradient (AG) for a variety of data sets. - **WARNING**: Please be aware that these demos can be network I/O, disk space, and/or CPU intensive. - The complete set of demos can take hours to compute the first time, although the results are cached for subsequent reproduction. - You will see lower progressive loss (regret) for NAG than AG. - You will also see that the optimal learning rate eta* varies less across datasets for NAG than AG. #### Details #### This is organized into individual demos which process a single dataset. The different individual demos are associated with unique make targets. An individual demo will download a data set and learn a predictor under two conditions: using the normalized learning rule aka NAG (this the vw default), and using adaptive gradient without normalization aka AG (invoked via vw arguments `--adaptive --invariant`). For both conditions it will do a hyper-parameter sweep to find the optimal in-hindsight learning rate eta*. Note the context here is online learning, so there is no train/test split; rather what is optimized is progressive loss over the input data. #### About the datasets #### Note when data is pre-normalized or otherwise does not exhibit varying scales, the normalized learning rule has essentially no effect. 
Therefore the data sets used in this demo have been selected because they exhibit varying scales. The data set [covertype](http://archive.ics.uci.edu/ml/datasets/Covertype) exemplifies how this arises in practice, as it consists of multiple physical measurements with different units. #### Just doing one dataset #### There are individual makefile targets that will just download and compare one dataset. You can compute a subset using a combination of make targets, e.g., `make only.{covertype,shuttle}`. Invocation | Dataset | Time | Disk | Network --- | --- | --- | --- | --- `make only.bank` | [bank](http://archive.ics.uci.edu/ml/datasets/Bank+Marketing) | 2 minutes | 1Mb | 1Mb `make only.census` | [census](http://archive.ics.uci.edu/ml/datasets/Census-Income+%28KDD%29) | 10 minutes | 13Mb | 7Mb `make only.covertype` | [covertype](http://archive.ics.uci.edu/ml/datasets/Covertype) | 12 minutes | 24Mb | 11Mb `make only.CTslice` | [CT Slice](http://archive.ics.uci.edu/ml/datasets/Relative+location+of+CT+slices+on+axial+axis) | 30 minutes | 40Mb | 18Mb `make only.MSD` | [MSD](http://archive.ics.uci.edu/ml/datasets/YearPredictionMSD) | 30 minutes | 500Mb | 256Mb `make only.shuttle` | [Shuttle](http://archive.ics.uci.edu/ml/datasets/Statlog+%28Shuttle%29) | 1 minute | 600Kb | 300Kb vowpal-wabbit-8.6.1.dfsg1/demo/normalized/hypersearch000077500000000000000000000064341332666127000226570ustar00rootroot00000000000000#! 
/usr/bin/env perl use warnings; use strict; # two parameters: # cmd - a command or reference to an array of command + arguments # timeout - number of seconds to wait (0 = forever) # returns: # cmd exit status (-1 if timed out) # cmd results (STDERR and STDOUT merged into an array ref) sub ExecCmd { my $cmd = shift || return(0, []); my $timeout = shift || 0; # opening a pipe creates a forked process my $pid = open(my $pipe, '-|'); return(-1, "Can't fork: $!") unless defined $pid; if ($pid) { # this code is running in the parent process my @result = (); if ($timeout) { my $failed = 1; eval { # set a signal to die if the timeout is reached local $SIG{ALRM} = sub { die "alarm\n" }; alarm $timeout; @result = <$pipe>; alarm 0; $failed = 0; }; return(-1, ['command timeout', @result]) if $failed; } else { while (<$pipe>) { push @result, $_; print STDERR "."; } } close($pipe); # return exit status, command output return ($? >> 8), \@result; } # this code is running in the forked child process { # skip warnings in this block no warnings; # redirect STDERR to STDOUT open(STDERR, '>&STDOUT'); # exec transfers control of the process # to the command ref($cmd) eq 'ARRAY' ? exec(@$cmd) : exec($cmd); } # this code will not execute unless exec fails! print "Can't exec @$cmd: $!"; exit 1; } sub evaluate ($) { my ($rate) = @_; my $nicerate = sprintf ("%.5g", $rate); print STDERR "trying $nicerate "; my @command = @ARGV; foreach my $c (@command) { $c =~ s/\%/$rate/g; }; my ($status, $rv) = ExecCmd \@command; die "subprocess invocation failed: $!" if $status; my $loss; foreach my $line (@$rv) { next unless $line =~ m%average loss = ([0-9\.]+)%; $loss = $1; } die "failed to parse average loss from vw output: ", join "", @$rv unless defined ($loss); warn " $loss\n"; return $loss; } sub argmin3 ($$$$$$) { my ($a, $fa, $b, $fb, $c, $fc) = @_; if ($fa < $fb) { return $fa < $fc ? ($a, $fa) : ($c, $fc); } else { return $fb < $fc ? 
($b, $fb) : ($c, $fc); } } my $lb = shift @ARGV; my $ub = shift @ARGV; my $flb = evaluate ($lb); my $fub = evaluate ($ub); my $tol = 1e-2; my $phi = (1.0 + sqrt (5.0)) / 2.0; my $resphi = 2.0 - $phi; my $mid = $lb + $resphi * ($ub - $lb); my $fmid = evaluate ($mid); while (abs ($ub - $lb) > $tol * abs ($ub + $lb)) { my $x = ($ub - $mid > $mid - $lb) ? $mid + $resphi * ($ub - $mid) : $mid - $resphi * ($mid - $lb); my $fx = evaluate ($x); if ($fx < $fmid) { if ($ub - $mid > $mid - $lb) { $lb = $mid; $mid = $x; $flb = $fmid; $fmid = $fx; } else { $ub = $mid; $mid = $x; $fub = $fmid; $fmid = $fx; } } else { if ($ub - $mid > $mid - $lb) { $ub = $x; $fub = $fx; } else { $lb = $x; $flb = $fx; } } } my ($best, $fbest) = argmin3 ($lb, $flb, $mid, $fmid, $ub, $fub); print "$best\t$fbest\n"; vowpal-wabbit-8.6.1.dfsg1/demo/ocr/000077500000000000000000000000001332666127000170245ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/ocr/Makefile000066400000000000000000000036121332666127000204660ustar00rootroot00000000000000# Handwritten words dataset collected by # Rob Kassel at MIT Spoken Language Systems Group # http://ai.stanford.edu/~btaskar/ocr/ VW = ../../vowpalwabbit/vw # same settings as do-mnist-train VW_OPTS = -b 24 -l 0.1 --nn 40 # http://stackoverflow.com/questions/1541844/joining-elements-of-a-list-in-gnu-make noop= space = $(noop) $(noop) VW_RUN = $(subst $(space),_,$(VW_OPTS)) RM = rm -f # Python > 2.7 is required PYTHON = python help: @echo handwritten words dataset collected by @echo Rob Kassel at MIT Spoken Language Systems Group @echo http://ai.stanford.edu/~btaskar/ocr/ @echo $$ make run letter.data.gz: wget http://ai.stanford.edu/~btaskar/ocr/letter.data.gz letter.names: wget http://ai.stanford.edu/~btaskar/ocr/letter.names letter.vw: ocr2vw.py letter.data.gz letter.names $(PYTHON) $^ $@ $@.test cut -d' ' -f1 $@ | sort | uniq -c | sort -n # category count CATN = 26 letter.model_$(VW_RUN): letter.vw time $(VW) --oaa $(CATN) --final_regressor $@ \ 
--adaptive --invariant --holdout_off \ --loss_function logistic --passes 14 \ $(VW_OPTS) --data $< -k --cache_file $<.cache_$(VW_RUN) $(RM) $<.cache_$(VW_RUN) letter.predictions_$(VW_RUN): letter.model_$(VW_RUN) time $(VW) --testonly --initial_regressor $< --predictions $@ \ --data letter.vw.test # taken almost verbatim from ../mnist/Makefile CONFUSION='++$$n; $$p=int($$F[0]); $$l=ord($$F[1])-ord("a")+1; \ ++$$c if $$p != $$l; \ ++$$m{"$$l:$$p"}; } { \ print "$* test errors: $$c out of $$n = " . \ sprintf("%.2f%%",100*$$c/$$n) . \ "\nconfusion matrix (rows = truth, columns = prediction):"; \ foreach $$true (1 .. $(CATN)) { \ print join "\t", map { $$m{"$$true:$$_"} || 0 } (1 .. $(CATN)); \ }' %.confusion_$(VW_RUN): %.predictions_$(VW_RUN) @perl -lane $(CONFUSION) $< > $@ @cat $@ run : letter.confusion_$(VW_RUN) clean: $(RM) letter.* .PHONY: clean run vowpal-wabbit-8.6.1.dfsg1/demo/ocr/ocr2vw.py000077500000000000000000000042541332666127000206300ustar00rootroot00000000000000# convert letter.data to letter.vw def read_letter_names (fn): ret = list() with open(fn) as ins: for line in ins: ret.append(line.rstrip()) print "Read %d names from %s" % (len(ret),fn) return ret def find_pixel_start (names): for i in range(len(names)): if names[i].startswith("p_"): return i raise ValueError("No pixel data",names) def data2vw (ifn, train, test, names): lineno = 0 trainN = 0 testN = 0 if ifn.endswith(".gz"): import gzip iopener = gzip.open else: iopener = open id_pos = names.index("id") letter_pos = names.index("letter") pixel_start = find_pixel_start(names) with iopener(ifn) as ins, open(train,"wb") as trainS, open(test,"wb") as testS: for line in ins: lineno += 1 vals = line.rstrip().split('\t') if len(vals) != len(names): raise ValueError("Bad field count", len(vals),len(names),vals,names) char = vals[letter_pos] if len(char) != 1: raise ValueError("Bad letter",char) if lineno % 10 == 0: testN += 1 outs = testS else: trainN += 1 outs = trainS outs.write("%d 1 %s-%s|Pixel" % 
(ord(char)-ord('a')+1,char,vals[id_pos])) for i in range(pixel_start,len(names)): if vals[i] != '0': outs.write(' %s:%s' % (names[i],vals[i])) outs.write('\n') print "Read %d lines from %s; wrote %d lines into %s and %d lines into %s" % ( lineno,ifn,trainN,train,testN,test) if __name__ == '__main__': import argparse parser = argparse.ArgumentParser(description='Convert letters.data to VW format') parser.add_argument('input',help='path to letter.data[.gz]') parser.add_argument('names',help='path to letter.names') parser.add_argument('train',help='VW train file location (90%)') parser.add_argument('test',help='VW test file location (10%)') args = parser.parse_args() data2vw(args.input,args.train,args.test,read_letter_names(args.names)) vowpal-wabbit-8.6.1.dfsg1/demo/random-noise/000077500000000000000000000000001332666127000206345ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/random-noise/Makefile000066400000000000000000000001301332666127000222660ustar00rootroot00000000000000 all:: @echo === This demo is interactive: hit enter to move to next step ./vw-demo vowpal-wabbit-8.6.1.dfsg1/demo/random-noise/README000066400000000000000000000063511332666127000215210ustar00rootroot00000000000000Demo of vowpal wabbit's ability to separate signal from noise. -------------------------------------------------------------- Prerequisites: -------------- All the components below must be present and located somewhere in your $PATH in order to run the demo. 1) A reasonable Unix-like env, with standard utilities including: GNU less GNU diff bash perl GNU make (not needed if you run 'vw-demo' directly) 2) The following executables (from your vw distribution): vw vw-varinfo 3) The following are used to estimate 'goodness' of model and create density & correlation charts: R ggplot2 (R library. Can be installed from within R see http://ggplot2.org/) Main idea of this demo: ----------------------- As long as noise is random, it cannot bias a model. 
If it was biased, and could affect the model we're building - it would't be called "noise." The linear expression used in the demo is: y = a + 2b - 5c + 7 (It is a parameter you can actually pass to the main script to change the default). Each demo sequence consists of the following steps: Generate a random train set in which the label y is a perfect linear combination (a + 2b - 5c + 7) of the input features. Generate a model from this random train-set. Generate a _separate_ train-set using the same linear combination formula, but with a different random seed. so the two sets are completely different, even though they are based on the same formula (and model.) Train a model on the train set. Test the model on the test-set, while ignoring the existing labels. The predicted results should equal the actual test set labels. The demo has 3 parts: --------------------- 1) Ideal conditions. No noise added. 2) Global noise in the range [-1, 1] added to the continuous label y in the train-set. 3) Per input-feature noise of +/-50% of each variable range is added to the train set. This affects the label in a 3-modal dispersed way (as shown in a chart produced by the demo) The noise is generated using the standard perl 'rand' function, roughly simulating a uniform distribution, using a simple pseudo-random number generator. We demonstrate that despite the noise, the (almost) perfect model is being learned in each of the 3 parts of the demo. In fact, the small imprecisions in the model are due to us deliberately limiting the precision of the data sets to 6-digits after the decimal point (for brevity and readability purposes). You may change this by passing '-p N' where N is different than 6 to the 'random-poly' script. The whole demo is scripted. --------------------------- All you need to do is: 1) call './vw-demo' (or type 'make') from the shell 2) Hit Enter (repeatedly) to go to the next step In cases the pager (less) is called, you need to hit 'q' to exit the pager. 
Read the presentation: ---------------------- A presentation (set of slides) including this demo in both pdf and ppt formats can be found here: http://finance.yendor.com/ML/VW/ -- ariel faigon - aug 2013 vowpal-wabbit-8.6.1.dfsg1/demo/random-noise/distrib.r000077500000000000000000000046001332666127000224620ustar00rootroot00000000000000#!/usr/bin/Rscript # --vanilla # # distrib.r: # utility to plot distribution/density of a numeric data-set column # # Usage: # distrib.r data_file ["optional chart title string"] # where data_file contains the numeric vector, a number per line. # # -- where to look for R libraries # .libPaths(c('~/local/lib/R', # '/usr/lib/R/library', # '/usr/lib/R/site-library' # )) suppressPackageStartupMessages(library(ggplot2)) ratio = 1.61803398875 W = 4 H = W / ratio DPI = 200 FONTSIZE = 9 MyGray = 'grey50' title.theme = element_text(family="FreeSans", face="bold.italic", size=FONTSIZE-2) x.title.theme = element_text(family="FreeSans", face="bold.italic", size=FONTSIZE-2, vjust=-0.1) y.title.theme = element_text(family="FreeSans", face="bold.italic", size=FONTSIZE-2, angle=90, vjust=0.2) x.axis.theme = element_text(family="FreeSans", face="bold", size=FONTSIZE-2, colour=MyGray) y.axis.theme = element_text(family="FreeSans", face="bold", size=FONTSIZE-2, colour=MyGray) legend.theme = element_text(family="FreeSans", face="bold.italic", size=FONTSIZE-1, colour="black") eprintf <- function(...) cat(sprintf(...), sep='', file=stderr()) argv <- commandArgs(trailingOnly = TRUE) csvfile <- argv[1] title <- ifelse(! 
is.na(argv[2]), argv[2], 'vw demo: random expression distribution') Ys <- read.csv(csvfile, header=F, col.names='Ys') d <- data.frame(Ys=Ys) Y_labels <- function(yrange) { the.min <- as.integer(floor(yrange[1])) the.max <- as.integer(ceiling(yrange[2] + 1)) seq(from=the.min, to=the.max, by=1) } # geom_histogram(binwidth=.5, alpha=.5, position="identity") # geom_histogram(fill='#3377ff', # binwidth=.01, alpha=.4, stat='density') + g <- ggplot(data=d, aes(x=Ys)) + geom_density(fill='#3377ff', alpha=0.4, lwd=0.2) + scale_x_continuous(breaks=Y_labels(range(Ys))) + ggtitle(title) + xlab(NULL) + theme( plot.title=title.theme, axis.title.y=y.title.theme, axis.title.x=x.title.theme, axis.text.x=x.axis.theme, axis.text.y=y.axis.theme ) pngfile <- sprintf("%s.density.png", csvfile) ggsave(g, file=pngfile, width=W, height=H, dpi=DPI) vowpal-wabbit-8.6.1.dfsg1/demo/random-noise/random-poly000077500000000000000000000140711332666127000230260ustar00rootroot00000000000000#!/usr/bin/perl -w # vim: ts=4 sw=4 expandtab # # Generate a random data-set matching a linear expression # TODO: support real polynomial expressions with x^n # use Getopt::Std; use Scalar::Util qw(looks_like_number); use vars qw($opt_v $opt_D $opt_n $opt_c $opt_t $opt_s $opt_p $opt_w $opt_r $opt_R); my $DefaultN = 10; # number of generated examples my @DeafultExpr = ('a'); my $DefaultFloatPrec = 6; # of decimal points for floating point values my $Sep = ' '; my $ConstStr = '_CONST_'; sub v { return unless $opt_v; if (@_ == 1) { print STDERR @_; } else { printf STDERR @_; } } # Undocumented, for debugging sub D { return unless $opt_D; if (@_ == 1) { print STDERR @_; } else { printf STDERR @_; } } sub usage(@) { print STDERR @_, "\n" if @_; die "Usage: $0 [Options] expression... Options: -v verbose -n Generate rows (data-set examples) -s Call srand() at start -p

Set data-set feature precision to

digits -w Add weight to examples -r Add uniform random noise in range [, ] -R Add per-feature uniform random noise in range [, ] -c output in CSV format (label is 1st column) -t output in TSV format (label is 1st column) Default format is VW (vowpal-wabbit) Examples of expression: 2x + 5y - 4 w +x +y +z ... "; } # # Convert to a list of easily parsable terms, e.g: # "2x+70y-1" => 2x +70y -1 # sub argv_2_terms(@) { my $one_arg = "@_"; $one_arg =~ tr/ //d; $one_arg =~ s/([-+])/ $1/g; my @term_list = split(' ', $one_arg); v("argv_2_terms($one_arg) -> @term_list\n"); @term_list; } sub init { $0 =~ s{.*/}{}; getopts('vDn:ctwp:s:r:R:'); $Sep = ($opt_c) ? ',' : $opt_t ? "\t" : ' '; $opt_n = $DefaultN unless ($opt_n); $opt_p = $DefaultFloatPrec unless (defined $opt_p); # srand($opt_s ? $$^time : 0); srand($opt_s ? $opt_s : 0); if ($opt_r) { my ($min, $max) = split(/[\s,;]+/, $opt_r); usage("-r: bad expresssion, expecting numeric ,") unless (looks_like_number($min) && looks_like_number($max)); usage("-r: bad expresssion, > ") unless ($min <= $max); # Turn opt_r into a function using currying $opt_r = sub { my $r = shift; return $min + ($r * ($max - $min)); } } if ($opt_R) { my ($min, $max) = split(/[\s,;]+/, $opt_R); usage("-R: bad expresssion, expecting numeric ,") unless (looks_like_number($min) && looks_like_number($max)); usage("-R: bad expresssion, > ") unless ($min <= $max); my $range = ($max - $min); # Turn opt_R into a function using currying $opt_R = sub { my $r = shift; my $r2 = rand(1.0); # make the random noise # - Make it a multiplier on the original $r # - multiplier is a random $r2 point in the range (max - min) # - Make it work both ways my $noise = $min + $r2 * $range; my $retval = $r * $noise; v("\topt_R: r2=%g*(min,max)=(%g,%g) noise=%g * \$r=%g => %g\n", $r2, $min, $max, $noise, $r, $retval); return $retval; } } @ARGV = @DeafultExpr unless (@ARGV); @ARGV = argv_2_terms(@ARGV); } sub parse_expr(@) { my @expr = (); foreach my $term (@_) { if ($term =~ 
/^([-+]?)(\d*(?:\.\d*)?)\*?([A-Za-z]*)$/) { my ($sign, $coeff, $varname) = ($1, $2, $3); $sign = ($sign eq '-') ? -1.0 : 1.0; $coeff = 1.0 unless ($coeff); $coeff *= $sign; $varname = $ConstStr unless ($varname); push(@expr, [$coeff, $varname]); v("parse_expr: [coeff, varname]: [%s, %s]\n", $coeff, $varname); } else { usage("bad term: '$term': expecting : e.g. 2.3x"); } } @expr; } sub gen_dataset(@) { my (@expr) = @_; for ($i = 1; $i <= $opt_n; $i++) { my $result = 0; my $feature_str = ''; for ($nvar = 0; $nvar < @expr; $nvar++) { my ($coeff, $varname) = @{$expr[$nvar]}; my ($r, $term_value); if ($varname eq $ConstStr) { $r = $coeff; $term_value = $coeff; } else { $r = rand(1.0); $term_value = $r; if (defined $opt_R) { my $random_add = $opt_R->($r); v("\topt_R: r: %g -> random_add: %g\n", $r, $random_add); $term_value += $random_add; } $term_value *= $coeff; } v("\tgen_dataset: [%s%s] r:%.*f => %.*f\n", $coeff, $varname, $opt_p, $r, $opt_p, $term_value); if ($varname ne $ConstStr) { # Don't add constant to the input features # Add it only to the label $feature_str .= ($opt_c || $opt_t) ? 
sprintf("%s%.*f", $Sep, $opt_p, $r) : sprintf("%s%s:%.*f", $Sep, $varname, $opt_p, $r); } $result += $term_value; } if (defined $opt_r) { my $random_noise = $opt_r->(rand(1.0)); # v("random_noise: %g\n", $random_noise); $result += $random_noise; } if ($opt_c || $opt_t) { printf "%.*f%s\n", $opt_p, $result, $feature_str; } elsif ($opt_w) { # -- add weight printf "%.*f %s %s|f%s\n", $opt_p, $result, 1, $i, $feature_str; } else { printf "%.*f '%s|f%s\n", $opt_p, $result, $i, $feature_str; } } } # -- main init(); my @expr = parse_expr(@ARGV); gen_dataset(@expr); vowpal-wabbit-8.6.1.dfsg1/demo/random-noise/vw-demo000077500000000000000000000302601332666127000221410ustar00rootroot00000000000000#!/bin/bash # # Vowpal Wabbit interactive demo of noise-resistance # # Requires the following to be installed on your machine: # # 1) vw - the vowpal-wabbit executable # 2) R + ggplot2 - for all the beautiful charts # 3) A few scripts included with this one: # 3a) random-poly - a perl script for generating random data-sets # 3b) distrib.r - Density distribution plot utility, written in R # 3c) x-vs-y.r - X vs Y correleation plot utility, written in R # export PATH=.:$PATH Pager='less' ImgViewCandidates="gwenview display irfanview xee preview" ImgViewer= Poly='a + 2b - 5c + 7' find_ggplot2() { err=`Rscript -e 'library(ggplot2)' 2>&1 | grep 'Error.*ggplot'` case "$err" in *rror*) echo "Couldn't find the R ggplot2 library. Is it installed?" 1>&2 echo -- "$err" 1>&2 exit 1 ;; esac } find_image_viewer() { # Please add your favorite OS image viewer here for exe in $ImgViewCandidates; do case `which $exe` in '') : keep going... ;; *) ImgViewer=$exe : found image viewer: $ImgViewer break ;; esac done case "$ImgViewer" in '') echo "Sorry: coudn't find an image viewer in PATH=$PATH Please add your viewer to 'ImgViewCandidates' in the '$0' script." 
1>&2 exit 1 ;; esac } check_prereqs() { missing=0 find_image_viewer for exe in vw R Rscript random-poly distrib.r x-vs-y.r; do case `which $exe` in '') echo "$0: can't find $exe in PATH=$PATH - please install it" missing=$(($missing+1)) ;; *) : found $exe - cool ;; esac done case $missing in 0) : ;; *) exit 1 ;; esac find_ggplot2 } # # demo_cmd is the work-horse of our presentation. # 'main' can be simply a sequence of multiple calls to it. # It has 3 goals: # 1) Ensure we get all the little details right and never # make a mistake during the actual presentation # 2) Save time typing stuff # 3) Anyone else can reproduce what we did perfectly in their # own env. # # demo_cmd [options] 'header/explanation string' 'command string' # options: # -p don't pause for user to hit [enter] # -h don't print the header-string # -s don't advance the step # -e don't echo the command (be silent), just execute # -c don't execute the command # demo_cmd() { # by default we do all of them opt_p=1; opt_h=1; opt_s=1; opt_e=1 opt_c=1 # Must initialize OPTIND since it doesn't reset between # calls to 'demo_cmd()'! 
OPTIND=1 while getopts 'phsec' opt; do case "$opt" in p) opt_p= ;; h) opt_h= ;; s) opt_s= ;; e) opt_e= ;; c) opt_c= ;; esac done shift $((OPTIND-1)) header="$1" cmd="$2" # echo "demo_cmd: args: |$@| header=|$header| cmd=|$cmd| OPTIND=$OPTIND" case $opt_s in 1) step=$(($step+1)) ;; esac case $opt_h in 1) echo "=== $step: $header" ;; esac case $opt_p in 1) # read the command-line in, but allow real-time edits # via GNU readline read -ep "\$ " -i "$cmd" ans ;; *) case $opt_e in 1) # If we have no readline/prompt we need to print # the command so it can be seen by the audience echo -n "\$ $cmd" ;; esac ;; esac case $opt_c in 1) case $opt_p in 1) eval "$ans" ;; *) eval "$cmd" ;; esac echo ;; esac } label_column() { data_file="$1" label_file="$2" cut -d' ' -f1 $data_file > $label_file } y_density() { data_file=$1 chart_title=$2 label_file="Ys/$data_file" label_column "$data_file" "$label_file" distrib.r $label_file "$chart_title" $ImgViewer $label_file.density.png 2>/dev/null } clean_slate() { /bin/rm -f r.* Ys 2>/dev/null mkdir -p Ys } # # demo_session # full session of random-data-generation, training, testing... 
# demo_session() { mode="$1" step=0 clean_slate # --- train-set generation case $mode in globalnoise) rand='-r -1,1'; msg=' (w/ global noise)' ;; varnoise) rand='-R -.5,.5'; msg=' (w/ per-var noise)' ;; regularize) rand='-R -.5,.5'; msg=' (w/ per-var noise)' ;; clean) rand='-r 0,0'; msg='' ;; # no noise added esac demo_cmd "Generate a training data-set$msg & inspect it (-n N is number of data-points (examples) -pN is data precision -r min,max is global added noise -R min,max is per-variable added noise):" \ "random-poly -n 50000 -p6 $rand $Poly > r.train" case $mode in 'clean') # Only do this the 1st time, othewise it is getting tedious demo_cmd -s "inspect the training-set (use 'q' to exit $Pager):" \ "$Pager r.train" ;; esac demo_cmd "Visualize train-set Ys (labels) density [~5 secs to generate chart]:" \ "y_density r.train 'Train-set random expression distribution: $Poly'" case $mode in # --- in the case where we added noise, # --- add a step of showing how big is the noise *noise) echo '+-----------------------------------------------------+' echo '| visualize the added random noise |' echo '+-----------------------------------------------------+' # -- Prepare the reference Ys (without the noise) case $mode in globalnoise) random-poly -n 50000 -p6 -r 0,0 $Poly | \ cut -d' ' -f1 > Ys/r.train.nonoise ;; varnoise) random-poly -n 50000 -p6 -R 0,0 $Poly | \ cut -d' ' -f1 > Ys/r.train.nonoise ;; esac demo_cmd -p "Generate plot of clean vs NOISY Ys (labels)" \ 'x-vs-y.r Ys/r.train.nonoise Ys/r.train X-vs-Y.png' demo_cmd -p "View plot of CLEAN (X) vs NOISE-filled (Y) values " \ "$ImgViewer X-vs-Y.png 2>/dev/null" ;; esac # --- Training case $mode in globalnoise) vw_args='' msg1='Train: let VW build a model on noisy -1/+1 train-set' msg2='Train: look at the model weights' ;; varnoise) vw_args='' msg1='Train: let VW build a model (w/ per var noise)' msg2='Train: look at the model weights (w/ per var noise)' ;; regularize) vw_args='--l2 0.000001' msg1='Train: let VW build 
a model (w/ anti-noise --l2)' msg2='Train: look at the model weights (w/ anti-noise --l2)' ;; clean) # vanilla vw_args='-l 5' msg1='Train: let VW build a model from the train-set' msg2='Train: look at the model weights' ;; esac demo_cmd "$msg1" "vw -k $vw_args r.train -f r.model" echo '+------------------------------------------------------------------+' echo '# Notice how fast training took to complete (about 0.1 sec).' echo '# vw is faster processing data than all other programs in this demo.' echo '#' echo '# Since learning is faster than IO, and runs in a separate thread,' echo '# vw training speed is limited only by the time to read the data.' echo '+------------------------------------------------------------------+' demo_cmd "$msg2" "vw-varinfo -k $vw_args r.train" echo '+------------------------------------------------------------------+' echo '# Notice how accurate the model is: model weights are exactly,' echo "# or very close to our target linear expression: $Poly" echo '+------------------------------------------------------------------+' # --- test-set generation demo_cmd "Generate a test data-set (note different random seed: -s)" \ "random-poly -n 50000 -p6 -s 1313131 $Poly > r.test" demo_cmd "Show that train and test data-sets are different" \ 'diff <(head -9 r.train) <(head -9 r.test)' demo_cmd "Visualize test-set Ys (labels) density [~5 sec to generate chart]:" \ "y_density r.test 'Test-set random expression distribution: $Poly'" demo_cmd -p "Clear the Ys (labels) from the test-set" \ 'perl -i -pe "s/\S+/0/" r.test' case $mode in 'clean') # Only do this the 1st time, othewise it is getting tedious demo_cmd -s "inspect test-set to see Ys (labels) are gone (hit 'q' to exit $Pager):" \ "$Pager r.test" esac # --- prediction of test-set Ys using trained-model demo_cmd "Predict: VW uses the model to predict the test-set Ys (labels)" \ 'vw -t -i r.model r.test -p r.predict' echo '+-----------------------------------------------------------+' echo '# Since 
Ys (labels) have been zeroed - the reported error' echo '# is large even though predictions are, in fact, accurate.' echo '# We are also running vw with "-t" (test-only) so no weights' echo '# are being updated in-memory during the prediction run.' echo '+-----------------------------------------------------------+' demo_cmd -p "Extract 1st column (Ys) of prediction set (label_column is an internal func defined in $0)" \ 'label_column r.predict Ys/r.predict' # --- Check prediction (vs. actual) quality # textual eyeball inspection demo_cmd "Compare predictions with actual values side-by-side Note how close they are, since the model weights are near-perfect:" \ "diff -y -W 24 Ys/r.predict Ys/r.test | $Pager" demo_cmd -p "Plot predictions vs actual (test) values [~5 secs to generate chart]:" \ 'x-vs-y.r Ys/r.predict Ys/r.test X-vs-Y.png' demo_cmd "Look at plot of predicted vs actual (test) values:" \ "$ImgViewer X-vs-Y.png 2>/dev/null" } # # -- main # check_prereqs case "$@" in # support passing an initial expression for the whole demo # from the command line *[0-9a-zA-Z]*) Poly="$@" ;; esac echo '+-----------------------------------------------------------------+' echo '| Demo of vw ability to separate signal from noise |' echo '| |' echo '| 1) Create a random data-set & learn from it (perfectly). |' echo '| 2) Add global noise to each example, and finally, |' echo '| 3) Add a separate noise component to each input feature. |' echo '| |' echo '| Goal: demonstrate how vw creates near perfect models |' echo '| despite various forms of noise. |' echo '| At each of the 3 steps we visualized he data-set label density, |' echo '| the noise, and the model prediction quality using R+ggplot2. |' echo '+-----------------------------------------------------------------+' echo '+-----------------------------------------------------------------+' echo '| 1) First session warm-up: in a "perfect" world (no noise)... 
|' echo '+-----------------------------------------------------------------+' demo_session clean echo '+-----------------------------------------------------------------+' echo '| 2) Repeat session + added GLOBAL random noise |' echo '+-----------------------------------------------------------------+' demo_session globalnoise echo '+-----------------------------------------------------------------+' echo '| 3) Repeat session + added PER VARIABLE random noise |' echo '+-----------------------------------------------------------------+' demo_session varnoise echo " -----> Q.E.D" # --- Demo using regularization # Not done here. We need a more challenging data-set to # demonstrate effective use of regularization. # echo '+-----------------------------------------------------------------+' # echo '| Repeat session + added anti-random noise (w/ --l2) |' # echo '+-----------------------------------------------------------------+' # demo_session regularize vowpal-wabbit-8.6.1.dfsg1/demo/random-noise/x-vs-y.r000077500000000000000000000054741332666127000221770ustar00rootroot00000000000000#!/usr/bin/Rscript # --vanilla # # Utility to plot two data-set numeric columns against each other, # and generate a X-vs-Y chart + pearson correlation between the two. # # Usage: x-vs-y.r data_file1 data_file2 pngfile # Where: # data_file1 & data_file2 # each contains one data column from some data-set. # Columns can be easily pre-extracted into the files using # cut -d... -f... 
dataset # # pngfile # is the output chart # # -- where to look for R libraries # .libPaths(c('~/local/lib/R', # '/usr/lib/R/library', # '/usr/lib/R/site-library' # )) suppressPackageStartupMessages(library(ggplot2)) one_column <- function(filename, sep, fieldno) { cmd <- sprintf("cut -d '%s' -f %d '%s'", sep, fieldno, filename) read.table(pipe(cmd), header=F)[[1]] } ratio = 1 W = 6 H = W / ratio DPI = 200 FONTSIZE = 12 MyGray = 'grey50' title.theme = element_text(family="FreeSans", face="bold.italic", size=FONTSIZE) x.title.theme = element_text(family="FreeSans", face="bold.italic", size=FONTSIZE, vjust=-0.1) y.title.theme = element_text(family="FreeSans", face="bold.italic", size=FONTSIZE, angle=90, vjust=0.2) x.axis.theme = element_text(family="FreeSans", face="bold", size=FONTSIZE-2, colour=MyGray) y.axis.theme = element_text(family="FreeSans", face="bold", size=FONTSIZE-2, colour=MyGray) legend.theme = element_text(family="FreeSans", face="bold.italic", size=FONTSIZE-1, colour="black") eprintf <- function(...) 
cat(sprintf(...), sep='', file=stderr()) argv <- commandArgs(trailingOnly = TRUE) # Xs <- read.csv(argv[1], header=F, col.names='X', colClasses=c('numeric')) Xs <- as.numeric(one_column(argv[1], sep=' ', 1)) # Ys <- read.csv(argv[2], header=F, col.names='Y', colClasses=c('numeric')) Ys <- as.numeric(one_column(argv[2], sep=' ', 1)) pearson <- as.numeric(cor(Xs, Ys)) eprintf("\nPearson Correlation: %.12f\n", pearson) d <- data.frame(X=Xs, Y=Ys) title <- sprintf('vw demo: expected vs actual values\nPearson correlation: %.12f', pearson) adaptive.alpha <- 0.02 + (abs(2.0 - (2.0 * pearson))) * 0.3 # eprintf("adaptive.alpha=%g\n", adaptive.alpha) g <- ggplot(data=d, aes(x=X, y=Y), ) + geom_point(shape=20, alpha=adaptive.alpha, size=0.4) + ggtitle(title) + theme( plot.title=title.theme, axis.title.y=y.title.theme, axis.title.x=x.title.theme, axis.text.x=x.axis.theme, axis.text.y=y.axis.theme ) pngfile <- ifelse(exists(argv[3]) && nchar(argv[3]) > 0, argv[3], 'X-vs-Y.png') # eprintf("ggsave: pngfile=%s\n", pngfile) ggsave(g, file=pngfile, width=W, height=H, dpi=DPI) vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/000077500000000000000000000000001332666127000205225ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/Makefile000066400000000000000000000016651332666127000221720ustar00rootroot00000000000000.SECONDARY: bits?=26 passes?=80 defaults: defaults.vw aloi_test.vw.gz zcat $(word 2,$^) | sed 's/|/|f/' | ../../vowpalwabbit/vw -t -i $< defaults.vw: aloi_train.vw.gz zcat $< | sed 's/|/|f/' | ../../vowpalwabbit/vw -f $@ -k -c --passes $(passes) --recall_tree 1000 -b $(bits) --loss_function logistic -q'\x88': -l 1 tuned: tuned.vw aloi_test.vw.gz zcat $(word 2,$^) | sed 's/|/|f/' | ../../vowpalwabbit/vw -t -i $< tuned.vw: aloi_train.vw.gz zcat $< | sed 's/|/|f/' | ../../vowpalwabbit/vw -f $@ -k -c --passes $(passes) --recall_tree 1000 -b $(bits) --loss_function logistic -q'\x88': -l 0.85 --randomized_routing 1 --max_depth 12 --max_candidates 58 --bern_hyper 0.01 
aloi_test.vw.gz: wget http://lowrank.net/nikos/aloi_test.vw.gz aloi_train.vw.gz: wget http://lowrank.net/nikos/aloi_train.vw.gz oaa: oaa.vw aloi_test.vw.gz vw -t -i $^ oaa.vw: aloi_train.vw.gz vw -f $@ -k -c --passes $(passes) --oaa 1000 $< -b $(bits) -l 1.86 vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/README.md000066400000000000000000000055631332666127000220120ustar00rootroot00000000000000Recall tree demo ------------------------------- This demo exercises the recall tree reduction for logarithmic time multiclass classification. The dataset used is [ALOI](http://aloi.science.uva.nl/), which only has 1000 classes in it. Due to constant factor overhead, I wouldn't recommend using recall tree unless you have at least 10,000 classes; but it's good to have a demo that runs quickly without a lot of memory. The demo targets are: - `make defaults`: trains and tests using what are essentially the defaults for recall tree (see below). Eventually yields 11.2% test error. - `make tuned`: trains and tests using hyperparameters found via random probe hyperparameter tuning. Eventually yields 9.9% test error. - `make oaa`: trains and tests one-against-all using hyperparameters found via random probe hyperparameter tuning. Yields 12.1% test error (in less time than recall tree, it turns out). ### Recall Tree for your problem ### - As indicated, if you have less than 10000 classes, you should probably be using OAA. To accelerate training you can use negative gradient subsampling and multicore training via hogwild. - If you're still reading, you have more than 10000 classes. Therefore you will need many bits in your predictor. Use as many as you can afford computationally. - If your features are all binary then try disabling normalized updates via `--adaptive --invariant`, which is like having an extra bit in your predictor without the memory cost. - Logistic loss (`--loss_function logistic`) always works better, as far as I can tell. 
- There are extra features, consisting of the identities of the nodes in the routing tree, which are added to the example before passing to the underlying binary classifier. - These are located in namespace '\x88'. So, for example, you can interact them with other namespaces as in this demo (see Makefile: this is what `-q '\x88':` is doing.) - For problems with lots of features, interacting with all the path features can be both a computational and statistical drag. Enabling the option `--node_only` only generates a single feature corresponding to the identity of the leaf node in the routing tree, which can be better under these conditions. - Typically increasing the number of candidates per leaf `--max_candidates` from the default will improve accuracy at the cost of additional computation. - Computational overhead increases with deeper trees, but (test) accuracy does not necessarily increase. Therefore you have to play around with `--max_depth` and `--bern_hyper`. - Increasing `--bern_hyper` discourages deeper trees, decreasing it encourages deeper trees. - `--max_depth` is a hard limit. - Randomized routing `--randomized_routing` is a regularizer we came up with which sometimes works well. It's worth trying, but note that it will make your loss on the training set appear worse, so you'll need to assess on test data. 
vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/imagenet/000077500000000000000000000000001332666127000223135ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/imagenet/Makefile000066400000000000000000000077421332666127000237650ustar00rootroot00000000000000SHELL=/bin/zsh numlabels=21841 bits?=30 .SECONDARY: training.txt.gz: wget --tries=100 -c http://hunch.net/~jl/datasets/imagenet/training.txt.gz # ln -s /data/jcl/training.txt.gz testing.txt.gz: wget --tries=100 -c http://hunch.net/~jl/datasets/imagenet/testing.txt.gz # ln -s /data/jcl/testing.txt.gz full_oaa_hyper%: full_oaa_hyper%.vw testing.txt.gz time ./do-oaa-hogwild $(word 2,$^) -i $< -t full_oaa_hyper%.vw: training.txt.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; print join " ", $$eta;' | \ while read eta ; \ do ( \ echo $$eta ; \ ./do-oaa-hogwild $< -f $@ --oaa $(numlabels) --oaa_subsample 1000 -b $(bits) --loss_function logistic -l $$eta ; \ ) \ done #> full_oaa_hyper$*.train 2>&1; \ oaa_hyper%: training.txt.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; print join " ", $$eta;' | \ while read eta ; \ do ( \ echo $$eta ; \ vw <(zcat $< | head -500000) --oaa $(numlabels) --oaa_subsample 1000 -b $(bits) --loss_function logistic -l $$eta ; \ ) > $@ 2>&1; \ done timefufull_hyper%: full_hyper%.vw testing.txt.gz (time vw -t -i $< <(zcat $(word 2,$^) | head -10000); time vw -t -i $< <(zcat $(word 2,$^) | head -20000)) > $@ 2>&1 full_hyper%: full_hyper%.vw testing.txt.gz (time vw -t -i $(word 1,$^) <(zcat $(word 2,$^) | sed 's/|/|b/')) > $@ 2>&1 full_hyper%.vw: training.txt.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; $$depth = int (11 + 6 * rand ()); \ 1; $$mc = int (20 + 80 * rand ()); \ 1; $$log_hyper = log (0.01) + (log (10) - log (0.01)) * rand (); \ 1; $$rr = rand () > 0.5 ? 1 : 0; \ 1; $$dq = rand () > 0.5 ? 
"a" : "b"; \ 1; print join " ", $$eta, $$depth, $$mc, exp ($$log_hyper), $$rr, $$dq;' | \ while read eta depth mc hyper rr dq; \ do ( \ echo $$eta $$depth $$mc $$hyper $$rr $$dq; \ vw -f $@ --recall_tree $(numlabels) <(zcat $< | sed 's/|/|b/') -b $(bits) --loss_function logistic -l $$eta --max_depth $$depth --max_candidates $$mc --bern_hyper $$hyper --link glf1 --randomized_routing $$rr --node_only 1 -q '\x88'$$dq; \ ) > full_hyper$*.train 2>&1; \ done hyper%: training.txt.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; $$depth = int (11 + 6 * rand ()); \ 1; $$mc = int (20 + 80 * rand ()); \ 1; $$log_hyper = log (0.01) + (log (10) - log (0.01)) * rand (); \ 1; $$rr = rand () > 0.5 ? 1 : 0; \ 1; $$dq = rand () > 0.5 ? "a" : "b"; \ 1; print join " ", $$eta, $$depth, $$mc, exp ($$log_hyper), $$rr, $$dq;' | \ while read eta depth mc hyper rr dq; \ do ( \ echo $$eta $$depth $$mc $$hyper $$rr $$dq; \ vw --recall_tree $(numlabels) <(zcat $< | sed 's/|/|b/' | head -500000) -b $(bits) --loss_function logistic -l $$eta --max_depth $$depth --max_candidates $$mc --bern_hyper $$hyper --link glf1 --randomized_routing $$rr --node_only 1 -q '\x88'$$dq; \ ) > $@ 2>&1; \ done log_multi: log_multi.vw testing.txt.gz time vw -t -i $^ log_multi.vw: training.txt.gz time vw -f $@ --log_multi $(numlabels) $< -b $(bits) --loss_function logistic -l 1 recall_tree: recall_tree.vw testing.txt.gz time vw -t -i $(word 1,$^) <(zcat $(word 2,$^) | sed 's/|/|b/') recall_tree.vw: training.txt.gz time vw -f $@ --recall_tree $(numlabels) <(zcat $< | sed 's/|/|b/') -b $(bits) --loss_function logistic -l 1 --node_only 1 -q '\x88'b #--link glf1 --randomized_routing 1 #$(randomized_routing) oaahogwild.vw: training.txt.gz time ./do-oaa-hogwild $< -f $@ --oaa $(numlabels) --oaa_subsample 1000 -b $(bits) --loss_function logistic -l 1 oaahogwild: oaahogwild.vw testing.txt.gz (time vw -t -i $^) > $@ 2>&1 #(time ./do-oaa-hogwild -t -i $^) > $@ 2>&1 timefuoaahogwild: oaahogwild.vw 
testing.txt.gz (time vw -t -i $< <(zcat $(word 2,$^) | head -10000); time vw -t -i $< <(zcat $(word 2,$^) | head -20000)) > $@ 2>&1 vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/imagenet/README.md000066400000000000000000000003771332666127000236010ustar00rootroot00000000000000Recall tree demo for imagenet ------------------------------- This demo exercises the recall tree reduction for logarithmic time multiclass classification on the imagenet dataset. It takes a long time to run, a lot of memory, and a lot of disk space. vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/imagenet/do-oaa-hogwild000077500000000000000000000040361332666127000250370ustar00rootroot00000000000000#! /bin/zsh nukeem() { \ trap - INT QUIT TERM pkill -9 -f 'vw.*--port 26543' } if which netcat >/dev/null 2>/dev/null; then netcat=netcat elif which nc >/dev/null 2>/dev/null; then netcat=nc else echo "Install netcat or nc" >&2 exit 1 fi data="$1" shift { vw "$@" --num_children 24 --port 26543 2>&1 | perl -lane 'print $_ unless $F[2] =~ /^\d$/ && $c{$F[2]}++;' } & trap 'nukeem; exit 1' INT QUIT TERM while ! 
$netcat -z localhost 26543 do sleep 1 done sleep 5 zcat $data | \ ./map \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) \ >($netcat localhost 26543 > /dev/null) pkill -f 'vw.*--port 26543' while test "x$1" = "x-f" && test ! -s "$2" do sleep 1 done vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/imagenet/map000077500000000000000000000010571332666127000230210ustar00rootroot00000000000000#! /usr/bin/env perl use warnings; use strict; use IO::File; use IO::Select; my @mappers = map { $_->autoflush (1); $_ } map { new IO::File $_, "w" or die "$_: $!" } @ARGV; my $bufsize = 1; my @buf = grep { defined } map { scalar } (1 .. $bufsize); my $n = 0; while (@buf) { my $fh = $mappers[$n % @mappers]; my @ready = IO::Select->new ($fh)->can_write (20000); if (@ready) { print $fh @buf; @buf = grep { defined } map { scalar } (1 .. 
$bufsize); } ++$n; } vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/ltcb/000077500000000000000000000000001332666127000214465ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/ltcb/Makefile000066400000000000000000000252641332666127000231170ustar00rootroot00000000000000SHELL=/bin/zsh .SECONDARY: numlabels=80000 all: enwik8.train.vw.gz sweep_oaa%: sweep_oaa%.vw enwik8.test.vw.gz (time vw -t -i $(word 1,$^) <(zcat $(word 2,$^) | perl -ne '($$l,$$b)=split /\s/, $$_, 2; $$l = $(numlabels)+2-$$l; $$l = 1+$* if $$l > $*; print "$$l $$b"')) > $@ 2>&1 sweep_oaa%.vw: enwik8.train.vw.gz (time vw -f $@ --oaa $$((1+$*)) --oaa_subsample 1000 <(zcat $< | perl -ne '($$l,$$b)=split /\s/, $$_, 2; $$l = $(numlabels)+2-$$l; print "$$l $$b" if $$l <= $*') -q bc --cubic bcd -b 30 --loss_function logistic -l 1) > sweep_oaa$*.train 2>&1 full_inorder_log_multihyper%.vw: bits=32 full_inorder_log_multihyper%.vw: enwik9.inorder.train.vw.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; $$nodes = int (10000 + 40000 * rand ()); \ 1; $$loss = rand () > 0.5 ? "squared" : "logistic"; \ 1; $$swap = 2 ** (int (4 * rand ())); \ 1; $$quad = rand () > 0.5 ? "z" : "b"; \ 1; $$cubic = rand () > 0.5 ? "z" : "b"; \ 1; print join " ", $$eta, $$nodes, $$loss, $$swap, $$quad, $$cubic;' | \ while read eta nodes loss swap quad cubic; \ do ( \ echo $$eta $$nodes $$loss $$swap $$quad $$cubic; \ time vw -f $@ --log_multi $$(( $(numlabels)+1 )) $< -q c$$quad --cubic cd$$cubic -b $(bits) --loss_function $$loss -l $$eta --swap_resistance $$swap; \ ) > full_inorder_log_multihyper$*.train 2>&1; \ done full_log_multihyper%.vw: bits=32 full_log_multihyper%.vw: enwik9.train.vw.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; $$nodes = int (10000 + 40000 * rand ()); \ 1; $$loss = rand () > 0.5 ? "squared" : "logistic"; \ 1; $$swap = 2 ** (int (4 * rand ())); \ 1; $$quad = rand () > 0.5 ? "z" : "b"; \ 1; $$cubic = rand () > 0.5 ? 
"z" : "b"; \ 1; print join " ", $$eta, $$nodes, $$loss, $$swap, $$quad, $$cubic;' | \ while read eta nodes loss swap quad cubic; \ do ( \ echo $$eta $$nodes $$loss $$swap $$quad $$cubic; \ time vw -f $@ --log_multi $$(( $(numlabels)+1 )) $< -q c$$quad --cubic cd$$cubic -b $(bits) --loss_function $$loss -l $$eta --swap_resistance $$swap; \ ) > full_log_multihyper$*.train 2>&1; \ done full_log_multihyper%: full_log_multihyper%.vw enwik9.test.vw.gz (time vw -t -i $^) > $@ 2>&1 log_multihyper%: bits=30 log_multihyper%: enwik8.train.vw.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; $$nodes = int (10000 + 40000 * rand ()); \ 1; $$loss = rand () > 0.5 ? "squared" : "logistic"; \ 1; $$swap = 2 ** (int (4 * rand ())); \ 1; $$quad = rand () > 0.5 ? "z" : "b"; \ 1; $$cubic = rand () > 0.5 ? "z" : "b"; \ 1; print join " ", $$eta, $$nodes, $$loss, $$swap, $$quad, $$cubic;' | \ while read eta nodes loss swap quad cubic; \ do ( \ echo $$eta $$nodes $$loss $$swap $$quad $$cubic; \ vw --log_multi $$(( $(numlabels)+1 )) $< -q c$$quad --cubic cd$$cubic -b $(bits) --loss_function $$loss -l $$eta --swap_resistance $$swap; \ ) > $@ 2>&1; \ done full_inorder_hyper%.vw: enwik9.inorder.train.vw.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; $$depth = int (11 + 6 * rand ()); \ 1; $$mc = int (20 + 80 * rand ()); \ 1; $$log_hyper = log (0.01) + (log (10) - log (0.01)) * rand (); \ 1; $$rr = rand () > 0.5 ? 1 : 0; \ 1; $$no = rand () > 0.5 ? 1 : 0; \ 1; $$dq = rand () > 0.5 ? "z" : "b"; \ 1; $$cubic = rand () > 0.5 ? "z" : "b"; \ 1; $$pf = rand () > 0.5 ? "z" : "b"; \ 1; $$pfc = rand () > 0.5 ? 
"z" : "b"; \ 1; print join " ", $$eta, $$depth, $$mc, exp ($$log_hyper), $$rr, $$no, $$dq, $$cubic, $$pf, $$pfc;' | \ while read eta depth mc hyper rr no dq cubic pf pfc; \ do ( \ echo $$eta $$depth $$mc $$hyper $$rr $$no $$dq $$cubic $$pf $$pfc; \ time vw -f $@ $< -b 32 --recall_tree $$(( $(numlabels)+1 )) -l $$eta --max_depth $$depth --max_candidates $$mc --bern_hyper $$hyper --randomized_routing $$rr --node_only $$no -q c$$dq --cubic cd$$cubic -q '\x88'$$pf --cubic '\x88'c$$pfc; \ ) > full_inorder_hyper$*.train 2>&1; \ done full_hyper%: full_hyper%.vw enwik9.test.vw.gz (time vw -t -i $^) > $@ 2>&1 full_hyper%.vw: enwik9.train.vw.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; $$depth = int (11 + 6 * rand ()); \ 1; $$mc = int (20 + 80 * rand ()); \ 1; $$log_hyper = log (0.01) + (log (10) - log (0.01)) * rand (); \ 1; $$rr = rand () > 0.5 ? 1 : 0; \ 1; $$no = rand () > 0.5 ? 1 : 0; \ 1; $$dq = rand () > 0.5 ? "z" : "b"; \ 1; $$cubic = rand () > 0.5 ? "z" : "b"; \ 1; $$pf = rand () > 0.5 ? "z" : "b"; \ 1; $$pfc = rand () > 0.5 ? "z" : "b"; \ 1; print join " ", $$eta, $$depth, $$mc, exp ($$log_hyper), $$rr, $$no, $$dq, $$cubic, $$pf, $$pfc;' | \ while read eta depth mc hyper rr no dq cubic pf pfc; \ do ( \ echo $$eta $$depth $$mc $$hyper $$rr $$no $$dq $$cubic $$pf $$pfc; \ time vw -f $@ $< -b 32 --recall_tree $$(( $(numlabels)+1 )) -l $$eta --max_depth $$depth --max_candidates $$mc --bern_hyper $$hyper --randomized_routing $$rr --node_only $$no -q c$$dq --cubic cd$$cubic -q '\x88'$$pf --cubic '\x88'c$$pfc; \ ) > full_hyper$*.train 2>&1; \ done full_oaa_hyper%: full_oaa_hyper%.vw enwik9.test.vw.gz (time vw -t -i $^) > $@ 2>&1 # hogwild fails with more bits ... 
some kind of deadlock issue :( full_oaa_hyper%.vw: bits=30 full_oaa_hyper%.vw: enwik9.train.vw.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; print join " ", $$eta;' | \ while read eta; \ do ( \ echo $$eta; \ unbuffer ./do-oaa-hogwild $< -f $@ --oaa $$(( $(numlabels)+1 )) --oaa_subsample 2000 -q bc --cubic bcd -b $(bits) --loss_function logistic -l $$eta; \ ) |& > full_oaa_hyper$*.train 2>&1; \ done oaa_hyper%: bits=30 oaa_hyper%: enwik8.train.vw.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; print join " ", $$eta;' | \ while read eta; \ do ( \ echo $$eta; \ vw $< --oaa $$(( $(numlabels)+1 )) --oaa_subsample 2000 -q bc --cubic bcd -b $(bits) --loss_function logistic -l $$eta; \ ) |& > $@ 2>&1; \ done hyper%: enwik8.train.vw.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; $$depth = int (11 + 6 * rand ()); \ 1; $$mc = int (20 + 80 * rand ()); \ 1; $$log_hyper = log (0.01) + (log (10) - log (0.01)) * rand (); \ 1; $$rr = rand () > 0.5 ? 1 : 0; \ 1; $$no = rand () > 0.5 ? 1 : 0; \ 1; $$dq = rand () > 0.5 ? "z" : "b"; \ 1; $$cubic = rand () > 0.5 ? "z" : "b"; \ 1; $$pf = rand () > 0.5 ? "z" : "b"; \ 1; $$pfc = rand () > 0.5 ? 
"z" : "b"; \ 1; print join " ", $$eta, $$depth, $$mc, exp ($$log_hyper), $$rr, $$no, $$dq, $$cubic, $$pf, $$pfc;' | \ while read eta depth mc hyper rr no dq cubic pf pfc; \ do ( \ echo $$eta $$depth $$mc $$hyper $$rr $$no $$dq $$cubic $$pf $$pfc; \ vw $< -b 30 --recall_tree $$(( $(numlabels)+1 )) -l $$eta --max_depth $$depth --max_candidates $$mc --bern_hyper $$hyper --randomized_routing $$rr --node_only $$no -q c$$dq --cubic cd$$cubic -q '\x88'$$pf --cubic '\x88'c$$pfc; \ ) > $@ 2>&1; \ done sweep_recall_tree%: sweep_recall_tree%.vw enwik8.test.vw.gz (time vw -t -i $^) > $@ 2>&1 sweep_recall_tree%.vw: bern_hyper=1 sweep_recall_tree%.vw: enwik8.train.vw.gz (time vw -f $@ --recall_tree $$(( $(numlabels)+1 )) $< -q bc --cubic bcd -b 30 --loss_function logistic -l 1 -q '\x88'b --cubic '\x88'bc --max_candidates $* --bern_hyper $(bern_hyper)) > sweep_recall_tree$*.train 2>&1 recall_tree%: recall_tree%.vw enwik%.test.vw.gz vw -t -i $^ recall_tree%.vw: vwextra= recall_tree%.vw: bits=32 recall_tree%.vw: enwik%.train.vw.gz vw -f $@ --recall_tree $$(( $(numlabels)+1 )) $< -q bc --cubic bcd -b $(bits) --loss_function logistic -l 1 -q '\x88'b --cubic '\x88'bc $(vwextra) #--link glf1 --randomized_routing $(randomized_routing) log_multi%: log_multi%.vw enwik%.test.vw.gz time vw -t -i $^ log_multi%.vw: enwik%.train.vw.gz time vw -f $@ --log_multi $$(( $(numlabels)+1 )) $< -q bc --cubic bcd -b 32 --loss_function logistic # NB: 30 instead of 32 here ... hogwild increases memory usage (?) 
oaahogwild%.vw: enwik%.train.vw.gz ./do-oaa-hogwild $< -f $@ --oaa $$(( $(numlabels)+1 )) --oaa_subsample 2000 -q bc --cubic bcd -b 30 --loss_function logistic -l 1 oaahogwild%: enwik%.test.vw.gz oaahogwild%.vw ./do-oaa-hogwild $< -i $(word 2,$^) -t enwik%.zip: wget -c http://mattmahoney.net/dc/enwik$*.zip enwik%.freq: enwik%.zip zcat enwik$*.zip | \ perl ./wikifil.pl | \ perl -lane 'foreach (@F) { $$c{$$_}++; } } \ 1; { while (($$k, $$v) = each %c) { print "$$k\t$$v"; }' | \ sort -k2rn > $@ enwik%.train.vw.gz: enwik%.freq enwik%.zip zcat enwik$*.zip | \ perl ./wikifil.pl | \ perl -ne 'BEGIN { srand 69; }; \ 1; if (10 * rand () < 1) { print STDERR $$_; \ 1; } else { print STDOUT $$_; }' \ 2> >(perl ./gendata.pl enwik$*.freq $(numlabels) 10000000 | gzip > enwik$*.test.vw.gz) \ > >(perl ./gendata.pl enwik$*.freq $(numlabels) 10000000 | gzip > enwik$*.train.vw.gz) # NB: shufbuf size of 1 => in order processing enwik%.inorder.train.vw.gz: enwik%.freq enwik%.zip zcat enwik$*.zip | \ perl ./wikifil.pl | \ perl -ne 'BEGIN { srand 69; }; \ 1; if (10 * rand () < 1) { print STDERR $$_; \ 1; } else { print STDOUT $$_; }' \ 2> >(perl ./gendata.pl enwik$*.freq $(numlabels) 1 | gzip > enwik$*.inorder.test.vw.gz) \ > >(perl ./gendata.pl enwik$*.freq $(numlabels) 1 | gzip > enwik$*.inorder.train.vw.gz) vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/ltcb/README.md000066400000000000000000000017321332666127000227300ustar00rootroot00000000000000Recall tree demo for ltcb ------------------------------- There are two versions of the LTCB: enwik8 is the small one, and enwik9 is the big one. http://mattmahoney.net/dc/textdata.html * make enwik8.train.vw.gz: makes the following files 1. enwik8.zip: downloaded 2. enwik8.freq: base frequency of each token in descending order 2a. labels are the first 80K of these 3. enwik8.train.vw.gz: training set 4. 
enwik8.test.vw.gz: test set * make oaa8: example invocation of VW on oaa for enwik8 * make enwik9.train.vw.gz: same files but larger (number of examples) * make oaa9: example invocation of VW on oaa for enwik9 the VW data files consist of the label and the 10 preceeding words, no boundary modeling, i.e., 11th word is first example generated, and there is no attempt to predict ``end of document'' infrequent words are replaced with unknown in the labels, so label 80001 is the unknown label. however all words are preseved in input. vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/ltcb/do-oaa-hogwild000077500000000000000000000040251332666127000241700ustar00rootroot00000000000000#! /bin/zsh nukeem() { \ trap - INT QUIT TERM pkill -9 -f 'vw.*--port 26542' } if which netcat >/dev/null 2>/dev/null; then netcat=netcat elif which nc >/dev/null 2>/dev/null; then netcat=nc else echo "Install netcat or nc" >&2 exit 1 fi data="$1" shift { vw "$@" --num_children 24 --port 26542 2>&1 | perl -lane 'print $_ unless $F[2] =~ /^\d$/ && $c{$F[2]}++;' } & trap 'nukeem; exit 1' INT QUIT TERM while ! 
$netcat -z localhost 26542 do sleep 1 done zcat $data | \ ./map \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) \ >($netcat localhost 26542 > /dev/null) pkill -f 'vw.*--port 26542' while test "x$1" = "x-f" && test ! -s "$2" do sleep 1 done vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/ltcb/gendata.pl000077500000000000000000000021761332666127000234170ustar00rootroot00000000000000#! /usr/bin/env perl use warnings; use strict; use IO::File; my $freqfile = shift @ARGV or die; my $numlabels = shift @ARGV or die; my $shufbufsize = shift @ARGV or die; my $freqfh = new IO::File $freqfile, "r" or die "$freqfile: $!"; my %dict; my $curlabels = $numlabels; while (defined ($_ = <$freqfh>)) { chomp; my ($key, undef) = split /\s+/, $_; $dict{$key} = $curlabels; --$curlabels; last unless $curlabels > 0; } srand 69; my $context = 6; my @shufbuf; my @charpos = split //, "abcdefghijklmnop"; $\="\n"; while (defined ($_ = )) { chomp; s/\|/_/g; s/:/_/g; # VW special characters my @tokens = split /\s+/, $_; foreach my $pos ($context ... 
$#tokens) { my $label = $dict{$tokens[$pos]} || $numlabels+1; my $index = int (rand ($shufbufsize)); print $shufbuf[$index] if length ($shufbuf[$index]); $shufbuf[$index] = join " ", $label, map { join "", " |", $charpos[$_], " ", $tokens[$pos - $_] } (1 .. $context); } } foreach my $index (0 .. $shufbufsize) { print $shufbuf[$index] if length ($shufbuf[$index]); } vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/ltcb/map000077500000000000000000000010551332666127000221520ustar00rootroot00000000000000#! /usr/bin/env perl use warnings; use strict; use IO::File; use IO::Select; my @mappers = map { $_->autoflush (0); $_ } map { new IO::File $_, "w" or die "$_: $!" } @ARGV; my $bufsize = 6; my @buf = grep { defined } map { scalar } (1 .. $bufsize); my $n = 0; while (@buf) { my $fh = $mappers[$n % @mappers]; my @ready = IO::Select->new ($fh)->can_write (100); if (@ready) { print $fh @buf; @buf = grep { defined } map { scalar } (1 .. $bufsize); } ++$n; } vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/ltcb/wikifil.pl000077500000000000000000000040651332666127000234510ustar00rootroot00000000000000#!/usr/bin/perl # Program to filter Wikipedia XML dumps to "clean" text consisting only of lowercase # letters (a-z, converted from A-Z), and spaces (never consecutive). # All other characters are converted to spaces. Only text which normally appears # in the web browser is displayed. Tables are removed. Image captions are # preserved. Links are converted to normal text. Digits are spelled out. # Written by Matt Mahoney, June 10, 2006. This program is released to the public domain. $/=">"; # input record separator $\="\n"; # output record separator while (<>) { if (/ ... if (/#redirect/i) {$text=0;} # remove #REDIRECT if ($text) { # Remove any text not normally visible if (/<\/text>/) {$text=0;} s/\n/ /g; # remove newlines s/<.*>//; # remove xml tags s/&/&/g; # decode URL encoded chars s/<//g; s///g; # remove references ... 
s/<[^>]*>//g; # remove xhtml tags s/\[http:[^] ]*/[/g; # remove normal url, preserve visible text s/\|thumb//ig; # remove images links, preserve caption s/\|left//ig; s/\|right//ig; s/\|\d+px//ig; s/\[\[image:[^\[\]]*\|//ig; s/\[\[category:([^|\]]*)[^]]*\]\]/[[$1]]/ig; # show categories without markup s/\[\[[a-z\-]*:[^\]]*\]\]//g; # remove links to other languages s/\[\[[^\|\]]*\|/[[/g; # remove wiki url, preserve visible text s/{{[^}]*}}//g; # remove {{icons}} and {tables} s/{[^}]*}//g; s/\[//g; # remove [ and ] s/\]//g; s/&[^;]*;/ /g; # remove URL encoded chars # convert to lowercase letters and spaces, spell digits $_=" $_ "; tr/A-Z/a-z/; s/0/ zero /g; s/1/ one /g; s/2/ two /g; s/3/ three /g; s/4/ four /g; s/5/ five /g; s/6/ six /g; s/7/ seven /g; s/8/ eight /g; s/9/ nine /g; tr/a-z/ /cs; chop; print $_ if $_ =~ /\S/; } } vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/odp/000077500000000000000000000000001332666127000213045ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/odp/Makefile000066400000000000000000000134471332666127000227550ustar00rootroot00000000000000SHELL=/bin/zsh .SECONDARY: numlabels=105033 bits?=31 odp_train.vw.gz: wget --tries=100 -c http://hunch.net/~vw/odp_train.vw.gz odp_test.vw.gz: wget --tries=100 -c http://hunch.net/~vw/odp_test.vw.gz full_hyper%: full_hyper%.vw odp_test.vw.gz time vw -t -i $(word 1,$^) <(zcat $(word 2,$^) | sed 's/|/|b/') ufull_hyper%.vw: bits=30 ufull_hyper%.vw: odp_train.vw.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; $$depth = int (12 + 6 * rand ()); \ 1; $$mc = int (40 + 80 * rand ()); \ 1; $$log_hyper = log (0.01) + (log (10) - log (0.01)) * rand (); \ 1; $$rr = rand () > 0.5 ? 1 : 0; \ 1; $$dq = rand () > 0.5 ? "a" : "b"; \ 1; $$no = rand () > 0.5 ? 
"0" : "1"; \ 1; print join " ", $$eta, $$depth, $$mc, exp ($$log_hyper), $$rr, $$dq, $$no;' | \ while read eta depth mc hyper rr dq no; \ do ( \ echo $$eta $$depth $$mc $$hyper $$rr $$dq $$no; \ time vw -f $@ --recall_tree $(numlabels) <((zcat $<; zcat $<; zcat $<; zcat $<; zcat $<) | sed 's/|/|b/' | ./minishuf 100000) -b $(bits) --loss_function logistic -l $$eta --max_depth $$depth --max_candidates $$mc --bern_hyper $$hyper --link glf1 --randomized_routing $$rr --node_only $$no -q '\x88'$$dq; \ ) > $@.train 2>&1; \ done pfull_hyper%.vw: bits=30 pfull_hyper%.vw: odp_train.vw.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; $$depth = int (12 + 6 * rand ()); \ 1; $$mc = int (40 + 80 * rand ()); \ 1; $$log_hyper = log (0.01) + (log (10) - log (0.01)) * rand (); \ 1; $$rr = rand () > 0.5 ? 1 : 0; \ 1; $$dq = rand () > 0.5 ? "a" : "b"; \ 1; $$no = rand () > 0.5 ? "0" : "1"; \ 1; print join " ", $$eta, $$depth, $$mc, exp ($$log_hyper), $$rr, $$dq, $$no;' | \ while read eta depth mc hyper rr dq no; \ do ( \ echo $$eta $$depth $$mc $$hyper $$rr $$dq $$no; \ time vw -f $@ --recall_tree $(numlabels) <((zcat $<; zcat $<; zcat $<; zcat $<) | sed 's/|/|b/' | ./minishuf 100000) -b $(bits) --loss_function logistic -l $$eta --max_depth $$depth --max_candidates $$mc --bern_hyper $$hyper --link glf1 --randomized_routing $$rr --node_only $$no -q '\x88'$$dq; \ ) > $@.train 2>&1; \ done mfull_hyper%.vw: bits=30 mfull_hyper%.vw: odp_train.vw.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; $$depth = int (12 + 6 * rand ()); \ 1; $$mc = int (40 + 80 * rand ()); \ 1; $$log_hyper = log (0.01) + (log (10) - log (0.01)) * rand (); \ 1; $$rr = rand () > 0.5 ? 1 : 0; \ 1; $$dq = rand () > 0.5 ? "a" : "b"; \ 1; $$no = rand () > 0.5 ? 
"0" : "1"; \ 1; print join " ", $$eta, $$depth, $$mc, exp ($$log_hyper), $$rr, $$dq, $$no;' | \ while read eta depth mc hyper rr dq no; \ do ( \ echo $$eta $$depth $$mc $$hyper $$rr $$dq $$no; \ time vw -f $@ --recall_tree $(numlabels) <((zcat $<; zcat $<; zcat $<) | sed 's/|/|b/' | ./minishuf 100000) -b $(bits) --loss_function logistic -l $$eta --max_depth $$depth --max_candidates $$mc --bern_hyper $$hyper --link glf1 --randomized_routing $$rr --node_only $$no -q '\x88'$$dq; \ ) > $@.train 2>&1; \ done full_hyper%.vw: bits=31 full_hyper%.vw: odp_train.vw.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; $$depth = int (12 + 6 * rand ()); \ 1; $$mc = int (40 + 80 * rand ()); \ 1; $$log_hyper = log (0.01) + (log (10) - log (0.01)) * rand (); \ 1; $$rr = rand () > 0.5 ? 1 : 0; \ 1; $$dq = rand () > 0.5 ? "a" : "b"; \ 1; $$no = rand () > 0.5 ? "0" : "1"; \ 1; print join " ", $$eta, $$depth, $$mc, exp ($$log_hyper), $$rr, $$dq, $$no;' | \ while read eta depth mc hyper rr dq no; \ do ( \ echo $$eta $$depth $$mc $$hyper $$rr $$dq $$no; \ time vw -f $@ --recall_tree $(numlabels) <((zcat $<; zcat $<) | sed 's/|/|b/' | ./minishuf 100000) -b $(bits) --loss_function logistic -l $$eta --max_depth $$depth --max_candidates $$mc --bern_hyper $$hyper --link glf1 --randomized_routing $$rr --node_only $$no -q '\x88'$$dq; \ ) > full_hyper$*.train 2>&1; \ done hyper%: bits=29 hyper%: odp_train.vw.gz perl -le 'BEGIN { srand $*; }; \ 1; $$eta = 0.1 + 3.9 * rand (); \ 1; $$depth = int (8 + 6 * rand ()); \ 1; $$mc = int (40 + 120 * rand ()); \ 1; $$log_hyper = log (0.01) + (log (10) - log (0.01)) * rand (); \ 1; $$rr = rand () > 0.5 ? 1 : 0; \ 1; $$dq = rand () > 0.5 ? "a" : "b"; \ 1; $$no = rand () > 0.5 ? 
"0" : "1"; \ 1; print join " ", $$eta, $$depth, $$mc, exp ($$log_hyper), $$rr, $$dq, $$no;' | \ while read eta depth mc hyper rr dq no; \ do ( \ echo $$eta $$depth $$mc $$hyper $$rr $$dq $$no; \ vw --recall_tree $(numlabels) <(zcat $< | sed 's/|/|b/' | ./minishuf 100000) -b $(bits) --loss_function logistic -l $$eta --max_depth $$depth --max_candidates $$mc --bern_hyper $$hyper --link glf1 --randomized_routing $$rr --node_only $$no -q '\x88'$$dq; \ ) > $@ 2>&1; \ done log_multi: log_multi.vw odp_test.vw.gz time vw -t -i $^ log_multi.vw: odp_train.vw.gz time vw -f $@ --log_multi $(numlabels) $< -b $(bits) --loss_function logistic -l 1 recall_tree: recall_tree.vw odp_test.vw.gz time vw -t -i $(word 1,$^) <(zcat $(word 2,$^) | sed 's/|/|b/') recall_tree.vw: odp_train.vw.gz time vw -f $@ --recall_tree $(numlabels) <(zcat $< | sed 's/|/|b/' | ./minishuf 100000) -b $(bits) --loss_function logistic -l 1 --node_only 1 -q '\x88'b --max_candidates 400 --max_depth 5 oaahogwild.vw: odp_train.vw.gz time ./do-oaa-hogwild <(zcat $< | ./minishuf 100000 | gzip -c) -f $@ --oaa $(numlabels) --oaa_subsample 1000 -b $(bits) --loss_function logistic -l 1 oaahogwild: oaahogwild.vw odp_test.vw.gz time ./do-oaa-hogwild $(word 2,$^) -t -i $(word 1,$^) vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/odp/README.md000066400000000000000000000003651332666127000225670ustar00rootroot00000000000000Recall tree demo for odp ------------------------------- This demo exercises the recall tree reduction for logarithmic time multiclass classification on the odp dataset. It takes a long time to run, a lot of memory, and a lot of disk space. vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/odp/do-oaa-hogwild000077500000000000000000000040371332666127000240310ustar00rootroot00000000000000#! 
/bin/zsh nukeem() { \ trap - INT QUIT TERM pkill -9 -f 'vw.*--port 26544' } if which netcat >/dev/null 2>/dev/null; then netcat=netcat elif which nc >/dev/null 2>/dev/null; then netcat=nc else echo "Install netcat or nc" >&2 exit 1 fi data="$1" shift { vw "$@" --num_children 24 --port 26544 2>&1 | perl -lane 'print $_ unless $F[2] =~ /^\d$/ && $c{$F[2]}++;' } & trap 'nukeem; exit 1' INT QUIT TERM while ! $netcat -z localhost 26544 do sleep 1 done sleep 30 zcat $data | \ ./map \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) \ >($netcat localhost 26544 > /dev/null) pkill -f 'vw.*--port 26544' while test "x$1" = "x-f" && test ! -s "$2" do sleep 1 done vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/odp/map000077500000000000000000000010571332666127000220120ustar00rootroot00000000000000#! /usr/bin/env perl use warnings; use strict; use IO::File; use IO::Select; my @mappers = map { $_->autoflush (1); $_ } map { new IO::File $_, "w" or die "$_: $!" } @ARGV; my $bufsize = 4; my @buf = grep { defined } map { scalar } (1 .. 
$bufsize); my $n = 0; while (@buf) { my $fh = $mappers[$n % @mappers]; my @ready = IO::Select->new ($fh)->can_write (20000); if (@ready) { print $fh @buf; @buf = grep { defined } map { scalar } (1 .. $bufsize); } ++$n; } vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/odp/minishuf000077500000000000000000000005151332666127000230550ustar00rootroot00000000000000#! /usr/bin/env perl use warnings; use strict; srand 69; my $bufsize = shift @ARGV or die; my @buf; while (defined ($_ = <>)) { my $index = int rand $bufsize; print $buf[$index] if defined $buf[$index]; $buf[$index] = $_; } foreach my $index (0 .. $#buf) { print $buf[$index] if defined $buf[$index]; } vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/wikipara/000077500000000000000000000000001332666127000223315ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/wikipara/.gitignore000066400000000000000000000001211332666127000243130ustar00rootroot00000000000000docid2label enwiki-20170601-pages-articles-multistream.xml.bz2 paradata*.gz text vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/wikipara/DocGenerator.py000066400000000000000000000014541332666127000252630ustar00rootroot00000000000000import bz2 import re import string #------------------------------------------------- # iterate over documents #------------------------------------------------- def docs(filename): docid=None startdocregex=re.compile('": yield int(docid), paragraphs docid=None elif not line.isspace(): if "\n" in line: curpara.append(line.rstrip('\n')) paragraphs.append(' '.join(curpara)) curpara=[] else: curpara.appand(line) if docid is None: m=startdocregex.match (line) if m is not None: docid=m.group(1) paragraphs=[] curpara=[] vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/wikipara/Makefile000066400000000000000000000024511332666127000237730ustar00rootroot00000000000000SHELL=/bin/zsh .SECONDARY: .PHONY: all ratelimit?=1M all: paradata10000.vw.train.gz paradata%.vw.train.gz: makeparadata.py text/AA/wiki_00.shuf.bz2 python ./makeparadata.py $* 4 
\ > >(perl -pe 'BEGIN { srand 69; }; $$_ = rand()."\t$$_";' | \ sort -t$$'\t' -k1n -S30% | cut -f2- | gzip > paradata$*.vw.train.gz) \ 2> >(perl -pe 'BEGIN { srand 69; }; $$_ = rand()."\t$$_";' | \ sort -t$$'\t' -k1n -S30% | cut -f2- | gzip > paradata$*.vw.test.gz) while ! test -s paradata$*.vw.test.gz; do sleep 1; done while ! test -s paradata$*.vw.train.gz; do sleep 1; done enwiki-20170601-pages-articles-multistream.xml.bz2: wget --limit-rate=$(ratelimit) http://dumps.wikimedia.your.org/enwiki/20170601/$@ text/AA/wiki_00.bz2: WikiExtractor.py enwiki-20170601-pages-articles-multistream.xml.bz2 python ./WikiExtractor.py --no-templates --bytes 100G -c <(bzcat enwiki-20170601-pages-articles-multistream.xml.bz2) text/AA/wiki_00.shuf.bz2: text/AA/wiki_00.bz2 bzcat $< | \ perl -ne 'BEGIN { srand 69; $$/="\n"; }; \ 1; $$r = rand (); print "$$r\001$$_\000";' | \ sort -k1n -t$$'\001' -z -S50% --compress-program=lzop | \ perl -F'\001' -ane 'BEGIN { $$/="\000"; }; \ 1; chomp($$F[1]); print $$F[1]' | \ bzip2 > $@ vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/wikipara/README.md000066400000000000000000000010331332666127000236050ustar00rootroot00000000000000Wikipara data set ------------------------------- This is a data set generation scheme we were playing around with which allows us to vary the number of classes and the number of training examples per class. We take some number of wikipedia pages and use the page (id) as the class. We then take some number of paragraphs from the page as training examples and some other number of paragraphs from the page as test examples. A setting we found particularly challenging was 10,000 classes with 3 training examples and 1 test example. 
vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/wikipara/WikiExtractor.py000077500000000000000000002672141332666127000255210ustar00rootroot00000000000000#!/usr/bin/python # -*- coding: utf-8 -*- # # ============================================================================= # Version: 2.39 (September 29, 2015) # Author: Giuseppe Attardi (attardi@di.unipi.it), University of Pisa # # Contributors: # Antonio Fuschetto (fuschett@aol.com) # Leonardo Souza (lsouza@amtera.com.br) # Juan Manuel Caicedo (juan@cavorite.com) # Humberto Pereira (begini@gmail.com) # Siegfried-A. Gevatter (siegfried@gevatter.com) # Pedro Assis (pedroh2306@gmail.com) # Wim Muskee (wimmuskee@gmail.com) # Radics Geza (radicsge@gmail.com) # # ============================================================================= # Copyright (c) 2009-2015. Giuseppe Attardi (attardi@di.unipi.it). # ============================================================================= # This file is part of Tanl. # # Tanl is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License, version 3, # as published by the Free Software Foundation. # # Tanl is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # ============================================================================= """Wikipedia Extractor: Extracts and cleans text from a Wikipedia database dump and stores output in a number of files of similar size in a given directory. Each file will contain several documents in the format: ... This version performs template expansion by preprocesssng the whole dump and collecting template definitions. 
""" import sys, os.path, time import re # TODO use regex when it will be standard import argparse, random from itertools import izip, izip_longest import logging import urllib import bz2 import codecs from htmlentitydefs import name2codepoint from multiprocessing import Queue, JoinableQueue, Process, Manager, cpu_count from cStringIO import StringIO import fileinput from timeit import default_timer #=========================================================================== # Program version version = '2.39' ### PARAMS #################################################################### ## # Defined in # We include as default Template, when loading external template file. knownNamespaces = set(['Template']) ## # The namespace used for template definitions # It is the name associated with namespace key=10 in the siteinfo header. templateNamespace = '' ## # The namespace used for module definitions # It is the name associated with namespace key=828 in the siteinfo header. moduleNamespace = '' ## # Recognize only these namespaces # w: Internal links to the Wikipedia # wiktionary: Wiki dictionary # wikt: shortcut for Wiktionary # acceptedNamespaces = ['w', 'wiktionary', 'wikt'] ## # Drop these elements from article text # discardElements = [ 'gallery', 'timeline', 'noinclude', 'pre', 'table', 'tr', 'td', 'th', 'caption', 'div', 'form', 'input', 'select', 'option', 'textarea', 'ul', 'li', 'ol', 'dl', 'dt', 'dd', 'menu', 'dir', 'ref', 'references', 'img', 'imagemap', 'source', 'small' ] # This is obtained from urlbase = None def get_url(id): global urlbase return "%s?curid=%s" % (urlbase, id) #========================================================================= # # MediaWiki Markup Grammar # https://www.mediawiki.org/wiki/Preprocessor_ABNF # xml-char = %x9 / %xA / %xD / %x20-D7FF / %xE000-FFFD / %x10000-10FFFF # sptab = SP / HTAB # ; everything except ">" (%x3E) # attr-char = %x9 / %xA / %xD / %x20-3D / %x3F-D7FF / %xE000-FFFD / %x10000-10FFFF # literal = *xml-char 
# title = wikitext-L3 # part-name = wikitext-L3 # part-value = wikitext-L3 # part = ( part-name "=" part-value ) / ( part-value ) # parts = [ title *( "|" part ) ] # tplarg = "{{{" parts "}}}" # template = "{{" parts "}}" # link = "[[" wikitext-L3 "]]" # comment = "" # unclosed-comment = "', re.DOTALL) # Match ignored tags ignored_tag_patterns = [] def ignoreTag(tag): left = re.compile(r'<%s\b.*?>' % tag, re.IGNORECASE | re.DOTALL) # both and right = re.compile(r'' % tag, re.IGNORECASE) ignored_tag_patterns.append((left, right)) for tag in ignoredTags: ignoreTag(tag) # Match selfClosing HTML tags selfClosing_tag_patterns = [ re.compile(r'<\s*%s\b[^>]*/\s*>' % tag, re.DOTALL | re.IGNORECASE) for tag in selfClosingTags ] # Match HTML placeholder tags placeholder_tag_patterns = [ (re.compile(r'<\s*%s(\s*| [^>]+?)>.*?<\s*/\s*%s\s*>' % (tag, tag), re.DOTALL | re.IGNORECASE), repl) for tag, repl in placeholder_tags.items() ] # Match preformatted lines preformatted = re.compile(r'^ .*?$') # Match external links (space separates second optional parameter) externalLink = re.compile(r'\[\w+[^ ]*? (.*?)]') externalLinkNoAnchor = re.compile(r'\[\w+[&\]]*\]') # Matches bold/italic bold_italic = re.compile(r"'''''(.*?)'''''") bold = re.compile(r"'''(.*?)'''") italic_quote = re.compile(r"''\"([^\"]*?)\"''") italic = re.compile(r"''(.*?)''") quote_quote = re.compile(r'""([^"]*?)""') # Matches space spaces = re.compile(r' {2,}') # Matches dots dots = re.compile(r'\.{4,}') #====================================================================== class Template(list): """ A Template is a list of TemplateText or TemplateArgs """ @classmethod def parse(cls, body): tpl = Template() # we must handle nesting, s.a. 
# {{{1|{{PAGENAME}}} # {{{italics|{{{italic|}}} # {{#if:{{{{{#if:{{{nominee|}}}|nominee|candidate}}|}}}| # start = 0 for s,e in findMatchingBraces(body, 3): tpl.append(TemplateText(body[start:s])) tpl.append(TemplateArg(body[s+3:e-3])) start = e tpl.append(TemplateText(body[start:])) # leftover return tpl def subst(self, params, extractor, depth=0): # We perform parameter substitutions recursively. # We also limit the maximum number of iterations to avoid too long or # even endless loops (in case of malformed input). # :see: http://meta.wikimedia.org/wiki/Help:Expansion#Distinction_between_variables.2C_parser_functions.2C_and_templates # # Parameter values are assigned to parameters in two (?) passes. # Therefore a parameter name in a template can depend on the value of # another parameter of the same template, regardless of the order in # which they are specified in the template call, for example, using # Template:ppp containing "{{{{{{p}}}}}}", {{ppp|p=q|q=r}} and even # {{ppp|q=r|p=q}} gives r, but using Template:tvvv containing # "{{{{{{{{{p}}}}}}}}}", {{tvvv|p=q|q=r|r=s}} gives s. #logging.debug('subst tpl (%d, %d) %s', len(extractor.frame), depth, self) if depth > extractor.maxParameterRecursionLevels: extractor.recursion_exceeded_3_errs += 1 return '' return ''.join([tpl.subst(params, extractor, depth) for tpl in self]) def __str__(self): return ''.join([unicode(x) for x in self]) class TemplateText(unicode): """Fixed text of template""" def subst(self, params, extractor, depth): return self class TemplateArg(object): """ parameter to a template. Has a name and a default value, both of which are Templates. """ def __init__(self, parameter): """ :param parameter: the parts of a tplarg. 
""" # the parameter name itself might contain templates, e.g.: # appointe{{#if:{{{appointer14|}}}|r|d}}14| # 4|{{{{{subst|}}}CURRENTYEAR}} # any parts in a tplarg after the first (the parameter default) are # ignored, and an equals sign in the first part is treated as plain text. #logging.debug('TemplateArg %s', parameter) parts = splitParts(parameter) self.name = Template.parse(parts[0]) if len(parts) > 1: # This parameter has a default value self.default = Template.parse(parts[1]) else: self.default = None def __str__(self): if self.default: return '{{{%s|%s}}}' % (self.name, self.default) else: return '{{{%s}}}' % self.name def subst(self, params, extractor, depth): """ Substitute value for this argument from dict :param params: Use :param extractor: to evaluate expressions for name and default. Limit substitution to the maximun :param depth:. """ # the parameter name itself might contain templates, e.g.: # appointe{{#if:{{{appointer14|}}}|r|d}}14| paramName = self.name.subst(params, extractor, depth+1) paramName = extractor.expandTemplates(paramName) res = '' if paramName in params: res = params[paramName] # use parameter value specified in template invocation elif self.default: # use the default value defaultValue = self.default.subst(params, extractor, depth+1) res = extractor.expandTemplates(defaultValue) #logging.debug('subst arg %d %s -> %s' % (depth, paramName, res)) return res #====================================================================== substWords = 'subst:|safesubst:' class Extractor(object): """ An extraction task on a article. """ ## # Whether to preserve links in output keepLinks = False ## # Whether to transform sections into HTML keepSections = False ## # Whether to output HTML instead of text toHTML = False def __init__(self, id, title, page): """ :param page: a list of lines. 
""" self.id = id self.title = title self.page = page self.magicWords = MagicWords() self.frame = [] self.recursion_exceeded_1_errs = 0 # template recursion within expandTemplates() self.recursion_exceeded_2_errs = 0 # template recursion within expandTemplate() self.recursion_exceeded_3_errs = 0 # parameter recursion self.template_title_errs = 0 def extract(self, out): """ :param out: a memory file. """ logging.debug("%s\t%s", self.id, self.title) text = ''.join(self.page) url = get_url(self.id) header = '\n' % (self.id, url, self.title) # Separate header from text with a newline. header += self.title + '\n\n' header = header.encode('utf-8') self.magicWords['pagename'] = self.title self.magicWords['fullpagename'] = self.title self.magicWords['currentyear'] = time.strftime('%Y') self.magicWords['currentmonth'] = time.strftime('%m') self.magicWords['currentday'] = time.strftime('%d') self.magicWords['currenthour'] = time.strftime('%H') self.magicWords['currenttime'] = time.strftime('%H:%M:%S') text = clean(self, text) footer = "\n\n" out.write(header) for line in compact(text): out.write(line.encode('utf-8')) out.write('\n') out.write(footer) errs = (self.template_title_errs, self.recursion_exceeded_1_errs, self.recursion_exceeded_2_errs, self.recursion_exceeded_3_errs) if any(errs): logging.warn("Template errors in article '%s' (%s): title(%d) recursion(%d, %d, %d)", self.title, self.id, *errs) #---------------------------------------------------------------------- # Expand templates maxTemplateRecursionLevels = 30 maxParameterRecursionLevels = 10 # check for template beginning reOpen = re.compile('(?= self.maxTemplateRecursionLevels: self.recursion_exceeded_1_errs += 1 return res #logging.debug(' %d %s', len(self.frame), res) return res def templateParams(self, parameters): """ Build a dictionary with positional or name key to expanded parameters. :param parameters: the parts[1:] of a template, i.e. all except the title. :param depth: recusion depth. 
""" templateParams = {} if not parameters: return templateParams logging.debug(' # Parameters may span several lines, like: # {{Reflist|colwidth=30em|refs= # <ref name="Goode">Title</ref> # The '=' might occurr within an HTML attribute: # "<ref name=value" # but we stop at first. m = re.match(' *([^=]*?) *=(.*)', param, re.DOTALL) if m: # This is a named parameter. This case also handles parameter # assignments like "2=xxx", where the number of an unnamed # parameter ("2") is specified explicitly - this is handled # transparently. parameterName = m.group(1).strip() parameterValue = m.group(2) if ']]' not in parameterValue: # if the value does not contain a link, trim whitespace parameterValue = parameterValue.strip() templateParams[parameterName] = parameterValue else: # this is an unnamed parameter unnamedParameterCounter += 1 if ']]' not in param: # if the value does not contain a link, trim whitespace param = param.strip() templateParams[str(unnamedParameterCounter)] = param logging.debug(' templateParams> %s', '|'.join(templateParams.values())) return templateParams def expandTemplate(self, body): """Expands template invocation. :param body: the parts of a template. :see http://meta.wikimedia.org/wiki/Help:Expansion for an explanation of the process. See in particular: Expansion of names and values http://meta.wikimedia.org/wiki/Help:Expansion#Expansion_of_names_and_values For most parser functions all names and values are expanded, regardless of what is relevant for the result. The branching functions (#if, #ifeq, #iferror, #ifexist, #ifexpr, #switch) are exceptions. All names in a template call are expanded, and the titles of the tplargs in the template body, after which it is determined which values must be expanded, and for which tplargs in the template body the first part (default). In the case of a tplarg, any parts beyond the first are never expanded. 
The possible name and the value of the first part is expanded if the title does not match a name in the template call. :see code for braceSubstitution at https://doc.wikimedia.org/mediawiki-core/master/php/html/Parser_8php_source.html#3397: """ # template = "{{" parts "}}" # Templates and tplargs are decomposed in the same way, with pipes as # separator, even though eventually any parts in a tplarg after the first # (the parameter default) are ignored, and an equals sign in the first # part is treated as plain text. # Pipes inside inner templates and tplargs, or inside double rectangular # brackets within the template or tplargs are not taken into account in # this decomposition. # The first part is called title, the other parts are simply called parts. # If a part has one or more equals signs in it, the first equals sign # determines the division into name = value. Equals signs inside inner # templates and tplargs, or inside double rectangular brackets within the # part are not taken into account in this decomposition. Parts without # equals sign are indexed 1, 2, .., given as attribute in the tag. if len(self.frame) >= self.maxTemplateRecursionLevels: self.recursion_exceeded_2_errs += 1 #logging.debug(' INVOCATION> %d %s', len(self.frame), body) return '' logging.debug('INVOCATION %d %s', len(self.frame), body) parts = splitParts(body) # title is the portion before the first | logging.debug('TITLE %s', parts[0].strip()) title = self.expandTemplates(parts[0].strip()) # SUBST # Apply the template tag to parameters without # substituting into them, e.g. 
# {{subst:t|a{{{p|q}}}b}} gives the wikitext start-a{{{p|q}}}b-end # @see https://www.mediawiki.org/wiki/Manual:Substitution#Partial_substitution subst = False if re.match(substWords, title, re.IGNORECASE): title = re.sub(substWords, '', title, 1, re.IGNORECASE) subst = True if title.lower() in self.magicWords.values: return self.magicWords[title.lower()] # Parser functions # The first argument is everything after the first colon. # It has been evaluated above. colon = title.find(':') if colon > 1: funct = title[:colon] parts[0] = title[colon+1:].strip() # side-effect (parts[0] not used later) # arguments after first are not evaluated ret = callParserFunction(funct, parts, self.frame) return self.expandTemplates(ret) title = fullyQualifiedTemplateTitle(title) if not title: self.template_title_errs += 1 return '' redirected = redirects.get(title) if redirected: title = redirected # get the template if title in templateCache: template = templateCache[title] elif title in templates: template = Template.parse(templates[title]) # add it to cache templateCache[title] = template del templates[title] else: # The page being included could not be identified return '' #logging.debug('TEMPLATE %s: %s', title, template) # tplarg = "{{{" parts "}}}" # parts = [ title *( "|" part ) ] # part = ( part-name "=" part-value ) / ( part-value ) # part-name = wikitext-L3 # part-value = wikitext-L3 # wikitext-L3 = literal / template / tplarg / link / comment / # line-eating-comment / unclosed-comment / # xmlish-element / *wikitext-L3 # A tplarg may contain other parameters as well as templates, e.g.: # {{{text|{{{quote|{{{1|{{error|Error: No text given}}}}}}}}}}} # hence no simple RE like this would work: # '{{{((?:(?!{{{).)*?)}}}' # We must use full CF parsing. # the parameter name itself might be computed, e.g.: # {{{appointe{{#if:{{{appointer14|}}}|r|d}}14|}}} # Because of the multiple uses of double-brace and triple-brace # syntax, expressions can sometimes be ambiguous. 
# Precedence rules specifed here: # http://www.mediawiki.org/wiki/Preprocessor_ABNF#Ideal_precedence # resolve ambiguities like this: # {{{{ }}}} -> { {{{ }}} } # {{{{{ }}}}} -> {{ {{{ }}} }} # # :see: https://en.wikipedia.org/wiki/Help:Template#Handling_parameters params = parts[1:] if not subst: # Evaluate parameters, since they may contain templates, including # the symbol "=". # {{#ifexpr: {{{1}}} = 1 }} params = [self.expandTemplates(p) for p in params] # build a dict of name-values for the parameter values params = self.templateParams(params) # Perform parameter substitution # extend frame before subst, since there may be recursion in default # parameter value, e.g. {{OTRS|celebrative|date=April 2015}} in article # 21637542 in enwiki. self.frame.append((title, params)) instantiated = template.subst(params, self) #logging.debug('instantiated %d %s', len(self.frame), instantiated) value = self.expandTemplates(instantiated) self.frame.pop() #logging.debug(' INVOCATION> %s %d %s', title, len(self.frame), value) return value # ---------------------------------------------------------------------- # parameter handling def splitParts(paramsList): """ :param paramList: the parts of a template or tplarg. Split template parameters at the separator "|". separator "=". Template parameters often contain URLs, internal links, text or even template expressions, since we evaluate templates outside in. This is required for cases like: {{#if: {{{1}}} | {{lc:{{{1}}} | "parameter missing"}} Parameters are separated by "|" symbols. However, we cannot simply split the string on "|" symbols, since these also appear inside templates and internal links, e.g. {{if:| |{{#if:the president| |{{#if:| [[Category:Hatnote templates|A{{PAGENAME}}]] }} }} }} We split parts at the "|" symbols that are not inside any pair {{{...}}}, {{...}}, [[...]], {|...|}. 
""" # Must consider '[' as normal in expansion of Template:EMedicine2: # #ifeq: ped|article|[http://emedicine.medscape.com/article/180-overview|[http://www.emedicine.com/ped/topic180.htm#{{#if: |section~}} # as part of: # {{#ifeq: ped|article|[http://emedicine.medscape.com/article/180-overview|[http://www.emedicine.com/ped/topic180.htm#{{#if: |section~}}}} ped/180{{#if: |~}}] # should handle both tpl arg like: # 4|{{{{{subst|}}}CURRENTYEAR}} # and tpl parameters like: # ||[[Category:People|{{#if:A|A|{{PAGENAME}}}}]] sep = '|' parameters = [] cur = 0 for s,e in findMatchingBraces(paramsList): par = paramsList[cur:s].split(sep) if par: if parameters: # portion before | belongs to previous parameter parameters[-1] += par[0] if len(par) > 1: # rest are new parameters parameters.extend(par[1:]) else: parameters = par elif not parameters: parameters = [''] # create first param # add span to last previous parameter parameters[-1] += paramsList[s:e] cur = e # leftover par = paramsList[cur:].split(sep) if par: if parameters: # portion before | belongs to previous parameter parameters[-1] += par[0] if len(par) > 1: # rest are new parameters parameters.extend(par[1:]) else: parameters = par #logging.debug('splitParts %s %s\nparams: %s', sep, paramsList, str(parameters)) return parameters def findMatchingBraces(text, ldelim=0): """ :param ldelim: number of braces to match. 0 means match [[]], {{}} and {{{}}}. """ # Parsing is done with respect to pairs of double braces {{..}} delimiting # a template, and pairs of triple braces {{{..}}} delimiting a tplarg. # If double opening braces are followed by triple closing braces or # conversely, this is taken as delimiting a template, with one left-over # brace outside it, taken as plain text. For any pattern of braces this # defines a set of templates and tplargs such that any two are either # separate or nested (not overlapping). 
# Unmatched double rectangular closing brackets can be in a template or # tplarg, but unmatched double rectangular opening brackets cannot. # Unmatched double or triple closing braces inside a pair of # double rectangular brackets are treated as plain text. # Other formulation: in ambiguity between template or tplarg on one hand, # and a link on the other hand, the structure with the rightmost opening # takes precedence, even if this is the opening of a link without any # closing, so not producing an actual link. # In the case of more than three opening braces the last three are assumed # to belong to a tplarg, unless there is no matching triple of closing # braces, in which case the last two opening braces are are assumed to # belong to a template. # We must skip individual { like in: # {{#ifeq: {{padleft:|1|}} | { | |  }} # We must resolve ambiguities like this: # {{{{ }}}} -> { {{{ }}} } # {{{{{ }}}}} -> {{ {{{ }}} }} # {{#if:{{{{{#if:{{{nominee|}}}|nominee|candidate}}|}}}|...}} # Handle: # {{{{{|safesubst:}}}#Invoke:String|replace|{{{1|{{{{{|safesubst:}}}PAGENAME}}}}}|%s+%([^%(]-%)$||plain=false}} # as well as expressions with stray }: # {{{link|{{ucfirst:{{{1}}}}}} interchange}}} if ldelim: # 2-3 reOpen = re.compile('[{]{%d,}' % ldelim) # at least ldelim reNext = re.compile('[{]{2,}|}{2,}') # at least 2 else: reOpen = re.compile('{{2,}|\[{2,}') reNext = re.compile('{{2,}|}{2,}|\[{2,}|]{2,}') # at least 2 cur = 0 while True: m1 = reOpen.search(text, cur) if not m1: return lmatch = m1.end()-m1.start() if m1.group()[0] == '{': stack = [lmatch] # stack of opening braces lengths else: stack = [-lmatch] # negative means [ end = m1.end() while True: m2 = reNext.search(text, end) if not m2: return # unbalanced end = m2.end() brac = m2.group()[0] lmatch = m2.end()-m2.start() if brac == '{': stack.append(lmatch) elif brac == '}': while stack: openCount = stack.pop() # opening span if openCount == 0: # illegal unmatched [[ continue if lmatch >= openCount: lmatch -= 
openCount if lmatch <= 1: # either close or stray } break else: # put back unmatched stack.append(openCount - lmatch) break if not stack: yield m1.start(), end-lmatch cur = end break elif len(stack) == 1 and 0 < stack[0] < ldelim: # ambiguous {{{{{ }}} }} yield m1.start() + stack[0], end cur = end break elif brac == '[': # [[ stack.append(-lmatch) else: # ]] while stack and stack[-1] < 0: # matching [[ openCount = -stack.pop() if lmatch >= openCount: lmatch -= openCount if lmatch <= 1: # either close or stray ] break else: # put back unmatched (negative) stack.append(lmatch - openCount) break if not stack: yield m1.start(), end-lmatch cur = end break # unmatched ]] are discarded cur = end def findBalanced(text, openDelim, closeDelim): """ Assuming that text contains a properly balanced expression using :param openDelim: as opening delimiters and :param closeDelim: as closing delimiters. :return: an iterator producing pairs (start, end) of start and end positions in text containing a balanced expression. """ openPat = '|'.join([re.escape(x) for x in openDelim]) # patter for delimiters expected after each opening delimiter afterPat = { o:re.compile(openPat+'|'+c, re.DOTALL) for o,c in izip(openDelim, closeDelim)} stack = [] start = 0 cur = 0 end = len(text) startSet = False startPat = re.compile(openPat) nextPat = startPat while True: next = nextPat.search(text, cur) if not next: return if not startSet: start = next.start() startSet = True delim = next.group(0) if delim in openDelim: stack.append(delim) nextPat = afterPat[delim] else: opening = stack.pop() # assert opening == openDelim[closeDelim.index(next.group(0))] if stack: nextPat = afterPat[stack[-1]] else: yield start, next.end() nextPat = startPat start = next.end() startSet = False cur = next.end() # ---------------------------------------------------------------------- # Modules # Only minimal support # FIXME: import Lua modules. 
# Table of Lua-module stand-ins: module name -> {function name -> callable}.
modules = {
    'convert': {
        # no conversion: the value and its unit are joined by a space
        'convert': lambda x, u, *rest: x+' '+u,
    }
}

# ----------------------------------------------------------------------
# variables


class MagicWords(object):
    """
    Name -> value table for MediaWiki magic words.
    One copy in each Extractor.

    @see https://doc.wikimedia.org/mediawiki-core/master/php/MagicWord_8php_source.html
    """

    # recognised variable names (lowercase)
    names = [
        '!',
        'currentmonth', 'currentmonth1', 'currentmonthname',
        'currentmonthnamegen', 'currentmonthabbrev',
        'currentday', 'currentday2', 'currentdayname',
        'currentyear', 'currenttime', 'currenthour',
        'localmonth', 'localmonth1', 'localmonthname',
        'localmonthnamegen', 'localmonthabbrev',
        'localday', 'localday2', 'localdayname',
        'localyear', 'localtime', 'localhour',
        'numberofarticles', 'numberoffiles', 'numberofedits',
        'articlepath', 'pageid', 'sitename', 'server', 'servername',
        'scriptpath', 'stylepath',
        'pagename', 'pagenamee', 'fullpagename', 'fullpagenamee',
        'namespace', 'namespacee', 'namespacenumber',
        'currentweek', 'currentdow', 'localweek', 'localdow',
        'revisionid', 'revisionday', 'revisionday2',
        'revisionmonth', 'revisionmonth1', 'revisionyear',
        'revisiontimestamp', 'revisionuser', 'revisionsize',
        'subpagename', 'subpagenamee',
        'talkspace', 'talkspacee', 'subjectspace', 'subjectspacee',
        'talkpagename', 'talkpagenamee',
        'subjectpagename', 'subjectpagenamee',
        'numberofusers', 'numberofactiveusers', 'numberofpages',
        'currentversion',
        'rootpagename', 'rootpagenamee',
        'basepagename', 'basepagenamee',
        'currenttimestamp', 'localtimestamp',
        'directionmark', 'contentlanguage',
        'numberofadmins', 'cascadingsources',
    ]

    # behaviour switches; they are removed from the text, not evaluated
    switches = [
        '__NOTOC__', '__FORCETOC__', '__TOC__', '__TOC__',
        '__NEWSECTIONLINK__', '__NONEWSECTIONLINK__',
        '__NOGALLERY__', '__HIDDENCAT__',
        '__NOCONTENTCONVERT__', '__NOCC__',
        '__NOTITLECONVERT__', '__NOTC__',
        '__START__', '__END__',
        '__INDEX__', '__NOINDEX__',
        '__STATICREDIRECT__', '__DISAMBIG__'
    ]

    def __init__(self):
        # only "!" has a value from the start
        self.values = {'!': '|'}

    def __getitem__(self, name):
        """Value stored for name, or None when it has none."""
        return self.values.get(name)

    def __setitem__(self, name, value):
        self.values[name] = value


# matches any behaviour switch
magicWordsRE = re.compile('|'.join(MagicWords.switches))

# ----------------------------------------------------------------------
# parser functions utilities


def ucfirst(string):
    """:return: a string with just its first character uppercase
    We can't use title() since it converts all words.
    """
    if not string:
        return ''
    return string[0].upper() + string[1:]


def lcfirst(string):
    """:return: a string with its first character lowercase"""
    if not string:
        return ''
    return string[0].lower() + string[1:]


def fullyQualifiedTemplateTitle(templateTitle):
    """
    Determine the namespace of the page being included through the template
    mechanism.

    :param templateTitle: the title as written in the invocation.
    :return: the qualified title, or '' when the title is empty (the caller
        may log that as an error).
    """
    global templatePrefix
    if templateTitle.startswith(':'):
        # Leading colon by itself implies main namespace, so strip this colon
        return ucfirst(templateTitle[1:])

    parsed = re.match('([^:]*)(:.*)', templateTitle)
    if parsed:
        # colon found but not in the first position - check if it
        # designates a known namespace
        namespace = normalizeNamespace(parsed.group(1))
        if namespace in knownNamespaces:
            # group(2) still starts with the colon
            return namespace + ucfirst(parsed.group(2))

    # The title of the page being included is NOT in the main namespace and
    # lacks any other explicit designation of the namespace - therefore, it
    # is resolved to the Template namespace (that's the default for the
    # template inclusion mechanism).

    # This is a defense against pages whose title only contains UTF-8 chars
    # that are reduced to an empty string. Right now I can think of one such
    # case - <C2><A0> which represents the non-breaking space.
    # In this particular case, this page is a redirect to [[Non-breaking
    # space]], but having in the system a redirect page with an empty title
    # causes numerous problems, so we'll live happier without it.
    if not templateTitle:
        return ''  # caller may log as error
    return templatePrefix + ucfirst(templateTitle)
def normalizeNamespace(ns):
    """:return: the namespace name with its first character uppercased."""
    return ucfirst(ns)

# ----------------------------------------------------------------------
# Parser functions
# see http://www.mediawiki.org/wiki/Help:Extension:ParserFunctions
# https://github.com/Wikia/app/blob/dev/extensions/ParserFunctions/ParserFunctions_body.php


class Infix:
    """Infix operators.
    The calling sequence for the infix is:
      x |op| y
    (or x <<op>> y).  The left operand is captured first, producing a
    partially applied Infix; the right operand then triggers the call.
    """

    def __init__(self, function):
        self.function = function

    def __ror__(self, other):
        # x | op  -> remember x
        return Infix(lambda x, self=self, other=other: self.function(other, x))

    def __or__(self, other):
        # (x | op) | y  -> apply to y
        return self.function(other)

    def __rlshift__(self, other):
        # x << op  -> remember x
        return Infix(lambda x, self=self, other=other: self.function(other, x))

    def __rshift__(self, other):
        # (x << op) >> y  -> apply to y
        return self.function(other)

    def __call__(self, value1, value2):
        return self.function(value1, value2)


ROUND = Infix(lambda x, y: round(x, y))

# Text type used for results: unicode on Python 2, str where unicode is gone.
try:
    _text_type = unicode
except NameError:
    _text_type = str


def sharp_expr(expr):
    """
    Evaluate a {{#expr:}} expression.

    :param expr: the expression text, using the MediaWiki operators
        =, <>, mod, div and round.
    :return: the result as text, or '' when the expression cannot be
        evaluated.
    """
    try:
        expr = re.sub(r'<>', '!=', expr)
        # a lone "=" is comparison; leave <=, >=, != and == untouched
        expr = re.sub(r'(?<![<>!=])=(?!=)', '==', expr)
        # raw strings: in a plain string "\b" is a backspace, not a word
        # boundary, so the operators below were never translated
        expr = re.sub(r'\bmod\b', '%', expr)
        expr = re.sub(r'\bdiv\b', '/', expr)
        expr = re.sub(r'\bround\b', '|ROUND|', expr)
        # NOTE(security): eval() runs text taken from wiki pages; it is kept
        # for compatibility but should be replaced by a real expression
        # parser if the input is not trusted.
        return _text_type(eval(expr))
    except Exception:
        return ''


def sharp_if(testValue, valueIfTrue, valueIfFalse=None, *args):
    """
    The {{#if:}} function: an if-then-else construct whose condition is
    "the condition string is non-empty".

    :return: the stripped branch selected by testValue, or "" when that
        branch is missing or empty.
    """
    # In theory, we should evaluate the first argument here,
    # but it was evaluated while evaluating part[0] in expandTemplate().
    if testValue.strip():
        valueIfTrue = valueIfTrue.strip()
        if valueIfTrue:
            return valueIfTrue
    elif valueIfFalse:
        return valueIfFalse.strip()
    return ""


def sharp_ifeq(lvalue, rvalue, valueIfTrue, valueIfFalse=None, *args):
    """
    The {{#ifeq:}} function: an if-then-else construct whose condition is
    "is rvalue equal to lvalue".  Note that this does only string comparison
    while the MediaWiki implementation also supports numerical comparisons.

    Two empty operands compare equal, and an empty rvalue against a
    non-empty lvalue selects the false branch.

    :return: the stripped selected branch, or "" when it is missing or empty.
    """
    if lvalue.strip() == rvalue.strip():
        chosen = valueIfTrue
    else:
        chosen = valueIfFalse
    if chosen:
        return chosen.strip()
    return ""


def sharp_iferror(test, then='', Else=None, *args):
    """
    The {{#iferror:}} function.

    :param test: text that may contain an element of class "error".
    :param then: returned (as is) when such an element is present.
    :param Else: returned stripped otherwise; when omitted the stripped test
        text is returned.
    """
    # search, not match: the error element may appear anywhere in the text
    if re.search(r'<(?:strong|span|p|div)\s(?:[^\s>]*\s+)*?class="(?:[^"\s>]*\s+)*?error(?:\s[^">]*)?"', test):
        return then
    elif Else is None:
        return test.strip()
    else:
        return Else.strip()


def sharp_switch(primary, *params):
    """
    The {{#switch:}} function.

    :param primary: the comparison string.
    :param params: the cases, each either "case = result" or a bare "case"
        that falls through to the next case having a result.
    :return: the result of the first matching case; otherwise the last item
        when it has no "=" (implicit default), otherwise the "#default"
        result, otherwise ''.
    """
    # FIXME: we don't support numeric expressions in primary

    # {{#switch: comparison string
    #  | case1 = result1
    #  | case2
    #  | case4 = result2
    #  | 1 | case5 = result3
    #  | #default = result4
    # }}

    primary = primary.strip()
    found = False  # for fall through cases
    default = None
    lastBare = None  # value of the last item if it had no "="
    for param in params:
        # handle cases like:
        #  #default = [http://www.perseus.tufts.edu/hopper/text?doc=Perseus...]
        pair = param.split('=', 1)
        lvalue = pair[0].strip()
        if len(pair) > 1:
            # got "="
            rvalue = pair[1].strip()
            lastBare = None
            # check for any of multiple values pipe separated
            if found or primary in [v.strip() for v in lvalue.split('|')]:
                # Found a match, return now
                return rvalue
            elif lvalue == '#default':
                default = rvalue
        else:
            lastBare = lvalue
            if lvalue == primary:
                # If the value matches, set a flag and continue
                found = True
    # Default case
    # Check if the last item had no = sign, thus specifying the default case
    if lastBare is not None:
        return lastBare
    elif default is not None:
        return default
    return ''


# Extension Scribunto
def sharp_invoke(module, function, frame):
    """
    The {{#invoke:}} function: call `function` of `module` from the table
    of supported modules.

    :param frame: the stack of (title, params) pairs of the templates being
        expanded; the arguments are taken from the entry whose title matches.
    :return: the function result, or '' when the module or function is not
        supported.
    """
    functions = modules.get(module)
    if functions:
        funct = functions.get(function)
        if funct:
            # find parameters in frame whose title is the one of the original
            # template invocation
            templateTitle = fullyQualifiedTemplateTitle(function)
            if not templateTitle:
                logging.warning("Template with empty title")
            pair = next((x for x in frame if x[0] == templateTitle), None)
            if pair:
                params = pair[1]
                # extract positional args
                params = [params.get(str(i+1)) for i in range(len(params))]
                return funct(*params)
            else:
                return funct()
    return ''


# Dispatch table: parser function name -> implementation.
parserFunctions = {

    '#expr': sharp_expr,

    '#if': sharp_if,

    '#ifeq': sharp_ifeq,

    '#iferror': sharp_iferror,

    '#ifexpr': lambda *args: '',  # not supported

    '#ifexist': lambda *args: '',  # not supported

    '#rel2abs': lambda *args: '',  # not supported

    '#switch': sharp_switch,

    '#language': lambda *args: '',  # not supported

    '#time': lambda *args: '',  # not supported

    '#timel': lambda *args: '',  # not supported

    '#titleparts': lambda *args: '',  # not supported

    # This function is used in some pages to construct links
    # http://meta.wikimedia.org/wiki/Help:URL
    'urlencode': lambda string, *rest: urllib.quote(string.encode('utf-8')),

    'lc': lambda string, *rest: string.lower() if string else '',

    'lcfirst': lambda string, *rest: lcfirst(string),

    'uc': lambda string, *rest: string.upper() if string else '',

    'ucfirst': lambda string, *rest: ucfirst(string),

    'int': lambda string, *rest: str(int(string)),

}


def callParserFunction(functionName, args, frame):
    """
    Parser functions have similar syntax as templates, except that
    the first argument is everything after the first colon.

    :param functionName: the function name, e.g. '#if' or 'lc'.
    :param args: the list of arguments.
    :param frame: the expansion stack, only used by '#invoke'.
    :return: the result of the invocation, "" in case of failure or when the
        function is unknown.

    http://meta.wikimedia.org/wiki/Help:ParserFunctions
    """

    try:
        if functionName == '#invoke':
            # special handling of frame
            ret = sharp_invoke(args[0].strip(), args[1].strip(), frame)
            #logging.debug('parserFunction> %s %s', functionName, ret)
            return ret
        if functionName in parserFunctions:
            ret = parserFunctions[functionName](*args)
            #logging.debug('parserFunction> %s %s', functionName, ret)
            return ret
    except Exception:
        # best effort: a failing function expands to nothing, but
        # KeyboardInterrupt/SystemExit are no longer swallowed
        return ""  # FIXME: fix errors

    return ""
# ----------------------------------------------------------------------
# Expand using WikiMedia API
# import json

# def expandTemplates(text):
#     """Expand templates invoking MediaWiki API"""
#     text = urlib.urlencodew(text.encode('utf-8'))
#     base = urlbase[:urlbase.rfind('/')]
#     url = base + "/w/api.php?action=expandtemplates&format=json&text=" + text
#     exp = json.loads(urllib.urlopen(url))
#     return exp['expandtemplates']['*']

# ----------------------------------------------------------------------
# Extract Template definition

# a complete <noinclude>...</noinclude> fragment
reNoinclude = re.compile(r'<noinclude>(?:.*?)</noinclude>', re.DOTALL)
# the <includeonly> tags themselves (their content is kept)
reIncludeonly = re.compile(r'</?includeonly>', re.DOTALL)

# These are built before spawning processes, hence they are shared.
templates = {}
redirects = {}
# cache of parser templates
# FIXME: sharing this with a Manager slows down.
templateCache = {}


def define_template(title, page):
    """
    Adds a template defined in the :param page:.

    :param title: the title under which the template is stored.
    :param page: the template source as a list of text lines.

    A page that is a redirect is recorded in `redirects`; any other page has
    its transclusion tags resolved and, if text remains, is stored in
    `templates`.

    @see https://en.wikipedia.org/wiki/Help:Template#Noinclude.2C_includeonly.2C_and_onlyinclude
    """
    global templates
    global redirects

    #title = normalizeTitle(title)

    # check for redirects
    first_line = page[0] if page else ''
    m = re.match(r'#REDIRECT.*?\[\[([^\]]*)]]', first_line, re.IGNORECASE)
    if m:
        redirects[title] = m.group(1)  #normalizeTitle(m.group(1))
        return

    text = unescape(''.join(page))

    # We're storing template text for future inclusion, therefore,
    # remove all <noinclude> text and keep all <includeonly> text
    # (but eliminate <includeonly> tags per se).
    # However, if <onlyinclude> ... </onlyinclude> parts are present,
    # then only keep them and discard the rest of the template body.
    # This is because using <onlyinclude> on a text fragment is
    # equivalent to enclosing it in <includeonly> tags **AND**
    # enclosing all the rest of the template body in <noinclude> tags.

    # remove comments
    text = comment.sub('', text)

    # eliminate <noinclude> fragments
    text = reNoinclude.sub('', text)
    # eliminate unterminated <noinclude> elements
    text = re.sub(r'<noinclude\s*>.*$', '', text, flags=re.DOTALL)
    text = re.sub(r'<noinclude/>', '', text)

    onlyincludeAccumulator = ''
    for m in re.finditer('<onlyinclude>(.*?)</onlyinclude>', text, re.DOTALL):
        onlyincludeAccumulator += m.group(1)
    if onlyincludeAccumulator:
        text = onlyincludeAccumulator
    else:
        text = reIncludeonly.sub('', text)

    if text:
        if title in templates:
            logging.warning('Redefining: %s', title)
        templates[title] = text
# ----------------------------------------------------------------------

def dropNested(text, openDelim, closeDelim):
    """
    Remove from text every region enclosed by the given delimiters, coping
    with nesting, e.g. namespaces and tables.

    :param openDelim: regular expression for the opening delimiter.
    :param closeDelim: regular expression for the closing delimiter.
    :return: text without the delimited regions; text itself when no opening
        delimiter occurs.
    """
    opener = re.compile(openDelim, re.IGNORECASE)
    closer = re.compile(closeDelim, re.IGNORECASE)

    first = opener.search(text, 0)
    if not first:
        return text

    # the text is partitioned in separate blocks { } { }
    regions = []    # (start, end) offsets, one per block
    depth = 0       # openings seen but not yet closed
    closing = closer.search(text, first.end())
    opening = first
    while closing:
        opening = opener.search(text, opening.end())
        if not opening:
            # no further opening: close whatever is still pending
            while depth:
                depth -= 1
                later = closer.search(text, closing.end())
                if later:
                    closing = later
                else:
                    break
            regions.append((first.start(), closing.end()))
            break
        while closing.end() < opening.start():
            # { } {
            if depth:
                depth -= 1
                # try closing more
                lastEnd = closing.end()
                closing = closer.search(text, closing.end())
                if not closing:
                    # unbalanced: everything from the first block on is
                    # merged into a single region
                    if regions:
                        merged = (regions[0][0], lastEnd)
                    else:
                        merged = (first.start(), lastEnd)
                    regions = [merged]
                    break
            else:
                regions.append((first.start(), closing.end()))
                # advance start, find next close
                first = opening
                closing = closer.search(text, opening.end())
                break  # { }
        if opening != first:
            # { { }
            depth += 1
    # collect text outside partitions
    return dropSpans(regions, text)


def dropSpans(spans, text):
    """
    Drop from text the blocks identified in :param spans:, possibly nested.

    :param spans: list of (start, end) offsets; it is sorted in place.
    :return: the text outside the spans.
    """
    spans.sort()
    kept = []
    offset = 0
    for begin, stop in spans:
        if begin < offset:
            # starts inside a span already dropped: nested, ignore
            continue
        kept.append(text[offset:begin])
        offset = stop
    kept.append(text[offset:])
    return ''.join(kept)
# ----------------------------------------------------------------------
# WikiLinks
# See https://www.mediawiki.org/wiki/Help:Links#Internal_links

# Can be nested [[File:..|..[[..]]..|..]], [[Category:...]], etc.
# Also: [[Help:IPA for Catalan|[andora]]]

def replaceInternalLinks(text):
    """
    Replaces internal links of the form:
       [[title |...|label]]trail

    with title concatenated with trail, when present, e.g. 's' for plural.
    """
    # call this after removal of external links, so we need not worry about
    # triple closing ]]].
    pieces = []
    cur = 0
    for s, e in findBalanced(text, ['[['], [']]']):
        # word characters glued to the closing ]] belong to the link
        tail = tailRE.match(text, e)
        if tail:
            trail, end = tail.group(0), tail.end()
        else:
            trail, end = '', e

        inner = text[s+2:e-2]
        # find first |
        pipe = inner.find('|')
        if pipe < 0:
            title = label = inner
        else:
            title = inner[:pipe].rstrip()
            # find last | that is not inside a nested link
            scanFrom = pipe+1
            for s1, e1 in findBalanced(inner, ['[['], [']]']):
                candidate = inner.rfind('|', scanFrom, s1)
                if candidate >= 0:
                    pipe = candidate  # advance
                scanFrom = e1
            label = inner[pipe+1:].strip()

        pieces.append(text[cur:s] + makeInternalLink(title, label) + trail)
        cur = end
    pieces.append(text[cur:])
    return ''.join(pieces)

# the official version is a method in class Parser, similar to this:
# def replaceInternalLinks2(text):
#     global wgExtraInterlanguageLinkPrefixes
#     # the % is needed to support urlencoded titles as well
#     tc = Title::legalChars() + '#%'
#     # Match a link having the form [[namespace:link|alternate]]trail
#     e1 = re.compile("([%s]+)(?:\\|(.+?))?]](.*)" % tc, re.S | re.D)
#     # Match cases where there is no "]]", which might still be images
#     e1_img = re.compile("([%s]+)\\|(.*)" % tc, re.S | re.D)
#     holders = LinkHolderArray(self)
#     # split the entire text string on occurrences of [[
#     iterBrackets = re.compile('[[').finditer(text)
#     m in iterBrackets.next()
#     # get the first element (all text up to first [[)
#     s
= text[:m.start()] # cur = m.end() # line = s # useLinkPrefixExtension = self.getTargetLanguage().linkPrefixExtension() # e2 = None # if useLinkPrefixExtension: # # Match the end of a line for a word that is not followed by whitespace, # # e.g. in the case of "The Arab al[[Razi]]", "al" will be matched # global wgContLang # charset = wgContLang.linkPrefixCharset() # e2 = re.compile("((?>.*[^charset]|))(.+)", re.S | re.D | re.U) # if self.mTitle is None: # raise MWException(__METHOD__ + ": \self.mTitle is null\n") # nottalk = not self.mTitle.isTalkPage() # if useLinkPrefixExtension: # m = e2.match(s) # if m: # first_prefix = m.group(2) # else: # first_prefix = false # else: # prefix = '' # useSubpages = self.areSubpagesAllowed() # for m in iterBrackets: # line = text[cur:m.start()] # cur = m.end() # # TODO: Check for excessive memory usage # if useLinkPrefixExtension: # m = e2.match(e2) # if m: # prefix = m.group(2) # s = m.group(1) # else: # prefix = '' # # first link # if first_prefix: # prefix = first_prefix # first_prefix = False # might_be_img = False # m = e1.match(line) # if m: # page with normal label or alt # label = m.group(2) # # If we get a ] at the beginning of m.group(3) that means we have a link that is something like: # # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up, # # the real problem is with the e1 regex # # See bug 1300. # # # # Still some problems for cases where the ] is meant to be outside punctuation, # # and no image is in sight. See bug 2095. # # # if label and m.group(3)[0] == ']' and '[' in label: # label += ']' # so that replaceExternalLinks(label) works later # m.group(3) = m.group(3)[1:] # # fix up urlencoded title texts # if '%' in m.group(1): # # Should anchors '#' also be rejected? 
# m.group(1) = str_replace(array('<', '>'), array('<', '>'), rawurldecode(m.group(1))) # trail = m.group(3) # else: # m = e1_img.match(line): # if m: # # Invalid, but might be an image with a link in its caption # might_be_img = true # label = m.group(2) # if '%' in m.group(1): # m.group(1) = rawurldecode(m.group(1)) # trail = "" # else: # Invalid form; output directly # s += prefix + '[[' + line # continue # origLink = m.group(1) # # Dont allow internal links to pages containing # # PROTO: where PROTO is a valid URL protocol these # # should be external links. # if (preg_match('/^(?i:' + self.mUrlProtocols + ')/', origLink)) { # s += prefix + '[[' + line # continue # } # # Make subpage if necessary # if useSubpages: # link = self.maybeDoSubpageLink(origLink, label) # else: # link = origLink # noforce = origLink[0] != ':' # if not noforce: # # Strip off leading ':' # link = link[1:] # nt = Title::newFromText(self.mStripState.unstripNoWiki(link)) # if nt is None: # s += prefix + '[[' + line # continue # ns = nt.getNamespace() # iw = nt.getInterwiki() # if might_be_img { # if this is actually an invalid link # if (ns == NS_FILE and noforce) { # but might be an image # found = False # while True: # # look at the next 'line' to see if we can close it there # next_line = iterBrakets.next() # if not next_line: # break # m = explode(']]', next_line, 3) # if m.lastindex == 3: # # the first ]] closes the inner link, the second the image # found = True # label += "[[%s]]%s" % (m.group(0), m.group(1)) # trail = m.group(2) # break # elif m.lastindex == 2: # # if there is exactly one ]] that is fine, we will keep looking # label += "[[{m[0]}]]{m.group(1)}" # else: # # if next_line is invalid too, we need look no further # label += '[[' + next_line # break # if not found: # # we couldnt find the end of this imageLink, so output it raw # # but dont ignore what might be perfectly normal links in the text we ve examined # holders.merge(self.replaceInternalLinks2(label)) # s += 
"{prefix}[[%s|%s" % (link, text) # # note: no trail, because without an end, there *is* no trail # continue # } else: # it is not an image, so output it raw # s += "{prefix}[[%s|%s" % (link, text) # # note: no trail, because without an end, there *is* no trail # continue # } # wasblank = (text == '') # if wasblank: # text = link # else: # # Bug 4598 madness. Handle the quotes only if they come from the alternate part # # [[Lista d''e paise d''o munno]] . Lista d''e paise d''o munno # # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']] # # . Criticism of Harry Potter # text = self.doQuotes(text) # # Link not escaped by : , create the various objects # if noforce and not nt.wasLocalInterwiki(): # # Interwikis # if iw and mOptions.getInterwikiMagic() and nottalk and ( # Language::fetchLanguageName(iw, None, 'mw') or # in_array(iw, wgExtraInterlanguageLinkPrefixes)): # # Bug 24502: filter duplicates # if iw not in mLangLinkLanguages: # self.mLangLinkLanguages[iw] = True # self.mOutput.addLanguageLink(nt.getFullText()) # s = rstrip(s + prefix) # s += strip(trail, "\n") == '' ? 
'': prefix + trail # continue # if ns == NS_FILE: # if not wfIsBadImage(nt.getDBkey(), self.mTitle): # if wasblank: # # if no parameters were passed, text # # becomes something like "File:Foo.png", # # which we dont want to pass on to the # # image generator # text = '' # else: # # recursively parse links inside the image caption # # actually, this will parse them in any other parameters, too, # # but it might be hard to fix that, and it doesnt matter ATM # text = self.replaceExternalLinks(text) # holders.merge(self.replaceInternalLinks2(text)) # # cloak any absolute URLs inside the image markup, so replaceExternalLinks() wont touch them # s += prefix + self.armorLinks( # self.makeImage(nt, text, holders)) + trail # else: # s += prefix + trail # continue # if ns == NS_CATEGORY: # s = rstrip(s + "\n") # bug 87 # if wasblank: # sortkey = self.getDefaultSort() # else: # sortkey = text # sortkey = Sanitizer::decodeCharReferences(sortkey) # sortkey = str_replace("\n", '', sortkey) # sortkey = self.getConverterLanguage().convertCategoryKey(sortkey) # self.mOutput.addCategory(nt.getDBkey(), sortkey) # s += strip(prefix + trail, "\n") == '' ? '' : prefix + trail # continue # } # } # # Self-link checking. For some languages, variants of the title are checked in # # LinkHolderArray::doVariants() to allow batching the existence checks necessary # # for linking to a different variant. 
#     if ns != NS_SPECIAL and nt.equals(self.mTitle) and !nt.hasFragment():
#         s += prefix + Linker::makeSelfLinkObj(nt, text, '', trail)
#         continue
#     # NS_MEDIA is a pseudo-namespace for linking directly to a file
#     # @todo FIXME: Should do batch file existence checks, see comment below
#     if ns == NS_MEDIA:
#         # Give extensions a chance to select the file revision for us
#         options = []
#         descQuery = False
#         Hooks::run('BeforeParserFetchFileAndTitle',
#                    [this, nt, &options, &descQuery])
#         # Fetch and register the file (file title may be different via hooks)
#         file, nt = self.fetchFileAndTitle(nt, options)
#         # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
#         s += prefix + self.armorLinks(
#             Linker::makeMediaLinkFile(nt, file, text)) + trail
#         continue
#     # Some titles, such as valid special pages or files in foreign repos, should
#     # be shown as bluelinks even though they are not included in the page table
#     #
#     # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
#     # batch file existence checks for NS_FILE and NS_MEDIA
#     if iw == '' and nt.isAlwaysKnown():
#         self.mOutput.addLink(nt)
#         s += self.makeKnownLinkHolder(nt, text, array(), trail, prefix)
#     else:
#         # Links will be added to the output link list after checking
#         s += holders.makeHolder(nt, text, array(), trail, prefix)
#     }
#     return holders


def makeInternalLink(title, label):
    """
    Render an internal link.

    :param title: the link target, possibly with a namespace prefix.
    :param label: the text shown for the link.
    :return: '' when the namespace is not among the accepted ones; otherwise
        an anchor element when links are kept, or just the label.
    """
    colon = title.find(':')
    if colon > 0 and title[:colon] not in acceptedNamespaces:
        return ''
    if colon == 0:
        # drop also :File:
        colon2 = title.find(':', colon+1)
        if colon2 > 1 and title[colon+1:colon2] not in acceptedNamespaces:
            return ''
    if Extractor.keepLinks:
        # two placeholders for the two values (href and text)
        return '<a href="%s">%s</a>' % (urllib.quote(title.encode('utf-8')), label)
    else:
        return label

# ----------------------------------------------------------------------
# External links

# from: https://doc.wikimedia.org/mediawiki-core/master/php/DefaultSettings_8php_source.html

wgUrlProtocols = [
    'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://',
    'https://', 'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:',
    'nntp://', 'redis://', 'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://',
    'svn://', 'tel:', 'telnet://', 'urn:', 'worldwind://', 'xmpp:', '//'
]

# from: https://doc.wikimedia.org/mediawiki-core/master/php/Parser_8php_source.html

# Constants needed for external link processing
# Everything except bracket, space, or control characters
# \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
# as well as U+3000 is IDEOGRAPHIC SPACE for bug 19052
EXT_LINK_URL_CLASS = r'[^][<>"\x00-\x20\x7F\s]'
# Groups: 1 = whole URL, 2 = protocol, 3 = label.
# Case-insensitivity is requested through the flags argument: an inline
# (?i) in the middle of a pattern applied to the whole pattern anyway and is
# rejected by recent Python versions.
ExtLinkBracketedRegex = re.compile(
    r'\[((' + '|'.join(re.escape(p) for p in wgUrlProtocols) + ')' +
    EXT_LINK_URL_CLASS + r'+)\s*([^\]\x00-\x08\x0a-\x1F]*?)\]',
    re.I | re.S | re.U)
EXT_IMAGE_REGEX = re.compile(
    r"""^(http://|https://)([^][<>"\x00-\x20\x7F\s]+)
    /([A-Za-z0-9_.,~%\-+&;#*?!=()@\x80-\xFF]+)\.(gif|png|jpg|jpeg)$""",
    re.I | re.X | re.S | re.U)


def replaceExternalLinks(text):
    """
    Replace bracketed external links [url label] by their rendering.

    :return: text with every bracketed external link replaced by the result
        of makeExternalLink(url, label).
    """
    s = ''
    cur = 0
    for m in ExtLinkBracketedRegex.finditer(text):
        s += text[cur:m.start()]
        cur = m.end()

        url = m.group(1)
        label = m.group(3)

        # # The characters '<' and '>' (which were escaped by
        # # removeHTMLtags()) should not be included in
        # # URLs, per RFC 2396.
        # m2 = re.search('&(lt|gt);', url)
        # if m2:
        #     link = url[m2.end():] + ' ' + link
        #     url = url[0:m2.end()]

        # If the link text is an image URL, replace it with an <img> tag
        # This happened by accident in the original parser, but some people used it extensively
        if EXT_IMAGE_REGEX.match(label):
            label = makeExternalImage(label)

        # Use the encoded URL
        # This means that users can paste URLs directly into the text
        # Funny characters like ö aren't valid in URLs anyway
        # This was changed in August 2004
        s += makeExternalLink(url, label)  #+ trail

    return s + text[cur:]


# Function applied to wikiLinks
def makeExternalLink(title, anchor):
    """
    Render an external link.

    The text before the first colon is checked against acceptedNamespaces
    exactly as for internal links, so a URL whose scheme is not listed there
    produces ''.
    NOTE(review): this looks copied from makeInternalLink; confirm that
    dropping such links is intended.

    :return: '', an anchor element when links are kept, or just the anchor
        text.
    """
    colon = title.find(':')
    if colon > 0 and title[:colon] not in acceptedNamespaces:
        return ''
    if colon == 0:
        # drop also :File:
        colon2 = title.find(':', colon+1)
        if colon2 > 1 and title[colon+1:colon2] not in acceptedNamespaces:
            return ''
    if Extractor.keepLinks:
        return '<a href="%s">%s</a>' % (urllib.quote(title.encode('utf-8')), anchor)
    else:
        return anchor


def makeExternalImage(url, alt=''):
    """:return: an image element when links are kept, else the alt text."""
    if Extractor.keepLinks:
        return '<img src="%s" alt="%s">' % (url, alt)
    else:
        return alt

# ----------------------------------------------------------------------

# match tail after wikilink
tailRE = re.compile(r'\w+')

syntaxhighlight = re.compile('<syntaxhighlight .*?>(.*?)</syntaxhighlight>', re.DOTALL)

expand_templates = True
@see https://www.mediawiki.org/wiki/Help:Formatting """ if (expand_templates): # expand templates # See: http://www.mediawiki.org/wiki/Help:Templates text = extractor.expandTemplates(text) else: # Drop transclusions (template, parser functions) text = dropNested(text, r'{{', r'}}') # Drop tables text = dropNested(text, r'{\|', r'\|}') # replace external links text = replaceExternalLinks(text) # replace internal links text = replaceInternalLinks(text) # drop MagicWords behavioral switches text = magicWordsRE.sub('', text) ################ Process HTML ############### # turn into HTML, except for the content of res = '' cur = 0 for m in syntaxhighlight.finditer(text): end = m.end() res += unescape(text[cur:m.start()]) + m.group(1) cur = end text = res + unescape(text[cur:]) # Handle bold/italic/quote if extractor.toHTML: text = bold_italic.sub(r'\1', text) text = bold.sub(r'\1', text) text = italic.sub(r'\1', text) else: text = bold_italic.sub(r'\1', text) text = bold.sub(r'\1', text) text = italic_quote.sub(r'"\1"', text) text = italic.sub(r'"\1"', text) text = quote_quote.sub(r'"\1"', text) # residuals of unbalanced quotes text = text.replace("'''", '').replace("''", '"') # Collect spans spans = [] # Drop HTML comments for m in comment.finditer(text): spans.append((m.start(), m.end())) # Drop self-closing tags for pattern in selfClosing_tag_patterns: for m in pattern.finditer(text): spans.append((m.start(), m.end())) # Drop ignored tags for left, right in ignored_tag_patterns: for m in left.finditer(text): spans.append((m.start(), m.end())) for m in right.finditer(text): spans.append((m.start(), m.end())) # Bulk remove all spans text = dropSpans(spans, text) # Drop discarded elements for tag in discardElements: text = dropNested(text, r'<\s*%s\b[^>/]*>' % tag, r'<\s*/\s*%s>' % tag) if not extractor.toHTML: # Turn into text what is left (&nbsp;) and text = unescape(text) # Expand placeholders for pattern, placeholder in placeholder_tag_patterns: index = 1 for match 
in pattern.finditer(text): text = text.replace(match.group(), '%s_%d' % (placeholder, index)) index += 1 text = text.replace('<<', u'«').replace('>>', u'»') ############################################# # Cleanup text text = text.replace('\t', ' ') text = spaces.sub(' ', text) text = dots.sub('...', text) text = re.sub(u' (,:\.\)\]»)', r'\1', text) text = re.sub(u'(\[\(«) ', r'\1', text) text = re.sub(r'\n\W+?\n', '\n', text, flags=re.U) # lines with only punctuations text = text.replace(',,', ',').replace(',.', '.') return text # skip level 1, it is page name level section = re.compile(r'(==+)\s*(.*?)\s*\1') listOpen = { '*': '

    ', '#': '
      ', ';': '
      ', ':': '
      ' } listClose = { '*': '
', '#': '', ';': '', ':': '' } listItem = { '*': '
  • %s
  • ', '#': '
  • %s', ';': '
    %s
    ', ':': '
    %s
    ' } def compact(text): """Deal with headers, lists, empty sections, residuals of tables. :param toHTML: convert to HTML """ page = [] # list of paragraph headers = {} # Headers for unfilled sections emptySection = False # empty sections are discarded listLevel = '' # nesting of lists for line in text.split('\n'): if not line: continue # Handle section titles m = section.match(line) if m: title = m.group(2) lev = len(m.group(1)) if Extractor.toHTML: page.append("%s" % (lev, title, lev)) if title and title[-1] not in '!?': title += '.' headers[lev] = title # drop previous headers for i in headers.keys(): if i > lev: del headers[i] emptySection = True continue # Handle page title if line.startswith('++'): title = line[2:-2] if title: if title[-1] not in '!?': title += '.' page.append(title) # handle indents elif line[0] == ':': #page.append(line.lstrip(':*#;')) continue # handle lists elif line[0] in '*#;:': if Extractor.toHTML: i = 0 for c,n in izip_longest(listLevel, line, fillvalue=''): if not n or n not in '*#;:': if c: page.append(listClose[c]) listLevel = listLevel[:-1] continue else: break # n != '' if c != n and (not c or (c not in ';:' and n not in ';:')): if c: # close level page.append(listClose[c]) listLevel = listLevel[:-1] listLevel += n page.append(listOpen[n]) i += 1 n = line[i-1] # last list char line = line[i:].strip() if line: # FIXME: n is '"' page.append(listItem[n] % line) else: continue elif len(listLevel): for c in reversed(listLevel): page.append(listClose[c]) listLevel = [] # Drop residuals of lists elif line[0] in '{|' or line[-1] == '}': continue # Drop irrelevant lines elif (line[0] == '(' and line[-1] == ')') or line.strip('.-') == '': continue elif len(headers): if not Extractor.keepSections: items = headers.items() items.sort() for (i, v) in items: page.append(v) headers.clear() page.append(line) # first line emptySection = False elif not emptySection: page.append(line) # dangerous # # Drop preformatted # elif line[0] == ' ': # 
continue return page def handle_unicode(entity): numeric_code = int(entity[2:-1]) if numeric_code >= 0x10000: return '' return unichr(numeric_code) #------------------------------------------------------------------------------ # Output class NextFile(object): """ Synchronous generation of next available file name. """ filesPerDir = 100 def __init__(self, path_name): self.path_name = path_name self.dir_index = -1 self.file_index = -1 def next(self): self.file_index = (self.file_index + 1) % NextFile.filesPerDir if self.file_index == 0: self.dir_index += 1 dirname = self._dirname() if not os.path.isdir(dirname): os.makedirs(dirname) return self._filepath() def _dirname(self): char1 = self.dir_index % 26 char2 = self.dir_index / 26 % 26 return os.path.join(self.path_name, '%c%c' % (ord('A') + char2, ord('A') + char1)) def _filepath(self): return '%s/wiki_%02d' % (self._dirname(), self.file_index) class OutputSplitter(object): """ File-like object, that splits output to multiple files of a given max size. """ def __init__(self, nextFile, max_file_size=0, compress=True): """ :param nextfile: a NextFile object from which to obtain filenames to use. :param max_file_size: the maximum size of each file. :para compress: whether to write data with bzip compression. 
""" self.nextFile = nextFile self.compress = compress self.max_file_size = max_file_size self.file = self.open(self.nextFile.next()) def reserve(self, size): if self.file.tell() + size > self.max_file_size: self.close() self.file = self.open(self.nextFile.next()) def write(self, data): self.reserve(len(data)) self.file.write(data) def close(self): self.file.close() def open(self, filename): if self.compress: return bz2.BZ2File(filename + '.bz2', 'w') else: return open(filename, 'w') # ---------------------------------------------------------------------- # READER tagRE = re.compile(r'(.*?)<(/?\w+)[^>]*>(?:([^<]*)(<.*?>)?)?') # 1 2 3 4 def load_templates(file, output_file=None): """ Load templates from :param file:. :param output_file: file where to save templates and modules. """ global templateNamespace, templatePrefix templatePrefix = templateNamespace + ':' global moduleNamespace, modulePrefix modulePrefix = moduleNamespace + ':' articles = 0 page = [] inText = False if output_file: output = codecs.open(output_file, 'wb', 'utf-8') for line in file: line = line.decode('utf-8') if '<' not in line: # faster than doing re.search() if inText: page.append(line) continue m = tagRE.search(line) if not m: continue tag = m.group(2) if tag == 'page': page = [] elif tag == 'title': title = m.group(3) elif tag == 'text': inText = True line = line[m.start(3):m.end(3)] page.append(line) if m.lastindex == 4: # open-close inText = False elif tag == '/text': if m.group(1): page.append(m.group(1)) inText = False elif inText: page.append(line) elif tag == '/page': if not output_file and not templateNamespace: # do not know it yet # we reconstruct it from the first title colon = title.find(':') if colon > 1: templateNamespace = title[:colon] templatePrefix = title[:colon+1] # FIXME: should reconstruct also moduleNamespace if title.startswith(templatePrefix): define_template(title, page) # save templates and modules to file if output_file and (title.startswith(templatePrefix) or 
title.startswith(modulePrefix)): output.write('\n') output.write(' %s\n' % title) output.write(' 10\n') output.write(' ') for page_line in page: output.write(page_line) output.write(' \n') output.write('\n') page = [] articles += 1 if articles % 100000 == 0: logging.info("Preprocessed %d pages", articles) if output_file: output.close() logging.info("Saved %d templates to '%s'", len(templates), output_file) def process_dump(input_file, template_file, out_file, file_size, file_compress, process_count): """ :param input_file: name of the wikipedia dump file; '-' to read from stdin :param template_file: optional file with template definitions. :param out_file: directory where to store extracted data, or '-' for stdout :param file_size: max size of each extracted file, or None for no max (one file) :param file_compress: whether to compress files with bzip. :param process_count: number of extraction processes to spawn. """ global urlbase global knownNamespaces global templateNamespace, templatePrefix global moduleNamespace, modulePrefix global expand_templates if input_file == '-': input = sys.stdin else: input = fileinput.FileInput(input_file, openhook=fileinput.hook_compressed) # collect siteinfo for line in input: line = line.decode('utf-8') m = tagRE.search(line) if not m: continue tag = m.group(2) if tag == 'base': # discover urlbase from the xml dump file # /mediawiki/siteinfo/base base = m.group(3) urlbase = base[:base.rfind("/")] elif tag == 'namespace': knownNamespaces.add(m.group(3)) if re.search('key="10"', line): templateNamespace = m.group(3) templatePrefix = templateNamespace + ':' elif re.search('key="828"', line): moduleNamespace = m.group(3) modulePrefix = moduleNamespace + ':' elif tag == '/siteinfo': break if expand_templates: # preprocess template_load_start = default_timer() if template_file and os.path.exists(template_file): logging.info("Preprocessing '%s' to collect template definitions: this may take some time.", template_file) file = 
fileinput.FileInput(template_file, openhook=fileinput.hook_compressed) load_templates(file) file.close() else: if input_file == '-': # can't scan then reset stdin; must error w/ suggestion to specify template_file raise ValueError("to use templates with stdin dump, must supply explicit template-file") logging.info("Preprocessing '%s' to collect template definitions: this may take some time.", input_file) load_templates(input, template_file) input.close() input = fileinput.FileInput(input_file,openhook=fileinput.hook_compressed) template_load_elapsed = default_timer() - template_load_start logging.info("Loaded %d templates in %.1fs", len(templates), template_load_elapsed) if out_file == '-': output = sys.stdout if file_compress: logging.warn("writing to stdout, so no output compression (use external tool)") else: nextFile = NextFile(out_file) output = OutputSplitter(nextFile, file_size, file_compress) # process pages logging.info("Starting page extraction from %s.", input_file) extract_start = default_timer() # Parallel Map/Reduce: # - pages to be processed are dispatched to workers # - a reduce process collects the results, sort them and print them. 
maxsize = 10 * process_count # output queue output_queue = Queue(maxsize=maxsize) # Reduce job that sorts and prints output reduce = Process(target=reduce_process, args=(output_queue, output)) reduce.start() # initialize jobs queue jobs_queue = Queue(maxsize=maxsize) # start worker processes logging.info("Using %d extract processes.", process_count) workers = [] for _ in xrange(max(1, process_count)): extractor = Process(target=extract_process, args=(jobs_queue, output_queue)) extractor.daemon = True # only live while parent process lives extractor.start() workers.append(extractor) # Mapper process # we collect individual lines, since str.join() is significantly faster # than concatenation page = [] id = None last_id = None ordinal = 0 # page count inText = False redirect = False for line in input: line = line.decode('utf-8') if '<' not in line: # faster than doing re.search() if inText: page.append(line) continue m = tagRE.search(line) if not m: continue tag = m.group(2) if tag == 'page': page = [] redirect = False elif tag == 'id' and not id: id = m.group(3) elif tag == 'title': title = m.group(3) elif tag == 'redirect': redirect = True elif tag == 'text': inText = True line = line[m.start(3):m.end(3)] page.append(line) if m.lastindex == 4: # open-close inText = False elif tag == '/text': if m.group(1): page.append(m.group(1)) inText = False elif inText: page.append(line) elif tag == '/page': colon = title.find(':') if (colon < 0 or title[:colon] in acceptedNamespaces) and id != last_id and \ not redirect and not title.startswith(templateNamespace): job = (id, title, page, ordinal) jobs_queue.put(job) # goes to any available extract_process last_id = id ordinal += 1 id = None page = [] input.close() # signal termination for w in workers: jobs_queue.put(None) # wait for workers to terminate for w in workers: w.join() # signal end of work to reduce process output_queue.put(None) # wait for it to finish reduce.join() if output != sys.stdout: output.close() 
extract_duration = default_timer() - extract_start extract_rate = ordinal / extract_duration logging.info("Finished %d-process extraction of %d articles in %.1fs (%.1f art/s)", process_count, ordinal, extract_duration, extract_rate) #---------------------------------------------------------------------- # Multiprocess support def extract_process(jobs_queue, output_queue): """Pull tuples of raw page content, do CPU/regex-heavy fixup, push finished text :param job_queue: where to get jobs. :param output_queue: where to queue extracted text for output. """ while True: job = jobs_queue.get() # job is (id, title, page, ordinal) if job: out = StringIO() # memory buffer Extractor(*job[:3]).extract(out) # (id, title, page) text = out.getvalue() output_queue.put((job[3], text)) # (ordinal, extracted_text) out.close() else: break def reduce_process(output_queue, output): """Pull finished article text, write series of files (or stdout) :param output_queue: text to be output. :param output: file object where to print. 
""" interval_start = default_timer() period = 100000 # FIXME: use a heap ordering_buffer = {} # collected pages next_ordinal = 0 # sequence number of pages while True: if next_ordinal in ordering_buffer: output.write(ordering_buffer.pop(next_ordinal)) next_ordinal += 1 # progress report if next_ordinal % period == 0: interval_rate = period / (default_timer() - interval_start) logging.info("Extracted %d articles (%.1f art/s)", next_ordinal, interval_rate) interval_start = default_timer() else: # mapper puts None to signal finish pair = output_queue.get() if not pair: break ordinal, text = pair ordering_buffer[ordinal] = text # ---------------------------------------------------------------------- # Minimum size of output files minFileSize = 200 * 1024 def main(): global urlbase, acceptedNamespaces global expand_templates, templateCache parser = argparse.ArgumentParser(prog=os.path.basename(sys.argv[0]), formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__) parser.add_argument("input", help="XML wiki dump file") groupO = parser.add_argument_group('Output') groupO.add_argument("-o", "--output", default="text", help="directory for extracted files (or '-' for dumping to stdin)") groupO.add_argument("-b", "--bytes", default="1M", help="maximum bytes per output file (default %(default)s)", metavar="n[KMG]") groupO.add_argument("-c", "--compress", action="store_true", help="compress output files using bzip") groupP = parser.add_argument_group('Processing') groupP.add_argument("--html", action="store_true", help="produce HTML output, subsumes --links and --sections") groupP.add_argument("-l", "--links", action="store_true", help="preserve links") groupP.add_argument("-ns", "--namespaces", default="", metavar="ns1,ns2", help="accepted namespaces") groupP.add_argument("-s", "--sections", action="store_true", help="preserve sections") groupP.add_argument("--templates", help="use or create file containing templates") groupP.add_argument("--no-templates", 
action="store_false", help="Do not expand templates") default_process_count = cpu_count() - 1 parser.add_argument("--processes", type=int, default=default_process_count, help="Number of extract processes (default %(default)s)") groupS = parser.add_argument_group('Special') groupS.add_argument("-q", "--quiet", action="store_true", help="suppress reporting progress info") groupS.add_argument("--debug", action="store_true", help="print debug info") groupS.add_argument("-a", "--article", action="store_true", help="analyze a file containing a single article (debug option)") groupS.add_argument("-v", "--version", action="version", version='%(prog)s ' + version, help="print program version") args = parser.parse_args() Extractor.keepLinks = args.links Extractor.keepSections = args.sections Extractor.toHTML = args.html if args.html: Extractor.keepLinks = True Extractor.keepSections = True expand_templates = args.no_templates try: power = 'kmg'.find(args.bytes[-1].lower()) + 1 file_size = int(args.bytes[:-1]) * 1024 ** power if file_size < minFileSize: raise ValueError() except ValueError: logging.error('Insufficient or invalid size: %s', args.bytes) return if args.namespaces: acceptedNamespaces = set(args.namespaces.split(',')) FORMAT = '%(levelname)s: %(message)s' logging.basicConfig(format=FORMAT) logger = logging.getLogger() if not args.quiet: logger.setLevel(logging.INFO) if args.debug: logger.setLevel(logging.DEBUG) input_file = args.input if not Extractor.keepLinks: ignoreTag('a') # sharing cache of parser templates is too slow: #manager = Manager() #templateCache = manager.dict() if args.article: if args.templates: if os.path.exists(args.templates): with open(args.templates) as file: load_templates(file) with open(input_file) as file: page = file.read().decode('utf-8') m = re.search(r'(.*)', page) id = m.group(1) if m else 0 m = re.search(r'(.*)', page) if m: title = m.group(1) else: logging.error('Missing title element') return Extractor(id, title, 
[page]).extract(sys.stdout) return output_path = args.output if output_path != '-' and not os.path.isdir(output_path): try: os.makedirs(output_path) except: logging.error('Could not create: %s', output_path) return process_dump(input_file, args.templates, output_path, file_size, args.compress, args.processes) if __name__ == '__main__': main() vowpal-wabbit-8.6.1.dfsg1/demo/recall_tree/wikipara/makeparadata.py000077500000000000000000000025501332666127000253230ustar00rootroot00000000000000#! /usr/bin/env python # about 1.8 million documents (3x training examples) # # 11502.9 1796001 3130318 0.364573 import os import random import struct import string import sys import time import DocGenerator random.seed (90210) sys.stdout = os.fdopen (sys.stdout.fileno (), 'w', 0) sys.stderr = os.fdopen (sys.stderr.fileno (), 'w', 0) numlabels = int(sys.argv[1]) numparagraphs = int(sys.argv[2]) start = time.time () skip = 0 keep = 0 exnum = 0 with open('docid2label','wb') as f: for docid, paragraphs in DocGenerator.docs ('text/AA/wiki_00.shuf.bz2'): goodparagraphs = [n for n in range (len (paragraphs)) if len (paragraphs[n].split ()) > 20] if len (goodparagraphs) < numparagraphs: skip += 1 continue keep += 1 f.write ('%s\t%u\n'% (docid, keep)) random.shuffle (goodparagraphs) for n in goodparagraphs[0:(numparagraphs-1)]: tokens = [ t.strip (string.punctuation).translate(None,":|") for t in paragraphs[n].split () ] sys.stdout.write ("%u | %s\n"%(keep,' '.join(tokens))) for n in goodparagraphs[(numparagraphs-1):numparagraphs]: tokens = [ t.strip (string.punctuation).translate(None,":|") for t in paragraphs[n].split () ] sys.stderr.write ("%u | %s\n"%(keep,' '.join(tokens))) if keep >= numlabels: break if keep < numlabels: exit(1) vowpal-wabbit-8.6.1.dfsg1/deploy_vw/000077500000000000000000000000001332666127000173255ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/deploy_vw/deploy_vw.vcxproj000066400000000000000000000405101332666127000227520ustar00rootroot00000000000000 
DebugLeakCheck Win32 DebugLeakCheck x64 Debug Win32 Debug x64 Release Win32 Release x64 {D3D9B744-D0FC-4BC7-94A8-89C1AC5692DE} deploy_vw ..\vowpalwabbit\ ..\sdl\SDL-7.0-Recommended.ruleset true Application true v140 MultiByte Application true v140 MultiByte Application true v140 MultiByte Application true v140 MultiByte Application false v140 true MultiByte Application false v140 true MultiByte $(SolutionDir)deploy\$(PlatformShortName)\$(Configuration)\ $(SolutionDir)deploy\stage\$(PlatformShortName)\$(Configuration)\ Clean $(SolutionDir)deploy\$(PlatformShortName)\$(Configuration)\ $(SolutionDir)deploy\stage\$(PlatformShortName)\$(Configuration)\ Clean $(SolutionDir)deploy\$(PlatformShortName)\$(Configuration)\ $(SolutionDir)deploy\stage\$(PlatformShortName)\$(Configuration)\ Clean $(SolutionDir)deploy\$(PlatformShortName)\$(Configuration)\ $(SolutionDir)deploy\stage\$(PlatformShortName)\$(Configuration)\ Clean $(SolutionDir)deploy\$(PlatformShortName)\$(Configuration)\ $(SolutionDir)deploy\stage\$(PlatformShortName)\$(Configuration)\ Clean $(SolutionDir)deploy\$(PlatformShortName)\$(Configuration)\ $(SolutionDir)deploy\stage\$(PlatformShortName)\$(Configuration)\ Clean Level3 Disabled true true rd /s /y $(OutputPath) Cleaning Deploy xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\vw.exe" "$(OutputPath)" xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\vw.pdb" "$(OutputPath)" xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\spanning_tree.exe" "$(OutputPath)" xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\spanning_tree.pdb" "$(OutputPath)" xcopy /v /i /r /y "$(VCInstallDir)\redist\Debug_NonRedist\$(PlatformShortName)\Microsoft.VC120.DebugCRT" "$(OutputPath)" xcopy /v /i /r /y "%$(VCInstallDir)redist\Debug_NonRedist\$(PlatformShortName)\Microsoft.VC120.DebugCXXAMP" "$(OutputPath)" xcopy /v /i /r /y 
"$(VCInstallDir)redist\Debug_NonRedist\$(PlatformShortName)\Microsoft.VC120.DebugOPENMP" "$(OutputPath)" Level3 Disabled true true rd /s /y $(OutputPath) Cleaning Deploy xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\vw.exe" "$(OutputPath)" xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\vw.pdb" "$(OutputPath)" xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\spanning_tree.exe" "$(OutputPath)" xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\spanning_tree.pdb" "$(OutputPath)" xcopy /v /i /r /y "$(VCInstallDir)\redist\Debug_NonRedist\$(PlatformShortName)\Microsoft.VC120.DebugCRT" "$(OutputPath)" xcopy /v /i /r /y "%$(VCInstallDir)redist\Debug_NonRedist\$(PlatformShortName)\Microsoft.VC120.DebugCXXAMP" "$(OutputPath)" xcopy /v /i /r /y "$(VCInstallDir)redist\Debug_NonRedist\$(PlatformShortName)\Microsoft.VC120.DebugOPENMP" "$(OutputPath)" Level3 Disabled true true rd /s /y $(OutputPath) Cleaning Deploy xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\vw.exe" "$(OutputPath)" xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\vw.pdb" "$(OutputPath)" xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\spanning_tree.exe" "$(OutputPath)" xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\spanning_tree.pdb" "$(OutputPath)" xcopy /v /i /r /y "$(VCInstallDir)\redist\Debug_NonRedist\$(PlatformShortName)\Microsoft.VC120.DebugCRT" "$(OutputPath)" xcopy /v /i /r /y "%$(VCInstallDir)redist\Debug_NonRedist\$(PlatformShortName)\Microsoft.VC120.DebugCXXAMP" "$(OutputPath)" xcopy /v /i /r /y "$(VCInstallDir)redist\Debug_NonRedist\$(PlatformShortName)\Microsoft.VC120.DebugOPENMP" "$(OutputPath)" Level3 Disabled true true rd /s /y $(OutputPath) Cleaning Deploy xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\vw.exe" "$(OutputPath)" xcopy /f /v /i /r /y 
"$(SolutionDir)$(PlatformShortName)\$(Configuration)\vw.pdb" "$(OutputPath)" xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\spanning_tree.exe" "$(OutputPath)" xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\spanning_tree.pdb" "$(OutputPath)" xcopy /v /i /r /y "$(VCInstallDir)\redist\Debug_NonRedist\$(PlatformShortName)\Microsoft.VC120.DebugCRT" "$(OutputPath)" xcopy /v /i /r /y "%$(VCInstallDir)redist\Debug_NonRedist\$(PlatformShortName)\Microsoft.VC120.DebugCXXAMP" "$(OutputPath)" xcopy /v /i /r /y "$(VCInstallDir)redist\Debug_NonRedist\$(PlatformShortName)\Microsoft.VC120.DebugOPENMP" "$(OutputPath)" Level3 MaxSpeed true true true true true true rd /s /y $(OutputPath) Cleaning Deploy xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\vw.exe" "$(OutputPath)" xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\spanning_tree.exe" "$(OutputPath)" xcopy /v /i /r /y "$(VCInstallDir)\redist\$(PlatformShortName)\Microsoft.VC120.CRT" "$(OutputPath)" xcopy /v /i /r /y "%$(VCInstallDir)redist\$(PlatformShortName)\Microsoft.VC120.CXXAMP" "$(OutputPath)" xcopy /v /i /r /y "$(VCInstallDir)redist\$(PlatformShortName)\Microsoft.VC120.OPENMP" "$(OutputPath)" Level3 MaxSpeed true true true true true true rd /s /y $(OutputPath) Cleaning Deploy xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\vw.exe" "$(OutputPath)" xcopy /f /v /i /r /y "$(SolutionDir)$(PlatformShortName)\$(Configuration)\spanning_tree.exe" "$(OutputPath)" xcopy /v /i /r /y "$(VCInstallDir)\redist\$(PlatformShortName)\Microsoft.VC120.CRT" "$(OutputPath)" xcopy /v /i /r /y "%$(VCInstallDir)redist\$(PlatformShortName)\Microsoft.VC120.CXXAMP" "$(OutputPath)" xcopy /v /i /r /y "$(VCInstallDir)redist\$(PlatformShortName)\Microsoft.VC120.OPENMP" "$(OutputPath)" This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. 
For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. vowpal-wabbit-8.6.1.dfsg1/deployvw.bat000066400000000000000000000030241332666127000176550ustar00rootroot00000000000000@echo off if "%1" == "" goto usage if not "%2" == "" goto usage if "%VSINSTALLDIR%" == "" goto missing if /i %1 == x86 goto x86 if /i %1 == x64 goto x64 if %VSINSTALLDIR% :x86 devenv vowpalwabbit\vw.sln /Clean "Release|x86" devenv vowpalwabbit\vw.sln /Rebuild "Release|x86" xcopy /v /i /r /y vowpalwabbit\x86\Release\vw.exe deploy\x86\Release\ xcopy /v /i /r /y vowpalwabbit\x86\Release\spanning_tree.exe deploy\x86\Release\ xcopy /v /i /r /y "%VSINSTALLDIR%VC\redist\x86\Microsoft.VC120.CRT" deploy\x86\Release\ xcopy /v /i /r /y "%VSINSTALLDIR%VC\redist\x86\Microsoft.VC120.CXXAMP" deploy\x86\Release\ xcopy /v /i /r /y "%VSINSTALLDIR%VC\redist\x86\Microsoft.VC120.OPENMP" deploy\x86\Release\ goto :eof :x64 devenv vowpalwabbit\vw.sln /Clean "Release|x64" devenv vowpalwabbit\vw.sln /Rebuild "Release|x64" xcopy /v /i /r /y vowpalwabbit\x64\Release\vw.exe deploy\x64\Release\ xcopy /v /i /r /y vowpalwabbit\x64\Release\spanning_tree.exe deploy\x64\Release\ xcopy /v /i /r /y "%VSINSTALLDIR%VC\redist\x64\Microsoft.VC120.CRT" deploy\x64\Release\ xcopy /v /i /r /y "%VSINSTALLDIR%VC\redist\x64\Microsoft.VC120.CXXAMP" deploy\x64\Release\ xcopy /v /i /r /y "%VSINSTALLDIR%VC\redist\x64\Microsoft.VC120.OPENMP" deploy\x64\Release\ goto :eof :usage echo Error in script usage. The correct usage is: echo %0 [option] echo where [option] is: x86 ^| x64 echo: echo For example: echo %0 x86 goto :eof :missing echo The variable "VSINSTALLDIR" is missing. Visual Studio 2013 might not be installed. 
goto :eof :eof vowpal-wabbit-8.6.1.dfsg1/doc/000077500000000000000000000000001332666127000160625ustar00rootroot00000000000000vowpal-wabbit-8.6.1.dfsg1/doc/Doxyfile000066400000000000000000003036331332666127000176000ustar00rootroot00000000000000# Doxyfile 1.8.6 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all text # before the first occurrence of this tag. Doxygen uses libiconv (or the iconv # built into libc) for the transcoding. See http://www.gnu.org/software/libiconv # for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = "Vowpal Wabbit" # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. 
PROJECT_NUMBER = # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify an logo or icon that is included in # the documentation. The maximum height of the logo should not exceed 55 pixels # and the maximum width should not exceed 200 pixels. Doxygen will copy the logo # to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = ./ # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. 
# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. 
# The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path # before file names in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used. # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful if your file system doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO.
SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a # new page for each member. If set to NO, the documentation of a member will be # part of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. 
TAB_SIZE = 4 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines. ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. 
Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, Javascript, # C#, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make # doxygen treat .inc files as Fortran files (default is PHP), and .f files as C # (default is Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibility issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word # or globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match function declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also makes the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO.
BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = YES # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen to replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). 
# # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = NO # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. 
LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. # Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = YES # If the EXTRACT_PACKAGE tag is set to YES all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = YES # If the EXTRACT_STATIC tag is set to YES all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined # locally in source files will be included in the documentation. If set to NO # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO only methods in the interface are # included. # The default value is: NO. 
EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespace # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = YES # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO these classes will be included in the various overviews. This option has # no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # (class|struct|union) declarations. If set to NO these declarations will be # included in the documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. 
If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. 
SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the # todo list. This list is created by putting \todo commands in the # documentation. # The default value is: YES. 
GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the # test list. This list is created by putting \test commands in the # documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES the list # will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. 
SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. Do not use file names with spaces, bibtex cannot handle them. 
See # also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO doxygen will only warn about wrong or incomplete parameter # documentation, but not about the absence of documentation. # The default value is: NO. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. 
Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. # Note: If this tag is empty the current directory is searched. INPUT = ../ # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: http://www.gnu.org/software/libiconv) for the list of # possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank the # following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii, # *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, # *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, # *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, # *.qsf, *.as and *.js. 
FILE_PATTERNS = # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. 
If left blank all # files are included. EXAMPLE_PATTERNS = # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER ) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO.
FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = YES # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = YES # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # function all documented functions referencing it will be listed. # The default value is: NO. 
REFERENCED_BY_RELATION = YES # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = YES # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES, then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see http://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the config file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. 
USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = YES # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split. # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. 
HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML, the header file must include any scripts and style sheets # that doxygen needs, which depend on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. 
If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user- # defined cascading style sheet that is included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet file to the output directory. For an example # see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the stylesheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # http://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. 
# Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to NO can help when comparing the output of multiple runs. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. 
Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries to 1 will produce a fully collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a fully expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: http://developer.apple.com/tools/xcode/), introduced with # OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a # Makefile in the HTML output directory. Running make will produce the docset in # that directory and running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed. A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES. 
DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. # The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop # (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on # Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML # files are now used as the Windows 98 help format, and will replace the old # Windows help format (.hlp) on all Windows platforms in the future. Compressed # HTML files also contain an index, a table of contents, and you can search for # words in the documentation. The HTML workshop also contains a viewer for # compressed HTML files. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_HTMLHELP = NO # The CHM_FILE tag can be used to specify the file name of the resulting .chm # file. You can add a path in front of the file if the result should not be # written to the html output directory. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler ( hhc.exe). 
If non-empty # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated ( # YES) or that it should be included in the master .chm file ( NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. GENERATE_CHI = NO # The CHM_INDEX_ENCODING is used to encode HtmlHelp index ( hhk), content ( hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated ( # YES) or a normal table of contents ( NO) in the .chm file. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members to # the table of contents of the HTML help documentation and to the tree view. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help # (.qch) of the generated HTML documentation. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify # the file name of the resulting .qch file. The path specified is relative to # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. 
For more information please see Qt Help Project / Namespace # (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual # Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- # folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location of Qt's # qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the # generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. 
QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To # install this plugin and make it available under the help contents menu in # Eclipse, the contents of the directory containing the HTML and XML files needs # to be copied into the plugins directory of eclipse. The name of the directory # within the plugins directory should be the same as the ECLIPSE_DOC_ID value. # After copying Eclipse needs to be restarted before the help appears. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_ECLIPSEHELP = NO # A unique identifier for the Eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have this # name. Each documentation set should have its own identifier. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. ECLIPSE_DOC_ID = org.doxygen.Project # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. The # DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top # of each HTML page. A value of NO enables the index and the value YES disables # it. Since the tabs in the index contain the same information as the navigation # tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag # value is set to YES, a side panel will be generated containing a tree-like # index structure (just like the one that is generated for HTML Help). 
For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can # further fine-tune the look of the index. As an example, the default style # sheet generated by doxygen has an example that shows how to put an image at # the root of the tree instead of the PROJECT_NAME. Since the tree basically has # the same information as the tab index, you could consider setting # DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = NO # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # # Note that a value of 0 will completely suppress the enum values from appearing # in the overview section. # Minimum value: 0, maximum value: 20, default value: 4. # This tag requires that the tag GENERATE_HTML is set to YES. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used # to set the initial width (in pixels) of the frame in which the tree is shown. # Minimum value: 0, maximum value: 1500, default value: 250. # This tag requires that the tag GENERATE_HTML is set to YES. TREEVIEW_WIDTH = 250 # When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML # output directory to force them to be regenerated. 
# Minimum value: 8, maximum value: 50, default value: 10. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # # Note that when changing this option you need to delete any form_*.png files in # the HTML output directory before the changes take effect. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see # http://www.mathjax.org) which uses client side Javascript for the rendering # instead of using prerendered bitmaps. Use this if you do not have LaTeX # installed or if you want formulas to look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. USE_MATHJAX = NO # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: # http://docs.mathjax.org/en/latest/output.html) for more details. # Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory # should contain the MathJax.js script. For instance, if the mathjax directory # is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. 
The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of # MathJax from http://www.mathjax.org before deployment. # The default value is: http://cdn.mathjax.org/mathjax/latest. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site # (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and # should work on any modern browser. Note that when using HTML help # (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) # there is already a search function so this one should typically be disabled. # For large projects the javascript based search engine can be slow, then # enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to # search using the keyboard; to jump to the search box use + S # (what the is depends on the OS and browser, but it is typically # , /