pax_global_header00006660000000000000000000000064144552702320014516gustar00rootroot0000000000000052 comment=3d45754325f9ef1c9c6e8851d09a1cfb749572c4 libdogleg-0.16/000077500000000000000000000000001445527023200133745ustar00rootroot00000000000000libdogleg-0.16/Changes000066400000000000000000000104511445527023200146700ustar00rootroot00000000000000libdogleg (0.16) * The build system has been consolidated, and now uses the mrbuild project. This can be installed as a dependency, or downloaded locally. Type "make" for instructions * Building against the new suitesparse is supported -- Dima Kogan Mon, 17 Jul 2023 09:21:26 -0700 libdogleg (0.15.4) * verbose output has a mode to output vnlog * defragmented malloc() calls * dogleg_operatingPoint_t stores the step vector we took to get here * Added dogleg_optimize2() and dogleg_optimize_dense2() entry points that use local parameters -- Dima Kogan Fri, 11 Dec 2020 15:37:26 -0800 libdogleg (0.15.1) * Various updates to the experimental outlier-rejection logic. These break the previous API/ABI, but those were experimental and unreleased anyway -- Dima Kogan Tue, 02 Oct 2018 09:19:03 -0700 ligdogleg (0.15) * added experimental outlier-rejection logic. This is not yet documented, and the API not yet fixed. 
Use at your own risk -- Dima Kogan Wed, 08 Aug 2018 16:04:38 -0700 libdogleg (0.14) * standardized CHOLMOD error messages * gradient-testing reports are now printed in an easy-to-parse vnlog -- Dima Kogan Mon, 12 Feb 2018 10:55:44 -0800 libdogleg (0.13) * Updated cholmod #includes to work without any -I flags This shouldn't break any existing code, since the -I flags become unnecessary, but benign * Clarified gradient reporting -- Dima Kogan Sun, 27 Aug 2017 16:03:49 -0700 libdogleg (0.12) * Exposed a function to compute the Cholesky factorization of JtJ -- Dima Kogan Mon, 01 May 2017 18:50:39 -0700 libdogleg (0.11) * I barf if factorization fails for with a way-too-huge lambda -- Dima Kogan Mon, 01 May 2017 18:50:20 -0700 libdogleg (0.10) * Improved diagnostic printing: no console output if no errors and if no debug logging is requested * Added support for solving DENSE optimization problems in addition to sparse ones -- Dima Kogan Sun, 23 Apr 2017 01:06:15 -0700 libdogleg (0.09) * Build now works with both suitesparse 2.2 and 2.4 -- Dima Kogan Mon, 24 Aug 2015 21:34:04 -0700 libdogleg (0.08) * Makefile, debian/ tweaked to allow packaging the for the main Debian repository -- Dima Kogan Sun, 04 Nov 2012 21:27:33 -0800 libdogleg (0.07) * fixed off-by-one error in reporting of iteration count * added sample program * improved handling of singular JtJ * more efficient trustregion reduction method for failed GN steps * builds under OSX. Contributed by Laurent Bartholdi. 
-- Dima Kogan Tue, 02 Oct 2012 22:41:16 -0700 libdogleg (0.06) * API-breaking change: the user can retrieve the full solver context -- Dima Kogan Tue, 17 Jan 2012 15:57:43 -0800 libdogleg (0.05) * I now require debhelper 7, not 8 * 'install' rules now builds everything * updated URL to the license text * removed unneeded packaging files * packaging shared object in a more debian-like way * I now make a native package -- Dima Kogan Sun, 30 Oct 2011 13:37:39 -0700 libdogleg (0.04-1) * Re-licensed under the LGPL * better documentation, now in a manpage * all versions tied to debian/changelog -- Dima Kogan Tue, 06 Sep 2011 14:54:33 -0700 libdogleg (0.03-1) * un-hardcoded the problem I'm asking CHOLMOD to solve * If I ever see a singular JtJ, I factor JtJ + LAMBDA*I from that point on. This is a quick/dirty way to handle singular JtJ matrices * CHOLMOD messages now go to stderr, not stdout -- Dima Kogan Fri, 02 Sep 2011 17:36:25 -0700 libdogleg (0.02-1) * libdogleg now reports the sovler rms and the iteration count * version bump * all debug output now reports more precision * debug output now contains rho * I now return the 2-norm of the optimal error vector. 
I no longer try to compute the RMS since I don't know what the inputs I'm passed in mean -- Dima Kogan Fri, 26 Aug 2011 15:18:47 -0700 libdogleg (0.01-1) * Initial release -- Dima Kogan Mon, 22 Aug 2011 00:33:18 -0700 libdogleg-0.16/LICENSE000066400000000000000000000001441445527023200144000ustar00rootroot00000000000000This library is distributed under the terms of the GNU LGPL .libdogleg-0.16/Makefile000066400000000000000000000013131445527023200150320ustar00rootroot00000000000000include choose_mrbuild.mk include $(MRBUILD_MK)/Makefile.common.header PROJECT_NAME := dogleg VERSION := $(VERSION_FROM_PROJECT) ABI_VERSION := 2 TAIL_VERSION := $(VERSION) LIB_SOURCES += \ dogleg.c BIN_SOURCES += \ sample.c DIST_INCLUDE += \ dogleg.h LDLIBS += -lcholmod -llapack LDLIBS += -lm CFLAGS += -Wall -Wextra -I/usr/include/suitesparse sample.o: CFLAGS += -I. MAN_SECTION := 3 MAN_TARGET := libdogleg.$(MAN_SECTION) DIST_MAN := $(MAN_TARGET) $(MAN_TARGET): README.pod pod2man --center="libdogleg: Powell's dogleg method" --name=LIBDOGLEG --release="libdogleg $(VERSION)" --section=$(MAN_SECTION) $^ $@ EXTRA_CLEAN += $(MAN_TARGET) include $(MRBUILD_MK)/Makefile.common.footer libdogleg-0.16/README.pod000066400000000000000000000522451445527023200150450ustar00rootroot00000000000000=head1 NAME libdogleg - A general purpose optimizer to solve data fitting problems =head1 NOTICE If you're considering using this library for new projects, please look at the C solver instead: L C is not deprecated, and I will fix bugs as they're reported. However C is more feature-rich and I more widely used. I consider C to be a superset of C, and if C was available when I wrote C, I would not have written it. =head1 DESCRIPTION This is a library for solving large-scale nonlinear optimization problems. By employing sparse linear algebra, it is taylored for problems that have weak coupling between the optimization variables. For appropriately sparse problems this results in I performance gains. 
For smaller problems with dense Jacobians a dense mode is available also. This utilizes the same optimization loop as the sparse code, but uses dense linear algebra. The main task of this library is to find the vector B

that minimizes norm2( B ) where B = I(B

) is a vector that has higher dimensionality than B

. The user passes in a callback function (of type C) that takes in the vector B

and returns the vector B and a matrix of derivatives B = dB/dB

. B is a matrix with a row for each element of I and a column for each element of B

. If B is a sparse matrix, then this library can take advantage of that, which results in substantial increases in computational efficiency if most entries of B are 0. B is stored row-first in the callback routine. libdogleg uses a column-first data representation so it references the transpose of B (called B). B stored row-first is identical to B stored column-first; this is purely a naming choice. This library implements Powell's dog-leg algorithm to solve the problem. Like the more-widely-known Levenberg-Marquardt algorithm, Powell's dog-leg algorithm solves a nonlinear optimization problem by interpolating between a Gauss-Newton step and a gradient descent step. Improvements over LM are =over =item * a more natural representation of the linearity of the operating point (trust region size vs a vague lambda term). =item * significant efficiency gains, since a matrix inversion isn't needed to retry a rejected step =back The algorithm is described in many places, originally in M. Powell. A Hybrid Method for Nonlinear Equations. In P. Rabinowitz, editor, Numerical Methods for Nonlinear Algebraic Equations, pages 87-144. Gordon and Breach Science, London, 1970. Various enhancements to Powell's original method are described in the literature; at this time only the original algorithm is implemented here. The sparse matrix algebra is handled by the CHOLMOD library, written by Tim Davis. Parts of CHOLMOD are licensed under the GPL and parts under the LGPL. Only the LGPL pieces are used here, allowing libdogleg to be licensed under the LGPL as well. Due to this I lose some convenience (all simple sparse matrix arithmetic in CHOLMOD is GPL-ed) and some performance (the fancier computational methods, such as supernodal analysis are GPL-ed). For my current applications the performance losses are minor. =head1 FUNCTIONS AND TYPES =head2 Main API =head3 dogleg_optimize2 This is the main call to the library for I Jacobians. 
It's declared as double dogleg_optimize2(double* p, unsigned int Nstate, unsigned int Nmeas, unsigned int NJnnz, dogleg_callback_t* f, void* cookie, const dogleg_parameters2_t* parameters, dogleg_solverContext_t** returnContext); =over =item * B

is the initial estimate of the state vector (and holds the final result) =item * C specifies the number of optimization variables (elements of B

) =item * C specifies the number of measurements (elements of B). C= Nstate> is a requirement =item * C specifies the number of non-zero elements of the jacobian matrix dB/dB

. In a dense matrix C. We are dealing with sparse jacobians, so C should be far less. If not, libdogleg is not an appropriate routine to solve this problem. =item * C specifies the callback function that the optimization routine calls to sample the problem being solved =item * C is an arbitrary data pointer passed untouched to C =item * C a pointer to the set of parameters to use. Set to C to use the global parameters, or call C instead. See the L section below for more details =item * If not C, C can be used to retrieve the full context structure from the solver. This can be useful since this structure contains the latest operating point values. It also has an active C structure, which can be reused if more CHOLMOD routines need to be called externally. You usually want CbeforeStep>. I when done>. =back C returns norm2( B ) at the minimum, or a negative value if an error occurred. =head3 dogleg_optimize This is a flavor of C that implicitly uses the global parameters. It's declared as double dogleg_optimize(double* p, unsigned int Nstate, unsigned int Nmeas, unsigned int NJnnz, dogleg_callback_t* f, void* cookie, dogleg_solverContext_t** returnContext); =head3 dogleg_optimize_dense2 This is the main call to the library for I Jacobians. Its declared as double dogleg_optimize_dense2(double* p, unsigned int Nstate, unsigned int Nmeas, dogleg_callback_dense_t* f, void* cookie, const dogleg_parameters2_t* parameters, dogleg_solverContext_t** returnContext); The arguments are almost identical to those in the C call. =over =item * B

is the initial estimate of the state vector (and holds the final result) =item * C specifies the number of optimization variables (elements of B

) =item * C specifies the number of measurements (elements of B). C= Nstate> is a requirement =item * C specifies the callback function that the optimization routine calls to sample the problem being solved. Note that this callback has a different type from that in C =item * C is an arbitrary data pointer passed untouched to C =item * C a pointer to the set of parameters to use. Set to C to use the global parameters, or call C instead. See the L section below for more details =item * If not C, C can be used to retrieve the full context structure from the solver. This can be useful since this structure contains the latest operating point values. You usually want CbeforeStep>. I when done>. =back C returns norm2( B ) at the minimum, or a negative value if an error occurred. =head3 dogleg_optimize_dense This is a flavor of C that implicitly uses the global parameters. It's declared as double dogleg_optimize_dense(double* p, unsigned int Nstate, unsigned int Nmeas, dogleg_callback_dense_t* f, void* cookie, dogleg_solverContext_t** returnContext); =head3 dogleg_freeContext Used to deallocate memory used for an optimization cycle. Defined as: void dogleg_freeContext(dogleg_solverContext_t** ctx); If a pointer to a context is not requested (by passing C to C), libdogleg calls this routine automatically. If the user I retrieve this pointer, though, it must be freed with C when the user is finished. =head3 dogleg_computeJtJfactorization Computes the cholesky decomposition of JtJ. This function is only exposed if you need to touch libdogleg internals via returnContext. Sometimes after computing an optimization you want to do stuff with the factorization of JtJ, and this call ensures that the factorization is available. Most people don't need this function. If the comment wasn't clear, you don't need this function. 
This is declared as void dogleg_computeJtJfactorization(dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx); The arguments are =over =item * C is the operating point of the system. Generally this will be CbeforeStep> where C is from one of the C functions. =item * C is the dogleg context. Generally this will be C from one of the C functions =back =head3 dogleg_testGradient libdogleg requires the user to compute the jacobian matrix B. This is a performance optimization, since B could be computed by differences of B. This optimization is often worth the extra effort, but it creates a possibility that B will have a mistake and B = dB/dB

would not be true. To find these types of issues, the user can call void dogleg_testGradient(unsigned int var, const double* p0, unsigned int Nstate, unsigned int Nmeas, unsigned int NJnnz, dogleg_callback_t* f, void* cookie); This function computes the jacobian with center differences and compares the result with the jacobian computed by the callback function. It is recommended to do this for every variable while developing the program that uses libdogleg. =over =item * C is the index of the variable being tested =item * C is the state vector B

where we're evaluating the jacobian =item * C, C, C are the number of state variables, measurements and non-zero jacobian elements, as before =item * C is the callback function, as before =item * C is the user data, as before =back This function returns nothing, but prints out the test results. =head3 dogleg_testGradient_dense Very similar to C, but for dense jacobians. void dogleg_testGradient_dense(unsigned int var, const double* p0, unsigned int Nstate, unsigned int Nmeas, dogleg_callback_dense_t* f, void* cookie); This function computes the jacobian with center differences and compares the result with the jacobian computed by the callback function. It is recommended to do this for every variable while developing the program that uses libdogleg. =over =item * C is the index of the variable being tested =item * C is the state vector B

where we're evaluating the jacobian =item * C, C are the number of state variables, measurements =item * C is the callback function, as before =item * C is the user data, as before =back This function returns nothing, but prints out the test results. =head3 dogleg_callback_t The main user callback that specifies the sparse optimization problem has type typedef void (dogleg_callback_t)(const double* p, double* x, cholmod_sparse* Jt, void* cookie); =over =item * B

is the current state vector =item * B is the resulting I(B

) =item * B is the transpose of dB/dB

at B

. As mentioned previously, B is stored column-first by CHOLMOD, which can be interpreted as storing B row-first by the user-defined callback routine =item * The C is the user-defined arbitrary data passed into C. =back =head3 dogleg_callback_dense_t The main user callback that specifies the dense optimization problem has type typedef void (dogleg_callback_dense_t)(const double* p, double* x, double* J, void* cookie); =over =item * B

is the current state vector =item * B is the resulting I(B

) =item * B is dB/dB

at B

. B is stored row-first, with all the derivatives for the first measurement, then all the derivatives for the second measurement and so on. =item * The C is the user-defined arbitrary data passed into C. =back =head3 dogleg_solverContext_t This is the solver context that can be retrieved through the C parameter of the C call. This structure contains I the internal state used by the solver. If requested, the user is responsible for calling C when done. This structure is defined as: typedef struct { cholmod_common common; union { dogleg_callback_t* f; dogleg_callback_dense_t* f_dense; }; void* cookie; // between steps, beforeStep contains the operating point of the last step. // afterStep is ONLY used while making the step. Externally, use beforeStep // unless you really know what you're doing dogleg_operatingPoint_t* beforeStep; dogleg_operatingPoint_t* afterStep; // The result of the last JtJ factorization performed. Note that JtJ is not // necessarily factorized at every step, so this is NOT guaranteed to contain // the factorization of the most recent JtJ union { cholmod_factor* factorization; // This is a factorization of JtJ, stored as a packed symmetric matrix // returned by dpptrf('L', ...) double* factorization_dense; }; // Have I ever seen a singular JtJ? If so, I add this constant to the diagonal // from that point on. This is a simple and fast way to deal with // singularities. This constant starts at 0, and is increased every time a // singular JtJ is encountered. This is suboptimal but works for me for now. double lambda; // Are we using sparse math (cholmod)? int is_sparse; int Nstate, Nmeasurements; } dogleg_solverContext_t; Some of the members are copies of the data passed into C; some others are internal state. Of potential interest are =over =item * C is a cholmod_common structure used by all CHOLMOD calls. This can be used for any extra CHOLMOD work the user may want to do =item * C contains the operating point of the optimum solution. 
The user can analyze this data without the need to re-call the callback routine. =back =head3 dogleg_operatingPoint_t An operating point of the solver. This is a part of C. Various variables describing the operating point such as B

, B, B, B and B are available. All of the just-mentioned variables are computed at every step and are thus always valid. // an operating point of the solver typedef struct { double* p; double* x; double norm2_x; union { cholmod_sparse* Jt; double* J_dense; // row-first: grad0, grad1, grad2, ... }; double* Jt_x; // the cached update vectors. It's useful to cache these so that when a step is rejected, we can // reuse these when we retry double* updateCauchy; union { cholmod_dense* updateGN_cholmoddense; double* updateGN_dense; }; double updateCauchy_lensq, updateGN_lensq; // update vector lengths // whether the current update vectors are correct or not int updateCauchy_valid, updateGN_valid; int didStepToEdgeOfTrustRegion; } dogleg_operatingPoint_t; =head2 Parameters The optimization is controlled by several parameters. These can be set globally for I callers of C in a process using the C functions. Those global values are used by C and C. Or these can be specified independently for each invocation by passing a C argument to C or C. The latter is recommended because multiple instances of libdogleg in a single application would no longer conflict. It is not required to set any of these, but it's highly recommended to set the initial trust-region size and the termination thresholds to match the problem being solved. Furthermore, it's highly recommended for the problem being solved to be scaled so that every state variable affects the objective norm2( B ) equally. This makes this method's concept of "trust region" much more well-defined and makes the termination criteria work correctly. =head3 dogleg_setMaxIterations To set the maximum number of solver iterations, call void dogleg_setMaxIterations(int n); =head3 dogleg_setDebug To turn on diagnostic output, call void dogleg_setDebug(int debug); with a non-zero value for C. Two separate diagnostic streams are available: a verbose human-oriented stream, and a L. By default, diagnostic output is disabled. 
The C argument is a bit-mapped integer: if(debug == 0 ): no diagnostic output if(debug & DOGLEG_DEBUG_VNLOG): output vnlog diagnostics to stdout if(debug & ~DOGLEG_DEBUG_VNLOG): output human-oriented diagnostics to stderr C has a very high value, so if human diagnostics are desired, the recommended call is: dogleg_setDebug(1); =head3 dogleg_setInitialTrustregion The optimization method keeps track of a trust region size. Here, the trust region is a ball in R^Nstate. When the method takes a step B

-> B

, it makes sure that S ) ) < trust region size>. The initial value of the trust region size can be set with void dogleg_setInitialTrustregion(double t); The dogleg algorithm is efficient when recomputing a rejected step for a smaller trust region, so set the initial trust region size to a value larger to a reasonable estimate; the method will quickly shrink the trust region to the correct size. =head3 dogleg_setThresholds The routine exits when the maximum number of iterations is exceeded, or a termination threshold is hit, whichever happens first. The termination thresholds are all designed to trigger when very slow progress is being made. If all went well, this slow progress is due to us finding the optimum. There are 3 termination thresholds: =over =item * The function being minimized is E = norm2( B ) where B = I(B

). dE/dB

= 2*B*B where B is transpose(dB/dB

). if( for every i fabs(Jt_x[i]) < JT_X_THRESHOLD ) { we are done; } =item * The method takes discrete steps: B

-> B

if( for every i fabs(delta_p[i]) < UPDATE_THRESHOLD) { we are done; } =item * The method dynamically controls the trust region. if(trustregion < TRUSTREGION_THRESHOLD) { we are done; } =back To set these threholds, call void dogleg_setThresholds(double Jt_x, double update, double trustregion); To leave a particular threshold alone, specify a negative value. =head3 dogleg_setTrustregionUpdateParameters This function sets the parameters that control when and how the trust region is updated. The default values should work well in most cases, and shouldn't need to be tweaked. Declaration looks like void dogleg_setTrustregionUpdateParameters(double downFactor, double downThreshold, double upFactor, double upThreshold); To see what the parameters do, look at C in the source. Again, these should just work as is. =head1 BUGS The current implementation doesn't handle a singular B gracefully (B = B * B). Analytically, B is at worst positive semi-definite (has 0 eigenvalues). If a singular B is ever encountered, from that point on, B + lambda*B is inverted instead for some positive constant lambda. This makes the matrix strictly positive definite, but is sloppy. At least I should vary lambda. In my current applications, a singular B only occurs if at a particular operating point the vector B has no dependence at all on some elements of B

. In the general case other causes could exist, though. There's an inefficiency in that the callback always returns B and B. When I evaluate and reject a step, I do not end up using B at all. Dependng on the callback function, it may be better to ask for B and then, if the step is accepted, to ask for B. =head1 AUTHOR Dima Kogan, C<< >> =head1 LICENSE AND COPYRIGHT Copyright 2011 Oblong Industries 2017 Dima Kogan This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. The full text of the license is available at L libdogleg-0.16/choose_mrbuild.mk000066400000000000000000000014641445527023200167300ustar00rootroot00000000000000# Use the local mrbuild or the system mrbuild or tell the user how to download # it ifneq (,$(wildcard mrbuild/)) MRBUILD_MK=mrbuild MRBUILD_BIN=mrbuild/bin else ifneq (,$(wildcard /usr/include/mrbuild/Makefile.common.header)) MRBUILD_MK=/usr/include/mrbuild MRBUILD_BIN=/usr/bin else V := 1.6 SHA512 := 833905b527d13befcb4214bf9b47d1cc848e29c5ac73c1bcfb2efebd4ab92f8d9633d6b2c764b886714b2a438f44d5b802c4a89fd092a750ac57717b4a85b91f URL := https://github.com/dkogan/mrbuild/archive/refs/tags/v$V.tar.gz TARGZ := mrbuild-$V.tar.gz cmd := wget -O $(TARGZ) ${URL} && sha512sum --quiet --strict -c <(echo $(SHA512) $(TARGZ)) && tar xvfz $(TARGZ) && ln -fs mrbuild-$V mrbuild $(error mrbuild not found. 
Either 'apt install mrbuild', or if not possible, get it locally like this: '${cmd}') endif libdogleg-0.16/dogleg.c000066400000000000000000002517331445527023200150140ustar00rootroot00000000000000// -*- mode: C; c-basic-offset: 2 -*- // Copyright 2011 Oblong Industries // 2017-2018 Dima Kogan // License: GNU LGPL . #include #include #include #include #include #include #include "dogleg.h" #if (CHOLMOD_VERSION > (CHOLMOD_VER_CODE(2,2))) && (CHOLMOD_VERSION < (CHOLMOD_VER_CODE(4,0))) #include #endif // Any non-vnlog bit mask #define DOGLEG_DEBUG_OTHER_THAN_VNLOG (~DOGLEG_DEBUG_VNLOG) #define SAY_NONEWLINE(fmt, ...) fprintf(stderr, "libdogleg at %s:%d: " fmt, __FILE__, __LINE__, ## __VA_ARGS__) #define SAY(fmt, ...) do { SAY_NONEWLINE(fmt, ## __VA_ARGS__); fprintf(stderr, "\n"); } while(0) // This REQUIRES that a "dogleg_solverContext_t* ctx" be available #define SAY_IF_VERBOSE(fmt,...) do { if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_OTHER_THAN_VNLOG ) SAY(fmt, ##__VA_ARGS__); } while(0) // I do this myself because I want this to be active in all build modes, not just !NDEBUG #define ASSERT(x) do { if(!(x)) { SAY("ASSERTION FAILED: " #x " is not true"); exit(1); } } while(0) // used to consolidate the casts #define P(A, index) ((unsigned int*)((A)->p))[index] #define I(A, index) ((unsigned int*)((A)->i))[index] #define X(A, index) ((double* )((A)->x))[index] ////////////////////////////////////////////////////////////////////////////////////////// // vnlog debugging stuff // // This is used if the user calls dogleg_setDebug(DOGLEG_DEBUG_VNLOG | stuff) ////////////////////////////////////////////////////////////////////////////////////////// #define VNLOG_DEBUG_STEP_TYPE_LIST(_) \ _(STEPTYPE_CAUCHY, "cauchy") \ _(STEPTYPE_GAUSSNEWTON, "gaussnewton") \ _(STEPTYPE_INTERPOLATED, "interpolated") \ _(STEPTYPE_FAILED, "failed") #define VNLOG_DEBUG_STEP_TYPE_NAME_COMMA(name,shortname) name, typedef enum { VNLOG_DEBUG_STEP_TYPE_LIST(VNLOG_DEBUG_STEP_TYPE_NAME_COMMA) 
STEPTYPE_UNINITIALIZED } vnlog_debug_step_type_t; #define VNLOG_DEBUG_FIELDS(_) \ _(double, norm2x_before, INFINITY) \ _(double, norm2x_after, INFINITY) \ _(double, step_len_cauchy, INFINITY) \ _(double, step_len_gauss_newton, INFINITY) \ _(double, step_len_interpolated, INFINITY) \ _(double, k_cauchy_to_gn, INFINITY) \ _(double, step_len, INFINITY) \ _(vnlog_debug_step_type_t, step_type, STEPTYPE_UNINITIALIZED) \ _(double, step_direction_change_deg, INFINITY) \ _(double, expected_improvement, INFINITY) \ _(double, observed_improvement, INFINITY) \ _(double, rho, INFINITY) \ _(double, trustregion_before, INFINITY) \ _(double, trustregion_after, INFINITY) static struct vnlog_debug_data_t { #define VNLOG_DEBUG_DECLARE_FIELD(type, name, initialvalue) type name; VNLOG_DEBUG_FIELDS(VNLOG_DEBUG_DECLARE_FIELD) } vnlog_debug_data; static void vnlog_debug_reset(void) { #define VNLOG_DEBUG_RESET_FIELD(type, name, initialvalue) vnlog_debug_data.name = initialvalue; VNLOG_DEBUG_FIELDS(VNLOG_DEBUG_RESET_FIELD); } static void vnlog_debug_emit_legend(void) { #define VNLOG_DEBUG_SPACE_FIELD_NAME(type, name, initialvalue) " " #name vnlog_debug_reset(); printf("# iteration step_accepted" VNLOG_DEBUG_FIELDS(VNLOG_DEBUG_SPACE_FIELD_NAME) "\n"); } __attribute__((unused)) static void vnlog_emit_double(double x) { if(x == INFINITY) printf("- "); else printf("%g ", x); } __attribute__((unused)) static void vnlog_emit_int(int x) { if(x == -1) printf("- "); else printf("%d ", x); } __attribute__((unused)) static void vnlog_emit_vnlog_debug_step_type_t(vnlog_debug_step_type_t x) { #define VNLOG_DEBUG_STEP_TYPE_SWITCH_EMIT(name,shortname) case name: printf(shortname " "); break; switch(x) { VNLOG_DEBUG_STEP_TYPE_LIST(VNLOG_DEBUG_STEP_TYPE_SWITCH_EMIT) default: printf("- "); } } static void vnlog_debug_emit_record(int iteration, int step_accepted) { #define VNLOG_DEBUG_EMIT_FIELD(type, name, initialvalue) vnlog_emit_ ## type(vnlog_debug_data.name); printf("%d %d ", iteration, step_accepted); 
VNLOG_DEBUG_FIELDS(VNLOG_DEBUG_EMIT_FIELD); printf("\n"); fflush(stdout); vnlog_debug_reset(); } // Default parameters. Used only by the original API, which uses a global set of // parameters #define PARAMETERS_DEFAULT \ { \ .max_iterations = 100, \ .dogleg_debug = 0, \ .trustregion0 = 1.0e3, \ .trustregion_decrease_factor = 0.1, \ .trustregion_decrease_threshold = 0.25, \ .trustregion_increase_factor = 2, \ .trustregion_increase_threshold = 0.75, \ .Jt_x_threshold = 1e-8, \ .update_threshold = 1e-8, \ .trustregion_threshold = 1e-8 \ } static const dogleg_parameters2_t parameters_default = PARAMETERS_DEFAULT; static dogleg_parameters2_t parameters_global = PARAMETERS_DEFAULT; void dogleg_getDefaultParameters(dogleg_parameters2_t* parameters) { *parameters = parameters_default; } // if I ever see a singular JtJ, I factor JtJ + LAMBDA*I from that point on #define LAMBDA_INITIAL 1e-10 // these parameters likely should be messed with void dogleg_setDebug(int debug) { parameters_global.dogleg_debug = debug; } void dogleg_setInitialTrustregion(double t) { parameters_global.trustregion0 = t; } void dogleg_setThresholds(double Jt_x, double update, double trustregion) { if(Jt_x > 0.0) parameters_global.Jt_x_threshold = Jt_x; if(update > 0.0) parameters_global.update_threshold = update; if(trustregion > 0.0) parameters_global.trustregion_threshold = trustregion; } // these parameters likely should not be messed with. 
void dogleg_setMaxIterations(int n) { parameters_global.max_iterations = n; } void dogleg_setTrustregionUpdateParameters(double downFactor, double downThreshold, double upFactor, double upThreshold) { parameters_global.trustregion_decrease_factor = downFactor; parameters_global.trustregion_decrease_threshold = downThreshold; parameters_global.trustregion_increase_factor = upFactor; parameters_global.trustregion_increase_threshold = upThreshold; } ////////////////////////////////////////////////////////////////////////////////////////// // general boring linear algebra stuff // should really come from BLAS or libminimath ////////////////////////////////////////////////////////////////////////////////////////// static double norm2(const double* x, unsigned int n) { double result = 0; for(unsigned int i=0; inrow); for(unsigned int i=0; incol; i++) { for(unsigned int j=P(A, i); jncol; i++) { double dotproduct = 0.0; for(unsigned int j=P(At, i); j 0; double* x0 = malloc(Nmeas * sizeof(double)); double* x = malloc(Nmeas * sizeof(double)); double* p = malloc(Nstate * sizeof(double)); ASSERT(x0); ASSERT(x); ASSERT(p); memcpy(p, p0, Nstate * sizeof(double)); cholmod_common _cholmod_common; cholmod_sparse* Jt; cholmod_sparse* Jt0; double* J_dense = NULL; // setting to NULL to pacify compiler's "uninitialized" warnings double* J_dense0 = NULL; // setting to NULL to pacify compiler's "uninitialized" warnings // This is a plain text table, that can be easily parsed with "vnlog" tools printf("# ivar imeasurement gradient_reported gradient_observed error error_relative\n"); if( is_sparse ) { if( !cholmod_start(&_cholmod_common) ) { SAY( "Couldn't initialize CHOLMOD"); return; } Jt = cholmod_allocate_sparse(Nstate, Nmeas, NJnnz, 1, // sorted 1, // packed, 0, // NOT symmetric CHOLMOD_REAL, &_cholmod_common); Jt0 = cholmod_allocate_sparse(Nstate, Nmeas, NJnnz, 1, // sorted 1, // packed, 0, // NOT symmetric CHOLMOD_REAL, &_cholmod_common); p[var] -= GRADTEST_DELTA/2.0; (*f)(p, x0, 
Jt0, cookie); p[var] += GRADTEST_DELTA; (*f)(p, x, Jt, cookie); p[var] -= GRADTEST_DELTA/2.0; } else { J_dense = malloc( Nmeas * Nstate * sizeof(J_dense[0]) ); J_dense0 = malloc( Nmeas * Nstate * sizeof(J_dense[0]) ); dogleg_callback_dense_t* f_dense = (dogleg_callback_dense_t*)f; p[var] -= GRADTEST_DELTA/2.0; (*f_dense)(p, x0, J_dense0, cookie); p[var] += GRADTEST_DELTA; (*f_dense)(p, x, J_dense, cookie); p[var] -= GRADTEST_DELTA/2.0; } for(unsigned int i=0; i 0, instead I have %d", NJnnz); return; } return _dogleg_testGradient(var, p0, Nstate, Nmeas, NJnnz, f, cookie); } void dogleg_testGradient_dense(unsigned int var, const double* p0, unsigned int Nstate, unsigned int Nmeas, dogleg_callback_dense_t* f, void* cookie) { return _dogleg_testGradient(var, p0, Nstate, Nmeas, 0, (dogleg_callback_t*)f, cookie); } ////////////////////////////////////////////////////////////////////////////////////////// // solver routines ////////////////////////////////////////////////////////////////////////////////////////// static void computeCauchyUpdate(dogleg_operatingPoint_t* point, const dogleg_solverContext_t* ctx) { // I already have this data, so don't need to recompute if(!point->updateCauchy_valid) { point->updateCauchy_valid = 1; // I look at a step in the steepest direction that minimizes my // quadratic error function (Cauchy point). If this is past my trust region, // I move as far as the trust region allows along the steepest descent // direction. My error function is F=norm2(f(p)). dF/dP = 2*ft*J. // This is proportional to Jt_x, which is thus the steepest ascent direction. // // Thus along this direction we have F(k) = norm2(f(p + k Jt_x)). The Cauchy // point is where F(k) is at a minimum: // dF_dk = 2 f(p + k Jt_x)t J Jt_x ~ (x + k J Jt_x)t J Jt_x = // = xt J Jt x + k xt J Jt J Jt x = norm2(Jt x) + k norm2(J Jt x) // dF_dk = 0 -> k= -norm2(Jt x) / norm2(J Jt x) // Summary: // the steepest direction is parallel to Jt*x. 
The Cauchy point is at // k*Jt*x where k = -norm2(Jt*x)/norm2(J*Jt*x) double norm2_Jt_x = norm2(point->Jt_x, ctx->Nstate); double norm2_J_Jt_x = ctx->is_sparse ? norm2_mul_spmatrix_t_densevector(point->Jt, point->Jt_x) : norm2_mul_matrix_vector (point->J_dense, point->Jt_x, ctx->Nmeasurements, ctx->Nstate); double k = -norm2_Jt_x / norm2_J_Jt_x; point->updateCauchy_lensq = k*k * norm2_Jt_x; vec_copy_scaled(point->updateCauchy, point->Jt_x, k, ctx->Nstate); SAY_IF_VERBOSE( "cauchy step size %.6g", sqrt(point->updateCauchy_lensq)); } if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) vnlog_debug_data.step_len_cauchy = sqrt(point->updateCauchy_lensq); } // LAPACK prototypes for a packed cholesky factorization and a linear solve // using that factorization, respectively int dpptrf_(char* uplo, int* n, double* ap, int* info, int uplo_len); int dpptrs_(char* uplo, int* n, int* nrhs, double* ap, double* b, int* ldb, int* info, int uplo_len); void dogleg_computeJtJfactorization(dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx) { // I already have this data, so don't need to recompute if(point->updateGN_valid) return; if( ctx->is_sparse ) { // I'm assuming the pattern of zeros will remain the same throughout, so I // analyze only once if(ctx->factorization == NULL) { ctx->factorization = cholmod_analyze(point->Jt, &ctx->common); ASSERT(ctx->factorization != NULL); } while(1) { if( ctx->lambda == 0.0 ) ASSERT( cholmod_factorize(point->Jt, ctx->factorization, &ctx->common) ); else { double beta[] = { ctx->lambda, 0 }; ASSERT( cholmod_factorize_p(point->Jt, beta, NULL, 0, ctx->factorization, &ctx->common) ); } if(ctx->factorization->minor == ctx->factorization->n) break; // singular JtJ. Raise lambda and go again if( ctx->lambda == 0.0) ctx->lambda = LAMBDA_INITIAL; else ctx->lambda *= 10.0; ASSERT( isfinite(ctx->lambda) ); SAY_IF_VERBOSE( "singular JtJ. Have rank/full rank: %zd/%d. 
Adding %g I from now on", ctx->factorization->minor, ctx->Nstate, ctx->lambda); } } else { if(ctx->factorization_dense == NULL) { // Need to store symmetric JtJ, so I only need one triangle of it ctx->factorization_dense = malloc( ctx->Nstate * (ctx->Nstate+1) / 2 * sizeof( ctx->factorization_dense[0])); ASSERT(ctx->factorization_dense); } while(1) { // I construct my JtJ. JtJ is packed and stored row-first. I have two // equivalent implementations. The one enabled here is maybe a bit faster, // but it's definitely clearer #if 1 memset(ctx->factorization_dense, 0, ctx->Nstate*(ctx->Nstate+1)/2*sizeof(ctx->factorization_dense[0])); for(int i=0; iNmeasurements; i++) accum_outerproduct_packed( ctx->factorization_dense, &point->J_dense[ctx->Nstate*i], ctx->Nstate ); if( ctx->lambda > 0.0 ) { int iJtJ=0; for(int i1=0; i1Nstate; i1++) { ctx->factorization_dense[iJtJ] += ctx->lambda; iJtJ += ctx->Nstate-i1; } } #else int iJtJ = 0; for(int i1=0; i1Nstate; i1++) { #error this does not work. overwritten in the following loop ctx->factorization_dense[iJtJ] += ctx->lambda; for(int i0=i1; i0Nstate; i0++, iJtJ++) ctx->factorization_dense[iJtJ] = inner_withstride( &point->J_dense[i0], &point->J_dense[i1], ctx->Nmeasurements, ctx->Nstate); } #endif int info; dpptrf_(&(char){'L'}, &(int){ctx->Nstate}, ctx->factorization_dense, &info, 1); ASSERT(info >= 0); // we MUST either succeed or see complain of singular // JtJ if( info == 0 ) break; // singular JtJ. Raise lambda and go again if( ctx->lambda == 0.0) ctx->lambda = LAMBDA_INITIAL; else ctx->lambda *= 10.0; SAY_IF_VERBOSE( "singular JtJ. Adding %g I from now on", ctx->lambda); } } } static void computeGaussNewtonUpdate(dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx) { // I already have this data, so don't need to recompute if(!point->updateGN_valid) { dogleg_computeJtJfactorization(point, ctx); // try to factorize the matrix directly. If it's singular, add a small // constant to the diagonal. 
This constant gets larger if we keep being // singular if( ctx->is_sparse ) { // solve JtJ*updateGN = Jt*x. Gauss-Newton step is then -updateGN cholmod_dense Jt_x_dense = {.nrow = ctx->Nstate, .ncol = 1, .nzmax = ctx->Nstate, .d = ctx->Nstate, .x = point->Jt_x, .xtype = CHOLMOD_REAL, .dtype = CHOLMOD_DOUBLE}; if(point->updateGN_cholmoddense != NULL) cholmod_free_dense(&point->updateGN_cholmoddense, &ctx->common); point->updateGN_cholmoddense = cholmod_solve(CHOLMOD_A, ctx->factorization, &Jt_x_dense, &ctx->common); vec_negate(point->updateGN_cholmoddense->x, ctx->Nstate); // should be more efficient than this later point->updateGN_lensq = norm2(point->updateGN_cholmoddense->x, ctx->Nstate); } else { memcpy( point->updateGN_dense, point->Jt_x, ctx->Nstate * sizeof(point->updateGN_dense[0])); int info; dpptrs_(&(char){'L'}, &(int){ctx->Nstate}, &(int){1}, ctx->factorization_dense, point->updateGN_dense, &(int){ctx->Nstate}, &info, 1); vec_negate(point->updateGN_dense, ctx->Nstate); // should be more efficient than this later point->updateGN_lensq = norm2(point->updateGN_dense, ctx->Nstate); } SAY_IF_VERBOSE( "gn step size %.6g", sqrt(point->updateGN_lensq)); point->updateGN_valid = 1; } if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) vnlog_debug_data.step_len_gauss_newton = sqrt(point->updateGN_lensq); } static void computeInterpolatedUpdate(double* update_dogleg, double* update_dogleg_lensq, dogleg_operatingPoint_t* point, double trustregion, const dogleg_solverContext_t* ctx) { // I interpolate between the Cauchy-point step and the Gauss-Newton step // to find a step that takes me to the edge of my trust region. 
// // I have something like norm2(a + k*(b-a)) = dsq // = norm2(a) + 2*at*(b-a) * k + norm2(b-a)*k^2 = dsq // let c = at*(b-a), l2 = norm2(b-a) -> // l2 k^2 + 2*c k + norm2(a)-dsq = 0 // // This is a simple quadratic equation: // k = (-2*c +- sqrt(4*c*c - 4*l2*(norm2(a)-dsq)))/(2*l2) // = (-c +- sqrt(c*c - l2*(norm2(a)-dsq)))/l2 // to make 100% sure the discriminant is positive, I choose a to be the // cauchy step. The solution must have k in [0,1], so I much have the // +sqrt side, since the other one is negative double dsq = trustregion*trustregion; double norm2a = point->updateCauchy_lensq; const double* a = point->updateCauchy; const double* b = ctx->is_sparse ? point->updateGN_cholmoddense->x : point->updateGN_dense; double l2 = 0.0; double neg_c = 0.0; for(int i=0; iNstate; i++) { double d = a[i] - b[i]; l2 += d*d; neg_c += d*a[i]; } double discriminant = neg_c*neg_c - l2* (norm2a - dsq); if(discriminant < 0.0) { SAY( "negative discriminant: %.6g!", discriminant); discriminant = 0.0; } double k = (neg_c + sqrt(discriminant))/l2; *update_dogleg_lensq = 0.0; for(int i=0; iNstate; i++) { update_dogleg[i] = a[i] + k*(b[i] - a[i]); *update_dogleg_lensq += update_dogleg[i]*update_dogleg[i]; } SAY_IF_VERBOSE( "k_cauchy_to_gn %.6g, norm %.6g", k, sqrt(*update_dogleg_lensq)); if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) { vnlog_debug_data.step_len_interpolated = sqrt(*update_dogleg_lensq); vnlog_debug_data.k_cauchy_to_gn = k; } } // takes in point->p, and computes all the quantities derived from it, storing // the result in the other members of the operatingPoint structure. 
Returns // true if the gradient-size termination criterion has been met static int computeCallbackOperatingPoint(dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx) { if( ctx->is_sparse ) { (*ctx->f)(point->p, point->x, point->Jt, ctx->cookie); // compute Jt*x mul_spmatrix_densevector(point->Jt_x, point->Jt, point->x); } else { (*ctx->f_dense)(point->p, point->x, point->J_dense, ctx->cookie); // compute Jt*x mul_matrix_t_densevector(point->Jt_x, point->J_dense, point->x, ctx->Nmeasurements, ctx->Nstate); } // I just got a new operating point, so the current update vectors aren't // valid anymore, and should be recomputed, as needed point->updateCauchy_valid = 0; point->updateGN_valid = 0; // compute the 2-norm of the current error vector // At some point this should be changed to use the call from libminimath point->norm2_x = norm2(point->x, ctx->Nmeasurements); // If the largest absolute gradient element is smaller than the threshold, // we can stop iterating. This is equivalent to the inf-norm for(int i=0; iNstate; i++) if(fabs(point->Jt_x[i]) > ctx->parameters->Jt_x_threshold) return 0; SAY_IF_VERBOSE( "Jt_x all below the threshold. Done iterating!"); return 1; } static double computeExpectedImprovement(const double* step, const dogleg_operatingPoint_t* point, const dogleg_solverContext_t* ctx) { // My error function is F=norm2(f(p + step)). F(0) - F(step) = // = norm2(x) - norm2(x + J*step) = -2*inner(x,J*step) - norm2(J*step) // = -2*inner(Jt_x,step) - norm2(J*step) if( ctx->is_sparse ) return - 2.0*inner(point->Jt_x, step, ctx->Nstate) - norm2_mul_spmatrix_t_densevector(point->Jt, step); else return - 2.0*inner(point->Jt_x, step, ctx->Nstate) - norm2_mul_matrix_vector(point->J_dense, step, ctx->Nmeasurements, ctx->Nstate); } // takes a step from the given operating point, using the given trust region // radius. Returns the expected improvement, based on the step taken and the // linearized x(p). 
If we can stop iterating, returns a negative number
static double takeStepFrom(dogleg_operatingPoint_t* pointFrom,
                           double* p_new,
                           double* step,
                           double* step_len_sq,
                           double trustregion,
                           dogleg_solverContext_t* ctx)
{
  SAY_IF_VERBOSE( "taking step with trustregion %.6g", trustregion);

  if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG)
  {
    vnlog_debug_data.trustregion_before = trustregion;
    vnlog_debug_data.norm2x_before      = pointFrom->norm2_x;
  }

  computeCauchyUpdate(pointFrom, ctx);

  if(pointFrom->updateCauchy_lensq >= trustregion*trustregion)
  {
    SAY_IF_VERBOSE( "taking cauchy step");
    if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG)
    {
      vnlog_debug_data.step_type = STEPTYPE_CAUCHY;
      vnlog_debug_data.step_len  = vnlog_debug_data.step_len_cauchy;
    }

    *step_len_sq = pointFrom->updateCauchy_lensq;

    // cauchy step goes beyond my trust region, so I do a gradient descent
    // to the edge of my trust region and call it good
    vec_copy_scaled(step,
                    pointFrom->updateCauchy,
                    trustregion / sqrt(pointFrom->updateCauchy_lensq),
                    ctx->Nstate);
    pointFrom->didStepToEdgeOfTrustRegion = 1;
  }
  else
  {
    // I'm not yet done. The cauchy point is within the trust region, so I can
    // go further. I look at the full Gauss-Newton step. If this is within the
    // trust region, I use it. Otherwise, I find the point at the edge of my
    // trust region that lies on a straight line between the Cauchy point and
    // the Gauss-Newton solution, and use that. This is the heart of Powell's
    // dog-leg algorithm.
    computeGaussNewtonUpdate(pointFrom, ctx);
    if(pointFrom->updateGN_lensq <= trustregion*trustregion)
    {
      SAY_IF_VERBOSE( "taking GN step");
      if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG)
      {
        vnlog_debug_data.step_type = STEPTYPE_GAUSSNEWTON;
        vnlog_debug_data.step_len  = vnlog_debug_data.step_len_gauss_newton;
      }

      *step_len_sq = pointFrom->updateGN_lensq;

      // full Gauss-Newton step lies within my trust region. Take the full step
      memcpy( step,
              ctx->is_sparse ?
                pointFrom->updateGN_cholmoddense->x :
                pointFrom->updateGN_dense,
              ctx->Nstate * sizeof(step[0]) );
      pointFrom->didStepToEdgeOfTrustRegion = 0;
    }
    else
    {
      SAY_IF_VERBOSE( "taking interpolated step");

      // full Gauss-Newton step lies outside my trust region, so I interpolate
      // between the Cauchy-point step and the Gauss-Newton step to find a step
      // that takes me to the edge of my trust region.
      computeInterpolatedUpdate(step, step_len_sq, pointFrom, trustregion, ctx);
      pointFrom->didStepToEdgeOfTrustRegion = 1;
      if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG)
      {
        vnlog_debug_data.step_type = STEPTYPE_INTERPOLATED;
        vnlog_debug_data.step_len  = vnlog_debug_data.step_len_interpolated;
      }
    }
  }

  // take the step
  vec_add(p_new, pointFrom->p, step, ctx->Nstate);
  double expectedImprovement = computeExpectedImprovement(step, pointFrom, ctx);

  if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG)
  {
    vnlog_debug_data.expected_improvement = expectedImprovement;
    if(pointFrom->step_to_here_len_sq != INFINITY)
    {
      double cos_direction_change =
        inner(step, pointFrom->step_to_here, ctx->Nstate) /
        sqrt(*step_len_sq * pointFrom->step_to_here_len_sq);

      // check the numerical overflow cases
      if(cos_direction_change >= 1.0)
        vnlog_debug_data.step_direction_change_deg = 0.0;
      else if(cos_direction_change <= -1.0)
        vnlog_debug_data.step_direction_change_deg = 180.0;
      else
        vnlog_debug_data.step_direction_change_deg = 180.0/M_PI*acos(cos_direction_change);
    }
  }

  // are we done? For each state variable I look at the update step. If all the elements fall below
  // a threshold, I call myself done
  // NOTE(review): the loop bound here was lost in extraction ("iNstate");
  // restored to 'i < (int)ctx->Nstate'
  for(int i=0; i<(int)ctx->Nstate; i++)
    if( fabs(step[i]) > ctx->parameters->update_threshold )
      return expectedImprovement;

  SAY_IF_VERBOSE( "update small enough. Done iterating!");
  return -1.0;
}

// I have a candidate step.
I adjust the trustregion accordingly, and also // report whether this step should be accepted (0 == rejected, otherwise // accepted) static int evaluateStep_adjustTrustRegion(const dogleg_operatingPoint_t* before, const dogleg_operatingPoint_t* after, double* trustregion, double expectedImprovement, dogleg_solverContext_t* ctx) { double observedImprovement = before->norm2_x - after->norm2_x; double rho = observedImprovement / expectedImprovement; SAY_IF_VERBOSE( "observed/expected improvement: %.6g/%.6g. rho = %.6g", observedImprovement, expectedImprovement, rho); if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) { vnlog_debug_data.observed_improvement = observedImprovement; vnlog_debug_data.rho = rho; } // adjust the trust region if( rho < ctx->parameters->trustregion_decrease_threshold ) { SAY_IF_VERBOSE( "rho too small. decreasing trust region"); // Our model doesn't fit well. We should reduce the trust region size. If // the trust region size was affecting the attempted step, do this by a // constant factor. Otherwise, drop the trustregion to attempted step size // first if( !before->didStepToEdgeOfTrustRegion ) *trustregion = sqrt(before->updateGN_lensq); *trustregion *= ctx->parameters->trustregion_decrease_factor; } else if (rho > ctx->parameters->trustregion_increase_threshold && before->didStepToEdgeOfTrustRegion) { SAY_IF_VERBOSE( "rho large enough. 
increasing trust region"); *trustregion *= ctx->parameters->trustregion_increase_factor; } if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) vnlog_debug_data.trustregion_after = *trustregion; return rho > 0.0; } static int runOptimizer(dogleg_solverContext_t* ctx) { double trustregion = ctx->parameters->trustregion0; int stepCount = 0; if( computeCallbackOperatingPoint(ctx->beforeStep, ctx) ) return stepCount; SAY_IF_VERBOSE( "Initial operating point has norm2_x %.6g", ctx->beforeStep->norm2_x); ctx->beforeStep->step_to_here_len_sq = INFINITY; while( stepCountparameters->max_iterations ) { SAY_IF_VERBOSE( "================= step %d", stepCount ); while(1) { SAY_IF_VERBOSE("--------"); double expectedImprovement = takeStepFrom(ctx->beforeStep, ctx->afterStep->p, ctx->afterStep->step_to_here, &ctx->afterStep->step_to_here_len_sq, trustregion, ctx); // negative expectedImprovement is used to indicate that we're done if(expectedImprovement < 0.0) { if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) vnlog_debug_emit_record(stepCount, 1); return stepCount; } int afterStepZeroGradient = computeCallbackOperatingPoint(ctx->afterStep, ctx); SAY_IF_VERBOSE( "Evaluated operating point with norm2_x %.6g", ctx->afterStep->norm2_x); if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) vnlog_debug_data.norm2x_after = ctx->afterStep->norm2_x; if( evaluateStep_adjustTrustRegion(ctx->beforeStep, ctx->afterStep, &trustregion, expectedImprovement, ctx) ) { SAY_IF_VERBOSE( "accepted step"); if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) vnlog_debug_emit_record(stepCount, 1); stepCount++; // I accept this step, so the after-step operating point is the before-step operating point // of the next iteration. 
I exchange the before- and after-step structures so that all the // pointers are still around and I don't have to re-allocate dogleg_operatingPoint_t* tmp; tmp = ctx->afterStep; ctx->afterStep = ctx->beforeStep; ctx->beforeStep = tmp; if( afterStepZeroGradient ) { SAY_IF_VERBOSE( "Gradient low enough and we just improved. Done iterating!" ); return stepCount; } break; } SAY_IF_VERBOSE( "rejected step"); if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) vnlog_debug_emit_record(stepCount, 0); // This step was rejected. check if the new trust region size is small // enough to give up if(trustregion < ctx->parameters->trustregion_threshold) { SAY_IF_VERBOSE( "trust region small enough. Giving up. Done iterating!"); return stepCount; } // I have rejected this step, so I try again with the new trust region } } if(stepCount == ctx->parameters->max_iterations) SAY_IF_VERBOSE( "Exceeded max number of iterations"); return stepCount; } static dogleg_operatingPoint_t* allocOperatingPoint(int Nstate, int numMeasurements, unsigned int NJnnz, cholmod_common* common) { int is_sparse = NJnnz > 0; dogleg_operatingPoint_t* point = malloc(sizeof(dogleg_operatingPoint_t)); ASSERT(point != NULL); int Npool = Nstate + numMeasurements + Nstate + Nstate + Nstate; if(!is_sparse) Npool += numMeasurements*Nstate + Nstate; double* pool = malloc( Npool * sizeof(double) ); ASSERT( pool != NULL ); point->p = &pool[0]; point->x = &pool[Nstate]; point->Jt_x = &pool[Nstate + numMeasurements]; point->updateCauchy = &pool[Nstate + numMeasurements + Nstate]; point->step_to_here = &pool[Nstate + numMeasurements + Nstate + Nstate]; if( is_sparse ) { point->Jt = cholmod_allocate_sparse(Nstate, numMeasurements, NJnnz, 1, // sorted 1, // packed, 0, // NOT symmetric CHOLMOD_REAL, common); ASSERT(point->Jt != NULL); point->updateGN_cholmoddense = NULL; // This will be allocated as it is used } else { point->J_dense = &pool[Nstate + numMeasurements + Nstate + Nstate + Nstate]; point->updateGN_dense = 
&pool[Nstate + numMeasurements + Nstate + Nstate + Nstate + numMeasurements * Nstate]; } // vectors don't have any valid data yet point->updateCauchy_valid = 0; point->updateGN_valid = 0; return point; } static void freeOperatingPoint(dogleg_operatingPoint_t** point, cholmod_common* common) { // MUST match allocOperatingPoint(). It does a single malloc() and stores the // pointer into p free((*point)->p); int is_sparse = common != NULL; if( is_sparse ) { cholmod_free_sparse(&(*point)->Jt, common); if((*point)->updateGN_cholmoddense != NULL) cholmod_free_dense(&(*point)->updateGN_cholmoddense, common); } free(*point); *point = NULL; } static int cholmod_error_callback(const char* s, ...) { SAY_NONEWLINE(""); va_list ap; va_start(ap, s); int ret = vfprintf(stderr, s, ap); va_end(ap); fprintf(stderr, "\n"); return ret; } static void set_cholmod_options(cholmod_common* cc) { // I want to use LGPL parts of CHOLMOD only, so I turn off the supernodal routines. This gave me a // 25% performance hit in the solver for a particular set of optical calibration data. cc->supernodal = 0; // I want all output to go to STDERR, not STDOUT #if (CHOLMOD_VERSION <= (CHOLMOD_VER_CODE(2,2))) cc->print_function = cholmod_error_callback; #elif (CHOLMOD_VERSION < (CHOLMOD_VER_CODE(4,0))) CHOLMOD_FUNCTION_DEFAULTS ; CHOLMOD_FUNCTION_PRINTF(cc) = cholmod_error_callback; #else SuiteSparse_config_printf_func_set(cholmod_error_callback); #endif } void dogleg_freeContext(dogleg_solverContext_t** ctx) { freeOperatingPoint(&(*ctx)->beforeStep, (*ctx)->is_sparse ? &(*ctx)->common : NULL); freeOperatingPoint(&(*ctx)->afterStep, (*ctx)->is_sparse ? 
&(*ctx)->common : NULL); if( (*ctx)->is_sparse ) { if((*ctx)->factorization != NULL) cholmod_free_factor (&(*ctx)->factorization, &(*ctx)->common); cholmod_finish(&(*ctx)->common); } else free((*ctx)->factorization_dense); free(*ctx); *ctx = NULL; } static double _dogleg_optimize(double* p, unsigned int Nstate, unsigned int Nmeas, unsigned int NJnnz, dogleg_callback_t* f, void* cookie, // NULL to use the globals const dogleg_parameters2_t* parameters, dogleg_solverContext_t** returnContext) { int is_sparse = NJnnz > 0; dogleg_solverContext_t* ctx = malloc(sizeof(dogleg_solverContext_t)); ctx->f = f; ctx->cookie = cookie; ctx->factorization = NULL; ctx->lambda = 0.0; ctx->Nstate = Nstate; ctx->Nmeasurements = Nmeas; ctx->parameters = parameters ? parameters : ¶meters_global; if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) vnlog_debug_emit_legend(); if( returnContext != NULL ) *returnContext = ctx; if( is_sparse ) { if( !cholmod_start(&ctx->common) ) { SAY( "Couldn't initialize CHOLMOD"); return -1.0; } set_cholmod_options(&ctx->common); ctx->is_sparse = 1; } else ctx->is_sparse = 0; ctx->beforeStep = allocOperatingPoint(Nstate, Nmeas, NJnnz, &ctx->common); ctx->afterStep = allocOperatingPoint(Nstate, Nmeas, NJnnz, &ctx->common); memcpy(ctx->beforeStep->p, p, Nstate * sizeof(double)); // everything is set up, so run the solver! int numsteps = runOptimizer(ctx); double norm2_x = ctx->beforeStep->norm2_x; // runOptimizer places the most recent results into beforeStep in preparation for another // iteration memcpy(p, ctx->beforeStep->p, Nstate * sizeof(double)); SAY_IF_VERBOSE( "success! 
took %d iterations", numsteps); if( returnContext == NULL ) dogleg_freeContext(&ctx); return norm2_x; } double dogleg_optimize2(double* p, unsigned int Nstate, unsigned int Nmeas, unsigned int NJnnz, dogleg_callback_t* f, void* cookie, const dogleg_parameters2_t* parameters, dogleg_solverContext_t** returnContext) { if( NJnnz == 0 ) { SAY( "I must have NJnnz > 0, instead I have %d", NJnnz); return -1.0; } return _dogleg_optimize(p, Nstate, Nmeas, NJnnz, f, cookie, parameters, returnContext); } double dogleg_optimize(double* p, unsigned int Nstate, unsigned int Nmeas, unsigned int NJnnz, dogleg_callback_t* f, void* cookie, dogleg_solverContext_t** returnContext) { return dogleg_optimize2(p,Nstate,Nmeas,NJnnz,f,cookie, NULL, // no parameters; use the globals returnContext); } double dogleg_optimize_dense2(double* p, unsigned int Nstate, unsigned int Nmeas, dogleg_callback_dense_t* f, void* cookie, const dogleg_parameters2_t* parameters, dogleg_solverContext_t** returnContext) { return _dogleg_optimize(p, Nstate, Nmeas, 0, (dogleg_callback_t*)f, cookie, parameters, returnContext); } double dogleg_optimize_dense(double* p, unsigned int Nstate, unsigned int Nmeas, dogleg_callback_dense_t* f, void* cookie, dogleg_solverContext_t** returnContext) { return dogleg_optimize_dense2(p,Nstate,Nmeas,f,cookie, NULL, // no parameters; use the globals returnContext); } // Computes pinv(J) for a subset of measurements: inv(JtJ) * // Jt[imeasurement0..imeasurement0+N-1]. Returns false if something failed. // ASSUMES THAT THE CHOLESKY FACTORIZATION HAS ALREADY BEEN COMPUTED. 
// // This function is experimental, and subject to change static bool pseudoinverse_J_dense(// output double* out, // inputs const dogleg_operatingPoint_t* point, const dogleg_solverContext_t* ctx, int i_meas0, int NmeasInChunk) { int info; memcpy(out, &point->J_dense[i_meas0*ctx->Nstate], NmeasInChunk*ctx->Nstate*sizeof(double)); dpptrs_(&(char){'L'}, &(int){ctx->Nstate}, &NmeasInChunk, ctx->factorization_dense, out, &(int){ctx->Nstate}, &info, 1); return info==0; } // Computes pinv(J) for a subset of measurements: inv(JtJ) * // Jt[imeasurement0..imeasurement0+N-1]. Returns false if something failed. // ASSUMES THAT THE CHOLESKY FACTORIZATION HAS ALREADY BEEN COMPUTED. // // allocates memory, returns NULL on failure. ON SUCCESS, THE CALLER IS // RESPONSIBLE FOR FREEING THE RETURNED MEMORY // // This function is experimental, and subject to change static cholmod_dense* pseudoinverse_J_sparse(// inputs const dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx, int i_meas0, int NmeasInChunk, // Pre-allocated array for the // right-hand-side. This will be used as a // workspace. Create this like so: // // cholmod_allocate_dense( Nstate, // NmeasInChunk, // Nstate, // CHOLMOD_REAL, // &ctx->common ); cholmod_dense* Jt_chunk) { // I'm solving JtJ x = b where J is sparse, b is sparse, but x ends up dense. // cholmod doesn't have functions for this exact case. so I use the // dense-sparse-dense function (cholmod_solve), and densify the input. Instead // of sparse-sparse-sparse and the densifying the output (cholmod_spsolve). 
// This feels like it'd be more efficient memset( Jt_chunk->x, 0, Jt_chunk->nrow*Jt_chunk->ncol*sizeof(double) ); int Jt_chunk_ncol_backup = Jt_chunk->ncol; for(int i_meas=0; i_meas= (int)ctx->Nmeasurements ) { // at the end, we could have one chunk with less that chunk_size // columns Jt_chunk->ncol = i_meas; break; } for(unsigned int i0=P(point->Jt, i_meas0+i_meas); i0Jt, i_meas0+i_meas+1); i0++) { int irow = I(point->Jt,i0); double x = X(point->Jt,i0); ((double*)Jt_chunk->x)[irow + i_meas*Jt_chunk->nrow] = x; } } // solve inv(JtJ)Jt[slice] cholmod_dense* pinv = cholmod_solve(CHOLMOD_A, ctx->factorization, Jt_chunk, &ctx->common); Jt_chunk->ncol = Jt_chunk_ncol_backup; return pinv; } /* The below is a bunch of code for outlier detection/rejection and for confidence evaluation. IT'S ALL EXPERIMENTAL AND SUBJECT TO CHANGE What is an outlier? Suppose I just found an optimum. I define an outlier as an observation that does two things to the problem if I remove that observation: 1. The cost function would improve significantly. Things would clearly improve because the cost function contribution of the removed point itself would get removed, but ALSO because the parameters could fit the remaining data better without that extra observation; the "outlier" wouldn't pull the solution away from the "true" optimum. 2. The confidence of the solution does not significantly decrease. One could imagine a set of data that define the problem poorly, and produce a low cost function value for some (overfit) set of parameters. And one can imagine an extra point being added that defines the problem and increases the confidence of the solution. This extra point would suppress the overfitting, so this extra point would increase the cost function value. 
Condition 1 above would make this extra point look like an outlier, and this condition is meant to detect this case and to classify this point as NOT an outlier Let's say we just computed an optimum least-squares fit, and we try to determine if some of the data points look like outliers. The least squares problem I just solved has cost function E = norm2(x) where x is a length-N vector of measurements. We solved it by optimizing the vector of parameters p. We're at an optimum, so dE/dp = 0 -> Jt x = 0 We define an outlier as a measurement that would greatly improve the cost function E if this measurement was removed, and the problem was re-optimized. Let's say the problem is locally linear (J = dx/dp is constant), and let's say re-optimizing moves the parameter by dp. The original cost function was E0 = norm2(x) If we move by dp, and take away the outlier's own cost, we get E1 = norm2(x + J dp) - norm2( x* + J* dp ) where x* and J* refer to the outlier measurements. Thus "Dima's self+others factor" is norm2(x) - (norm2(x + J dp) - norm2( x* + J* dp )) We can choose to look at ONLY the effect on the other variables. That would produce "Dima's others factor" = norm2(x)-norm2(x*) - (norm2(x + J dp) - norm2(x* + J* dp )) This is very similar to Cook's D factor (which Dima apparently reinvented 40 years after the fact!). This factor looks not at the difference of norm2(x), but at norm2(difference) instead. So "Cook's self+others factor" is proportional to norm2(x - (x+J dp)). This is the "normal" Cook's D factor. We can also compute "Cook's others factor": norm2(x - vec(x*) - (x + J dp -(vec(x*) + full(J*) dp))) = norm2( - J dp + full(J*) dp) = norm2(J dp) + norm2(full(J*) dp) -2 dpt(Jt full(J*) + full(J*)tJ)dp = norm2(J dp) + norm2(full(J*) dp) -2 dpt(Jt full(J*) + full(J*)tJ)dp = norm2(J dp) + norm2(J* dp) - 2 norm2(J* dp) = norm2(J dp) - norm2(J* dp) This is 4 flavors of a very similar computation. 
In summary (and ignoring scale factors): Dima's self+others: -norm2(J dp) + 2 x*t J* dp + norm2(J* dp) + norm2(x*) Dima's others : -norm2(J dp) + 2 x*t J* dp + norm2(J* dp) Cook's self+others: norm2(J dp) Cook's others : norm2(J dp) - norm2(J* dp) Let's compute these. dE1/dp = 0 at p+dp -> 0 = Jt x + JtJ dp - J*t x* - J*tJ* dp = JtJ dp - J*t x* - J*tJ* dp -> dp = inv(JtJ - J*tJ*) J*t x* Woodbury identity: inv(JtJ - J*t J*) = = inv(JtJ) - inv(JtJ) J*t inv(-I + J* inv(JtJ) J*t) J* inv(JtJ) Let A = J* inv(JtJ) J*t B = inv(A - I) So AB = BA = I+B Thus inv(JtJ - J*t J*) = = inv(JtJ) - inv(JtJ) J*t B J* inv(JtJ) and dp = inv(JtJ - J*tJ*) J*t x* = = inv(JtJ)J*t x* - inv(JtJ) J*t B J* inv(JtJ) J*t x* = inv(JtJ)J*t x* - inv(JtJ) J*t B A x* = inv(JtJ)J*t(I - B A) x* = -inv(JtJ)J*t B x* Then norm2(J dp) = x*t ( B J* inv() Jt J inv() J*t B ) x* = x*t ( B J* inv() J*t B ) x* = x*t ( B A B ) x* = x*t ( B + B*B ) x* 2 x*t J* dp = -2 x*t J* inv(JtJ)J*t B x* = = -2 x*t A B x* = = x*t (-2AB) x* = = x*t (-2I - 2B) x* norm2(J* dp) = x*t ( B J* inv() J*tJ* inv() J*t B ) x* = = x*t ( B A A B ) x* = = x*t ( I + 2B + B*B ) x* norm2(x*) = x*t ( I ) x* There're two ways to compute the "Dima's self-only" factor. I can simply say that the measurements' cost of norm2(x*) has been removed, so the factor is x*t I x* or I can 1. Remove measurements 2. Re-optimize 3. Look to see how the residual of x* changes This is different because I look at what happens to x*(p) when x* is no longer in the optimized set. State moves by dp. x* moves by J* dp. I look at dE = norm2(x* + J* dp) - norm2(x*) = 2 x*t J* dp + norm2(J* dp) = = x*t (-2I - 2B + I + 2B + B*B ) x* = = x*t (B*B - I) x* I expect that removing measurements from the optimization should make their residuals worse. I.e. I expect dE > 0. Let's check. Is B*B - I positive definite? 
Let's say that there's v,l such that (B*B-I)v = l v, norm2(v) = 1 --> BBv = (l+1)v vBBv = l+1 Let u = Bv -> norm2(u) = l+1 A = J* inv(JtJ) J*t B = inv(A - I) -> v = (A-I)u norm2(v) = 1 = norm2(Au) - 2ut A u + norm2(u) -> -> l = 2ut A u - norm2(Au) Let w = J*t u -> Au = J* inv(JtJ) w -> ut A u = wt inv(JtJ) w -> l = wt ( 2inv(JtJ) - inv(JtJ)J*tJ* inv(JtJ) ) w J*t J* = sum(outer(j*,j*)) = sum(outer(j,j)) - sum(outer(jnot*,jnot*)) = = JtJ - Jnot*t Jnot* -> l = wt ( 2inv(JtJ) - inv(JtJ)JtJinv(JtJ) + inv(JtJ)Jnot*tJnot* inv(JtJ) ) w = wt ( inv(JtJ) + inv(JtJ)Jnot*tJnot* inv(JtJ) ) w -> substitute -> l = wt ( C + CDC ) w where C >= 0 and D >= 0 wt C wt >= 0 wt CDC wt = (Ctw)t D Ctw >= 0 -> l >= 0 So B*B-I >= 0 and dE >= 0: removing a point will never make its own fit better - if dE >> 0: the other data does NOT support this measurement being correct - if dE ~~ 0: the other data supports this measurement being correct Putting all this together I get the expressions for the factors above: Dima's self+others: x*t (-B ) x* Dima's others : x*t (-B - I ) x* Dima's self (simple): x*t ( I ) x* Dima's self (interesting): x*t (B*B - I ) x* Cook's self+others: x*t ( B + B*B) x* Cook's others : x*t (-B - I ) x* One can also do a similar analysis to gauge our confidence in a solution. We can do this by 1. Solving the optimization problem 2. Querying the solution in a way we care about to produce a new feature group of measurements x = f - ref. We can compute J = dx/dp = df/dp. And we presumably know something about ref: like its probability distribution for instance. Example: we just calibrated a camera; we want to know how confident we are about a projection in a particular spot on the imager. I compute a projection in that spot: q = project(v). If added to the optimization I'd get x = q - ref where 'ref' would be the observed pixel coordinate. 3. If this new feature was added to the optimization, I can compute its outlierness factor in the same way as before. 
If we are confident in the solution in a particular spot, then we have consensus, and it wouldn't take much for these queries to look like outliers: the expected value of the outlierness would be high. Conversely, if we aren't well-defined then a wider set of points would fit the solution, and things wouldn't look very outliery Very similar analysis to the above: Let p,x,J represent the solution. The new feature we're adding is x* with jacobian J*. The solution would move by dp to get to the new optimum. Original solution is an optimum: Jt x = 0 If we add x* and move by dp, we get E1 = norm2(x + J dp) + norm2( x* + J* dp ) Thus "Dima's self+others factor" is (norm2(x + J dp) + norm2( x* + J* dp )) - norm2(x) = norm2(Jdp) + norm2( x* + J* dp ) We can choose to look at ONLY the effect on the other variables. That would produce "Dima's others factor" norm2(x + J dp) - norm2(x) = norm2(Jdp) This is very similar to Cook's D factor (which Dima apparently reinvented 40 years after the fact!). This factor looks not at the difference of norm2(x), but at norm2(difference) instead. So "Cook's others factor" is proportional to norm2(x - (x+J dp)). This is the "normal" Cook's D factor. We can also compute "Cook's self+others factor": norm2(concat(x + J dp, x* + J* dp) - concat(x,x*)) = norm2(x + J dp-x) + norm2(x* + J* dp - x*) = norm2(J dp) + norm2(J* dp) This is 4 flavors of a very similar computation. 
In summary (and ignoring scale factors): Dima's self+others: norm2(J dp) + 2 x*t J* dp + norm2(J* dp) + norm2(x*) Dima's others : norm2(J dp) Cook's self+others: norm2(J dp) + norm2(J* dp) Cook's others : norm2(J dp) The problem including the new point is also at an optimum: E1 = norm2(x + J dp) + norm2( x* + J* dp ) dE1/dp = 0 -> 0 = Jt x + JtJ dp + J*t x* + J*tJ*dp = = JtJ dp + J*t x* + J*tJ*dp -> dp = -inv(JtJ + J*tJ*) J*t x* Woodbury identity: -inv(JtJ + J*t J*) = = -inv(JtJ) + inv(JtJ) J*t inv(I + J* inv(JtJ) J*t) J* inv(JtJ) Let A = J* inv(JtJ) J*t (same as before) B = inv(A + I) (NOT the same as before) So AB = BA = I-B Thus -inv(JtJ + J*t J*) = = -inv(JtJ) + inv(JtJ) J*t B J* inv(JtJ) and dp = -inv(JtJ + J*tJ*) J*t x* = = -inv(JtJ)J*t x* + inv(JtJ) J*t B J* inv(JtJ) J*t x* = = -inv(JtJ)J*t x* + inv(JtJ) J*t B A x* = -inv(JtJ)J*t(I - B A) x* = -inv(JtJ)J*t B x* (same as before, but with a different B!) Then norm2(J dp) = x*t ( B J* inv() Jt J inv() J*t B ) x* = x*t ( B J* inv() J*t B ) x* = x*t ( B A B ) x* = x*t ( B - B*B ) x* 2 x*t J* dp = -2 x*t J* inv(JtJ)J*t B x* = = -2 x*t A B x* = = x*t (-2AB) x* = = x*t (-2I + 2B) x* norm2(J* dp) = x*t ( B J* inv() J*tJ* inv() J*t B ) x* = = x*t ( B A A B ) x* = = x*t ( I - 2B + B*B ) x* norm2(x*) = x*t ( I ) x* How do I compute "Dima's self" factor? The "simple" flavor from above looks at the new measurement only: norm2(x*). The "interesting" flavor, look at what happens to the measurements' error when they're added to the optimization set. State moves by dp. x* moves by J* dp. I look at dE = norm2(x* + J*dp) - norm2(x*) = 2 x*t J* dp + norm2(J* dp) = x*t (-2I + 2B + I - 2B + B*B) x* = x*t (B*B - I) x* I expect that adding a point to the optimization would make it fit better: dE < 0. Let's check. 
Let's say that there's v,l such that (B*B-I)v = l v, norm2(v) = 1 --> BBv = (l+1)v vBBv = l+1 Let u = Bv -> norm2(u) = l+1 A = J* inv(JtJ) J*t B = inv(A + I) -> v = (A+I)u norm2(v) = 1 = norm2(Au) + 2ut A u + norm2(u) -> -> l = -2ut A u - norm2(Au) Let w = J*t u -> Au = J* inv(JtJ) w -> ut A u = wt inv(JtJ) w -> l = -2 wt inv(JtJ) w - norm2(Au) Since inv(JtJ) > 0 -> l < 0. As expected So B*B-I is negative definite: adding measurements to the optimization set makes them fit better Putting all this together I get the expressions for the factors above: Dima's self+others: x*t (B ) x* Dima's others : x*t (B - B*B) x* Dima's self (simple): x*t (I ) x* Dima's self (interesting): x*t (B*B - I) x* Cook's self+others: x*t (I - B ) x* Cook's others : x*t (B - B*B) x* These expressions are all tested and verified in mrcal/analyses/outlierness-test.py There're several slightly-different definitions of Cook's D and of a rule-of-thumb threshold floating around on the internet. Wikipedia says: D = norm2(x_io - x_i)^2 / (Nstate * norm2(x_io)/(Nmeasurements - Nstate)) D_threshold = 1 An article https://www.nature.com/articles/nmeth.3812 says D = norm2(x_io - x_i)^2 / ((Nstate+1) * norm2(x_io)/(Nmeasurements - Nstate -1)) D_threshold = 4/Nmeasurements I haven't tracked down the reference for this second definition, but it looks more reasonable, and I use it here. Here I use the second definition. That definition expands to k = xo^2 / ((Nstate+1) * norm2(x)/(Nmeasurements - Nstate -1)) B = 1.0/(jt inv(JtJ) j - 1) f = k * (B + B*B) I report normalized outlierness factors so that the threshold is 1. Thus I use k = Nmeasurements / (4*((Nstate+1) * norm2(x)/(Nmeasurements - Nstate - 1))) */ static void accum_outlierness_factor(// output double* factor, // inputs const double* x, // A is symmetric. 
I store the upper triangle row-first const double* A, // if outliers are grouped into features, the // feature size is set here int featureSize, double k) { // This implements Dima's self+outliers factor. // // from the derivation in a big comment above I have // // f = k (x*t (-B) x* ) // // where B = inv(A - I) and // A = J* inv(JtJ) J*t // I only implemented featureSize == 1 and 2 so far. if(featureSize <= 1) { featureSize = 1; double denom = 1.0 - *A; if( fabs(denom) < 1e-8 ) { *factor = DBL_MAX; // definitely an outlier return; } else *factor = x[0]*x[0] / denom; } else if(featureSize == 2) { double det = (1.0-A[0])*(1.0-A[2]) - A[1]*A[1]; if( fabs(det) < 1e-8 ) { *factor = DBL_MAX; // definitely an outlier return; } else { double B00_det = A[2] - 1.0; double B11_det = A[0] - 1.0; double B01_det = -A[1]; // inner(x, Bx) double xBx = (x[0]*x[0]*B00_det + x[0]*x[1]*B01_det * 2.0 + x[1]*x[1]*B11_det) / det; // norm2(Bx) __attribute__((unused)) double v1 = x[0]*B00_det + x[1]*B01_det; __attribute__((unused)) double v2 = x[0]*B01_det + x[1]*B11_det; __attribute__((unused)) double xBBx = (v1*v1 + v2*v2) / (det*det); // // mine self+others // *factor = -xBx; // // mine others / cook others // *factor = -(x[0]*x[0] + x[1]*x[1]) - xBx; // cook's self+others *factor = xBx + xBBx; } } else { SAY("featureSize > 2 not implemented yet. Got featureSize=%d", featureSize); ASSERT(0); } #warning This is a hack. The threshold should be 1.0, and the scaling should make sure that is the case. 
I am leaving it for now k /= 8.; *factor *= k; } static void getOutliernessScale(// out double* scale, // if *scale > 0, I just keep what I have // in int Nmeasurements, int Nstate, int NoutlierFeatures, int featureSize, double norm2_x) { // if *scale > 0, I just keep what I have if(*scale > 0.0) return; int Nmeasurements_outliers = NoutlierFeatures*featureSize; int Nmeasurements_nonoutliers = Nmeasurements - Nmeasurements_outliers; *scale = (double)(Nmeasurements_nonoutliers) / (4.*((double)(Nstate+1) * norm2_x/(double)(Nmeasurements_nonoutliers - Nstate - 1))); } static bool getOutliernessFactors_dense( // output double* factors, // Nfeatures factors // output, input double* scale, // if <0 then I recompute // inputs // if outliers are grouped into features, // the feature size is set here int featureSize, int Nfeatures, int NoutlierFeatures, const dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx ) { // cholmod_spsolve() and cholmod_solve()) work in chunks of 4, so I do this in // chunks of 4 too. I pass it rows of J, 4 at a time. Note that if I have // measurement features, I don't want these to cross chunk boundaries, so I set // up chunk_size=lcm(N,4) int chunk_size = 4; if(featureSize <= 1) featureSize = 1; if(featureSize > 1) { // haven't implemented anything else yet. 
Don't have lcm() readily available ASSERT(featureSize == 2); // chunk_size = lcm(chunk_size,featureSize) } int Nstate = ctx->Nstate; int Nmeasurements = ctx->Nmeasurements; bool result = false; double* invJtJ_Jt = malloc(Nstate*chunk_size*sizeof(double)); if(invJtJ_Jt == NULL) { SAY("Couldn't allocate invJtJ_Jt!"); goto done; } getOutliernessScale(scale, Nmeasurements, Nstate, NoutlierFeatures, featureSize, point->norm2_x); int i_measurement_valid_chunk_start = -1; int i_measurement_valid_chunk_last = -1; int i_measurement = 0; for( int i_feature=0; i_feature i_measurement_valid_chunk_last ) { bool pinvresult = pseudoinverse_J_dense(invJtJ_Jt, point, ctx, i_measurement, chunk_size); if(!pinvresult) { SAY("Couldn't compute pinv!"); goto done; } i_measurement_valid_chunk_start = i_measurement; i_measurement_valid_chunk_last = i_measurement+chunk_size-1; } // from the derivation in a big comment in above I have // // f = scale (xot (...) xo ) // // where A = Jo inv(JtJ) Jot // // A is symmetric. I store the upper triangle double A[featureSize*(featureSize+1)/2]; int iA=0; for(int i=0; iJ_dense[Nstate* i_measurement+j + k]; } accum_outlierness_factor(&factors[i_feature], &point->x[i_measurement], A, featureSize, *scale); } result = true; done: free(invJtJ_Jt); return result; } static bool getOutliernessFactors_sparse( // output double* factors, // Nfeatures factors // output, input double* scale, // if <0 then I recompute // inputs // if outliers are grouped into features, // the feature size is set here int featureSize, int Nfeatures, int NoutlierFeatures, const dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx ) { // cholmod_spsolve() and cholmod_solve()) work in chunks of 4, so I do this in // chunks of 4 too. I pass it rows of J, 4 at a time. 
Note that if I have // measurement features, I don't want these to cross chunk boundaries, so I set // up chunk_size=lcm(N,4) int chunk_size = 4; if(featureSize <= 1) featureSize = 1; if(featureSize > 1) { // haven't implemented anything else yet. Don't have lcm() readily available ASSERT(featureSize == 2); // chunk_size = lcm(chunk_size,featureSize) } int Nstate = ctx->Nstate; int Nmeasurements = ctx->Nmeasurements; bool result = false; cholmod_dense* invJtJ_Jt = NULL; cholmod_dense* Jt_chunk = cholmod_allocate_dense( Nstate, chunk_size, Nstate, CHOLMOD_REAL, &ctx->common ); if(!Jt_chunk) { SAY("Couldn't allocate Jt_chunk!"); goto done; } getOutliernessScale(scale, Nmeasurements, Nstate, NoutlierFeatures, featureSize, point->norm2_x); int i_measurement_valid_chunk_start = -1; int i_measurement_valid_chunk_last = -1; int i_measurement = 0; for( int i_feature=0; i_feature i_measurement_valid_chunk_last ) { if(invJtJ_Jt) cholmod_free_dense(&invJtJ_Jt, &ctx->common); invJtJ_Jt = pseudoinverse_J_sparse(point, ctx, i_measurement, chunk_size, Jt_chunk); if(invJtJ_Jt == NULL) { SAY("Couldn't compute pinv!"); goto done; } i_measurement_valid_chunk_start = i_measurement; i_measurement_valid_chunk_last = i_measurement+chunk_size-1; } // from the derivation in a big comment in above I have // // f = scale (xot (...) xo ) // // where A = Jo inv(JtJ) Jot // // A is symmetric. 
I store the upper triangle double A[featureSize*(featureSize+1)/2]; int iA=0; for(int i=0; iJt, i_measurement+j); l < P(point->Jt, i_measurement+j+1); l++) { int k = I(point->Jt, l); A[iA] += ((double*)invJtJ_Jt->x)[Nstate*(i_measurement+i-i_measurement_valid_chunk_start) + k] * X(point->Jt, l); } } accum_outlierness_factor(&factors[i_feature], &point->x[i_measurement], A, featureSize, *scale); } result = true; done: if(Jt_chunk) cholmod_free_dense(&Jt_chunk, &ctx->common); if(invJtJ_Jt) cholmod_free_dense(&invJtJ_Jt, &ctx->common); return result; } bool dogleg_getOutliernessFactors( // output double* factors, // Nfeatures factors // output, input double* scale, // if <0 then I recompute // inputs // if outliers are grouped into features, the // feature size is set here int featureSize, int Nfeatures, int NoutlierFeatures, // how many outliers we already have dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx ) { if(featureSize <= 1) featureSize = 1; dogleg_computeJtJfactorization( point, ctx ); bool result; if(ctx->is_sparse) result = getOutliernessFactors_sparse(factors, scale, featureSize, Nfeatures, NoutlierFeatures, point, ctx); else result = getOutliernessFactors_dense(factors, scale, featureSize, Nfeatures, NoutlierFeatures, point, ctx); #if 0 if( result ) { int Nstate = ctx->Nstate; int Nmeasurements = ctx->Nmeasurements; static FILE* fp = NULL; if(fp == NULL) fp = fopen("/tmp/check-outlierness.py", "w"); static int count = -1; count++; if(count > 5) goto done; fprintf(fp, "# WARNING: all J here are unscaled with SCALE_....\n"); if(count == 0) { fprintf(fp, "import numpy as np\n" "import numpysane as nps\n" "np.set_printoptions(linewidth=100000)\n" "\n"); } fprintf(fp, "x%d = np.array((", count); for(int j=0;jx[j]); fprintf(fp,"))\n"); if( ctx->is_sparse ) { fprintf(fp, "J%d = np.zeros((%d,%d))\n", count, Nmeasurements, Nstate); for(int imeas=0;imeasJt, imeas); j < (int)P(point->Jt, imeas+1); j++) { int irow = I(point->Jt, j); fprintf(fp, 
"J%d[%d,%d] = %.20g\n", count, imeas, irow, X(point->Jt, j)); } } } else { fprintf(fp, "J%d = np.array((\n", count); for(int j=0;jJ_dense[j*Nstate+i]); fprintf(fp, "),\n"); } fprintf(fp,"))\n"); } fprintf(fp, "Nmeasurements = %d\n", Nmeasurements); fprintf(fp, "Nstate = %d\n", Nstate); fprintf(fp, "Nfeatures = %d\n", Nfeatures); fprintf(fp, "featureSize = %d\n", featureSize); fprintf(fp, "NoutlierFeatures = %d\n", NoutlierFeatures); fprintf(fp, "scale_got = %.20f\n", *scale); fprintf(fp, "factors_got = np.array(("); for(int j=0;j B = inv(I + A) -> tr(B) = (2+a00+a11) / ((1+a00)*(1+a11) - a01^2) */ // This is Jt because cholmod thinks in terms of col-first instead of // row-first int Jt_p[featureSize+1]; int Jt_i[NstateActive*featureSize]; for(int i=0; i<=featureSize; i++) { Jt_p[i] = i*NstateActive; if(i==featureSize) break; for(int j=0; jNstate, .ncol = featureSize, .nzmax = NstateActive*featureSize, .p = (void*)Jt_p, .i = (void*)Jt_i, .x = (double*)JqueryFeature, .sorted = 1, .packed = 1, .stype = 0, // NOT symmetric .itype = CHOLMOD_INT, .xtype = CHOLMOD_REAL, .dtype = CHOLMOD_DOUBLE}; // Really shouldn't need to do this every time. In fact I probably don't need // to do it at all, since this will have been done by the solver during the // last step dogleg_computeJtJfactorization( point, ctx ); cholmod_sparse* invJtJ_Jp = cholmod_spsolve(CHOLMOD_A, ctx->factorization, &Jt_query_sparse, &ctx->common); // Now I need trace(matmult(Jquery, invJtJ_Jp)) // haven't implemented anything else yet ASSERT(featureSize == 2); double A[4] = {}; // gah. only elements 0,1,3 will be stored. 
for(int i=0; i= istateActive) { if(row >= istateActive+NstateActive) break; int ic0 = i*featureSize; for(int k=i; kcommon); double invB00 = A[0]+1.0; double invB01 = A[1]; double invB11 = A[3]+1.0; double det_invB_recip = 1.0/(invB00*invB11 - invB01*invB01); double B00 = invB11 * det_invB_recip; double B11 = invB00 * det_invB_recip; __attribute__((unused)) double B01 = -invB01 * det_invB_recip; double traceB = B00 + B11; __attribute__((unused)) double traceBB = B00*B00 + 2.0*B01*B01 + B11*B11; #if 0 static int count = -1; count++; if(count <= 5) { int Nstate = ctx->Nstate; int Nmeasurements = ctx->Nmeasurements; static FILE* fp = NULL; if(fp == NULL) fp = fopen("/tmp/check-query-outlierness.py", "w"); fprintf(fp, "# WARNING: all J here are unscaled with SCALE_....\n"); if(count == 0) { fprintf(fp, "import numpy as np\n" "import numpysane as nps\n" "np.set_printoptions(linewidth=100000)\n" "\n"); } fprintf(fp, "x%d = np.array((", count); for(int j=0;jx[j]); fprintf(fp,"))\n"); if( ctx->is_sparse ) { fprintf(fp, "J%d = np.zeros((%d,%d))\n", count, Nmeasurements, Nstate); for(int imeas=0;imeasJt, imeas); j < (int)P(point->Jt, imeas+1); j++) { int irow = I(point->Jt, j); fprintf(fp, "J%d[%d,%d] = %g\n", count, imeas, irow, X(point->Jt, j)); } } } else { fprintf(fp, "J%d = np.array((\n", count); for(int j=0;jJ_dense[j*Nstate+i]); fprintf(fp, "),\n"); } fprintf(fp,"))\n"); } fprintf(fp, "Nmeasurements = %d\n", Nmeasurements); fprintf(fp, "Nstate = %d\n", Nstate); fprintf(fp, "featureSize = %d\n", featureSize); fprintf(fp, "traceB_got = %.20g\n", traceB); fprintf(fp, "Jq = np.zeros((featureSize, Nstate), dtype=float)\n"); for(int i=0; iNmeasurements; int Nstate = ctx->Nstate; double scale = -1.0; getOutliernessScale(&scale, Nmeasurements, Nstate, NoutlierFeatures, featureSize, point->norm2_x); // // Dima's self+others // return scale * traceB; // Cook's self+others return scale * (2.0 - traceB); // // Dima's others/Cook's others // // This one is non-monotonic in 
outlierness-test // return scale * (traceB - traceBB); } #define OUTLIER_CONFIDENCE_DROP_THRESHOLD 0.05 bool dogleg_markOutliers(// output, input struct dogleg_outliers_t* markedOutliers, double* scale, // if <0 then I recompute // output, input int* Noutliers, // input double (getConfidence)(int i_feature_exclude), // if outliers are grouped into features, the feature // size is set here int featureSize, int Nfeatures, dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx) { if(featureSize <= 1) featureSize = 1; bool markedAny = false; double* factors = malloc(Nfeatures * sizeof(double)); if(factors == NULL) { SAY("Error allocating factors"); goto done; } if(!dogleg_getOutliernessFactors(factors, scale, featureSize, Nfeatures, *Noutliers, point, ctx)) goto done; // I have my list of POTENTIAL outliers (any that have factor > 1.0). I // check to see how much confidence I would lose if I were to throw out any // of these measurements, and accept the outlier ONLY if the confidence loss // is acceptable double confidence0 = getConfidence(-1); if( confidence0 < 0.0 ) goto done; SAY_IF_VERBOSE("Initial confidence: %g", confidence0); *Noutliers = 0; for(int i=0; i // License: GNU LGPL . #pragma once #include #include typedef void (dogleg_callback_t)(const double* p, double* x, cholmod_sparse* Jt, void* cookie); typedef void (dogleg_callback_dense_t)(const double* p, double* x, double* J, void* cookie); // an operating point of the solver typedef struct { // The pointers in this structure are all indices into a single "pool" array // (see allocOperatingPoint()). I thus don't need to store the pointers at // all, and can just index that one array directly, but that would break my // ABI double* p; double* x; double norm2_x; union { cholmod_sparse* Jt; double* J_dense; // row-first: grad0, grad1, grad2, ... }; double* Jt_x; // the cached update vectors. 
It's useful to cache these so that when a step is rejected, we can // reuse these when we retry double* updateCauchy; union { cholmod_dense* updateGN_cholmoddense; double* updateGN_dense; }; double updateCauchy_lensq, updateGN_lensq; // update vector lengths // whether the current update vectors are correct or not int updateCauchy_valid, updateGN_valid; int didStepToEdgeOfTrustRegion; double* step_to_here; double step_to_here_len_sq; } dogleg_operatingPoint_t; // The newer APIs ( dogleg_...2() ) take a dogleg_parameters2_t argument for // their settings, while the older ones use a global set of parameters specified // with dogleg_set_...(). This global-parameters approach doesn't work if we // have multiple users of libdogleg in the same application typedef struct { int max_iterations; int dogleg_debug; // it is cheap to reject a too-large trust region, so I start with something // "large". The solver will quickly move down to something reasonable. Only the // user really knows if this is "large" or not, so they should change this via // the API double trustregion0; // These are probably OK to leave alone. Tweaking them can maybe result in // slightly faster convergence double trustregion_decrease_factor; double trustregion_decrease_threshold; double trustregion_increase_factor; double trustregion_increase_threshold; // The termination thresholds. Documented in the header double Jt_x_threshold; double update_threshold; double trustregion_threshold; } dogleg_parameters2_t; // solver context. This has all the internal state of the solver typedef struct { cholmod_common common; union { dogleg_callback_t* f; dogleg_callback_dense_t* f_dense; }; void* cookie; // between steps, beforeStep contains the operating point of the last step. // afterStep is ONLY used while making the step. 
Externally, use beforeStep // unless you really know what you're doing dogleg_operatingPoint_t* beforeStep; dogleg_operatingPoint_t* afterStep; // The result of the last JtJ factorization performed. Note that JtJ is not // necessarily factorized at every step, so this is NOT guaranteed to contain // the factorization of the most recent JtJ union { cholmod_factor* factorization; // This is a factorization of JtJ, stored as a packed symmetric matrix // returned by dpptrf('L', ...) double* factorization_dense; }; // Have I ever seen a singular JtJ? If so, I add this constant to the diagonal // from that point on. This is a simple and fast way to deal with // singularities. This constant starts at 0, and is increased every time a // singular JtJ is encountered. This is suboptimal but works for me for now. double lambda; // Are we using sparse math (cholmod)? int is_sparse; int Nstate, Nmeasurements; const dogleg_parameters2_t* parameters; } dogleg_solverContext_t; // Fills in the given structure with the default parameter set void dogleg_getDefaultParameters(dogleg_parameters2_t* parameters); void dogleg_setMaxIterations(int n); void dogleg_setTrustregionUpdateParameters(double downFactor, double downThreshold, double upFactor, double upThreshold); // The argument is a bit-mapped integer. Should be a bit-field structure or // enum, but for API backwards-compatibility, I keep this an integer. // // if(debug == 0 ): no diagnostic output // if(debug & DOGLEG_DEBUG_VNLOG): output vnlog diagnostics to stdout // if(debug & ~DOGLEG_DEBUG_VNLOG): output human-oriented diagnostics to stderr #define DOGLEG_DEBUG_VNLOG (1 << 30) void dogleg_setDebug(int debug); // The following parameters reflect the scaling of the problem being solved, so // the user is strongly encouraged to tweak these. The defaults are set // semi-arbitrarily // The size of the trust region at start. It is cheap to reject a too-large // trust region, so this should be something "large". 
Say 10x the length of an // "expected" step size void dogleg_setInitialTrustregion(double t); // termination thresholds. These really depend on the scaling of the input // problem, so the user should set these appropriately // // Jt_x threshold: // The function being minimized is E = norm2(x) where x is a function of the state p. // dE/dp = 2*Jt*x where Jt is transpose(dx/dp). // if( for every i fabs(Jt_x[i]) < JT_X_THRESHOLD ) // { we are done; } // // update threshold: // if(for every i fabs(state_update[i]) < UPDATE_THRESHOLD) // { we are done; } // // trust region threshold: // if(trustregion < TRUSTREGION_THRESHOLD) // { we are done; } // // to leave a particular threshold alone, use a value <= 0 here void dogleg_setThresholds(double Jt_x, double update, double trustregion); // The main optimization function. Initial estimate of the solution passed in p, // final optimized solution returned in p. p has Nstate variables. There are // Nmeas measurements, the jacobian matrix has NJnnz non-zero entries. // // The evaluation function is given in the callback f; this function is passed // the given cookie // // If we want to get the full solver state when we're done, a non-NULL // returnContext pointer can be given. If this is done then THE USER IS // RESPONSIBLE FOR FREEING ITS MEMORY WITH dogleg_freeContext() double dogleg_optimize(double* p, unsigned int Nstate, unsigned int Nmeas, unsigned int NJnnz, dogleg_callback_t* f, void* cookie, dogleg_solverContext_t** returnContext); double dogleg_optimize2(double* p, unsigned int Nstate, unsigned int Nmeas, unsigned int NJnnz, dogleg_callback_t* f, void* cookie, const dogleg_parameters2_t* parameters, dogleg_solverContext_t** returnContext); // Main optimization function if we're using dense linear algebra. 
The arguments // are very similar to the sparse version: dogleg_optimize() double dogleg_optimize_dense(double* p, unsigned int Nstate, unsigned int Nmeas, dogleg_callback_dense_t* f, void* cookie, dogleg_solverContext_t** returnContext); double dogleg_optimize_dense2(double* p, unsigned int Nstate, unsigned int Nmeas, dogleg_callback_dense_t* f, void* cookie, const dogleg_parameters2_t* parameters, dogleg_solverContext_t** returnContext); // Compute the cholesky decomposition of JtJ. This function is only exposed if // you need to touch libdogleg internals via returnContext. Sometimes after // computing an optimization you want to do stuff with the factorization of JtJ, // and this call ensures that the factorization is available. Most people don't // need this function. If the comment wasn't clear, you don't need this // function. void dogleg_computeJtJfactorization(dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx); // Generates a plain text table that contains gradient checks. This table can be // easily parsed with "vnlog" tools void dogleg_testGradient(unsigned int var, const double* p0, unsigned int Nstate, unsigned int Nmeas, unsigned int NJnnz, dogleg_callback_t* f, void* cookie); void dogleg_testGradient_dense(unsigned int var, const double* p0, unsigned int Nstate, unsigned int Nmeas, dogleg_callback_dense_t* f, void* cookie); // If we want to get the full solver state when we're done optimizing, we can // pass a non-NULL returnContext pointer to dogleg_optimize(). If we do this, // then the user MUST call dogleg_freeContext() to deallocate the pointer when // the USER is done void dogleg_freeContext(dogleg_solverContext_t** ctx); // Computes outlierness factors. This function is experimental, and subject to // change. 
bool dogleg_getOutliernessFactors( // output double* factors, // Nfeatures factors // output, input double* scale, // if <0 then I recompute // inputs // if outliers are grouped into features, the feature size is // stated here int featureSize, int Nfeatures, int NoutlierFeatures, // how many outliers we already have dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx ); // This stuff is experimental, and subject to change. struct dogleg_outliers_t { unsigned char marked : 1; }; bool dogleg_markOutliers(// output, input struct dogleg_outliers_t* markedOutliers, double* scale, // if <0 then I recompute // output, input int* Noutliers, // number of outliers before and after this call // input double (getConfidence)(int i_feature_exclude), // if outliers are grouped into features, the feature size is // stated here int featureSize, int Nfeatures, dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx); void dogleg_reportOutliers( double (getConfidence)(int i_feature_exclude), double* scale, // if <0 then I recompute // if outliers are grouped into features, the feature size // is stated here int featureSize, int Nfeatures, int Noutliers, // how many outliers we already have dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx); double dogleg_getOutliernessTrace_newFeature_sparse(const double* JqueryFeature, int istateActive, int NstateActive, int featureSize, int NoutlierFeatures, dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx); libdogleg-0.16/sample.c000066400000000000000000000212021445527023200150160ustar00rootroot00000000000000// -*- mode: C; c-basic-offset: 2 -*- #include #include #include #include #include "dogleg.h" // This is a trivial sample application to demonstrate libdogleg in action. // Let's say that I have a simple non-linear model // // a*b * x**2 + b*c * y**2 + c * x*y + d * x + e * y * f = measurements // // here I'm trying to estimate the vector (a,b,c,d,e,f) to most closely fit the // data vector measurements. 
This problem is clearly non-sparse, but both sparse // and dense versions of libdogleg are demonstrated here. // // First I generate some noise-corrupted data, and then use libdogleg to solve // the problem. // My state vector (a,b,c,d,e,f) has 6 elements #define Nstate 6 // I simulate my measurements using these as the TRUE values for the model #define REFERENCE_A 1.0 #define REFERENCE_B 2.0 #define REFERENCE_C 3.0 #define REFERENCE_D 4.0 #define REFERENCE_E 5.0 #define REFERENCE_F 6.0 // I simulate by sampling the x-y space in a grid. This grid is defined here #define SIMULATION_GRID_WIDTH 10 #define SIMULATION_GRID_MIN -10 #define SIMULATION_GRID_DELTA 2.0 #define Nmeasurements (SIMULATION_GRID_WIDTH*SIMULATION_GRID_WIDTH) static double allx [Nmeasurements]; static double ally [Nmeasurements]; static double allm_simulated_noisy[Nmeasurements]; static void simulate(void) { for(int i=0; ip; int* Jcolidx = (int*)Jt->i; double* Jval = (double*)Jt->x; int iJacobian = 0; #define STORE_JACOBIAN(col, g) \ do \ { \ Jcolidx[ iJacobian ] = col; \ Jval [ iJacobian ] = g; \ iJacobian++; \ } while(0) double norm2_x = 0.0; for(int i=0; i= argc) { fprintf(stderr, usage, argv[0]); return 1; } if(0 == strcmp("--diag-vnlog", argv[iarg])) { debug |= DOGLEG_DEBUG_VNLOG; iarg++; continue; } if(0 == strcmp("--diag-human", argv[iarg])) { debug |= 1; iarg++; continue; } break; } if(iarg >= argc) { fprintf(stderr, usage, argv[0]); return 1; } if( 0 == strcmp(argv[iarg], "dense") ) { fprintf(stderr, "Using DENSE math\n"); is_sparse = 0; } else if( 0 == strcmp(argv[iarg], "sparse") ) { fprintf(stderr, "Using SPARSE math\n"); is_sparse = 1; } else { fprintf(stderr, usage, argv[0]); return 1; } iarg++; if(iarg == argc-1) { if( 0 != strcmp("test-gradients", argv[iarg])) { fprintf(stderr, usage, argv[0]); return 1; } fprintf(stderr, "Testing the gradients only\n"); test_gradients = 1; } else if(iarg == argc) { // not testing gradients. 
We're good } else { fprintf(stderr, usage, argv[0]); return 1; } } srandom( 0 ); // I want determinism here generateSimulationGrid(); simulate(); dogleg_parameters2_t dogleg_parameters; dogleg_getDefaultParameters(&dogleg_parameters); dogleg_parameters.dogleg_debug = debug; double p[Nstate]; // I start solving with all my state variables set to some random noise for(int i=0; i