| /* |
| * Library: lmfit (Levenberg-Marquardt least squares fitting) |
| * |
| * File: lmmin.c |
| * |
| * Contents: Levenberg-Marquardt minimization. |
| * |
| * Copyright: MINPACK authors, The University of Chikago (1980-1999) |
| * Joachim Wuttke, Forschungszentrum Juelich GmbH (2004-2013) |
| * |
| * License: see ../COPYING (FreeBSD) |
| * |
| * Homepage: apps.jcns.fz-juelich.de/lmfit |
| */ |
| |
| #include <assert.h> |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <math.h> |
| #include <float.h> |
| #include "lmmin.h" |
| |
| #define MIN(a, b) (((a) <= (b)) ? (a) : (b)) |
| #define MAX(a, b) (((a) >= (b)) ? (a) : (b)) |
| #define SQR(x) (x) * (x) |
| |
| /* Declare functions that do the heavy numerics. |
| Implementions are in this source file, below lmmin. |
| Dependences: lmmin calls lmpar, which calls qrfac and qrsolv. */ |
| void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot, |
| const double* diag, const double* qtb, const double delta, |
| double* par, double* x, double* Sdiag, double* aux, double* xdi); |
| void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag, |
| double* Acnorm, double* W); |
| void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot, |
| const double* diag, const double* qtb, double* x, |
| double* Sdiag, double* W); |
| |
| /******************************************************************************/ |
| /* Numeric constants */ |
| /******************************************************************************/ |
| |
| /* Set machine-dependent constants to values from float.h. */ |
| #define LM_MACHEP DBL_EPSILON /* resolution of arithmetic */ |
| #define LM_DWARF DBL_MIN /* smallest nonzero number */ |
| #define LM_SQRT_DWARF sqrt(DBL_MIN) /* square should not underflow */ |
| #define LM_SQRT_GIANT sqrt(DBL_MAX) /* square should not overflow */ |
| #define LM_USERTOL 30 * LM_MACHEP /* users are recommended to require this */ |
| |
| /* If the above values do not work, the following seem good for an x86: |
| LM_MACHEP .555e-16 |
| LM_DWARF 9.9e-324 |
| LM_SQRT_DWARF 1.e-160 |
| LM_SQRT_GIANT 1.e150 |
| LM_USER_TOL 1.e-14 |
| The following values should work on any machine: |
| LM_MACHEP 1.2e-16 |
| LM_DWARF 1.0e-38 |
| LM_SQRT_DWARF 3.834e-20 |
| LM_SQRT_GIANT 1.304e19 |
| LM_USER_TOL 1.e-14 |
| */ |
| |
| /* Predefined control parameter sets (msgfile=NULL means stdout). */ |
| const lm_control_struct lm_control_double = { |
| LM_USERTOL, LM_USERTOL, LM_USERTOL, LM_USERTOL, |
| 100., 100, 1, NULL, 0, -1, -1}; |
| const lm_control_struct lm_control_float = { |
| 1.e-7, 1.e-7, 1.e-7, 1.e-7, |
| 100., 100, 1, NULL, 0, -1, -1}; |
| |
| /******************************************************************************/ |
| /* Message texts (indexed by status.info) */ |
| /******************************************************************************/ |
| |
| const char* lm_infmsg[] = { |
| "found zero (sum of squares below underflow limit)", |
| "converged (the relative error in the sum of squares is at most tol)", |
| "converged (the relative error of the parameter vector is at most tol)", |
| "converged (both errors are at most tol)", |
| "trapped (by degeneracy; increasing epsilon might help)", |
| "exhausted (number of function calls exceeding preset patience)", |
| "failed (ftol<tol: cannot reduce sum of squares any further)", |
| "failed (xtol<tol: cannot improve approximate solution any further)", |
| "failed (gtol<tol: cannot improve approximate solution any further)", |
| "crashed (not enough memory)", |
| "exploded (fatal coding error: improper input parameters)", |
| "stopped (break requested within function evaluation)", |
| "found nan (function value is not-a-number or infinite)"}; |
| |
| const char* lm_shortmsg[] = { |
| "found zero", |
| "converged (f)", |
| "converged (p)", |
| "converged (2)", |
| "degenerate", |
| "call limit", |
| "failed (f)", |
| "failed (p)", |
| "failed (o)", |
| "no memory", |
| "invalid input", |
| "user break", |
| "found nan"}; |
| |
| /******************************************************************************/ |
| /* Monitoring auxiliaries. */ |
| /******************************************************************************/ |
| |
| void lm_print_pars(int nout, const double* par, FILE* fout) |
| { |
| int i; |
| for (i = 0; i < nout; ++i) |
| fprintf(fout, " %16.9g", par[i]); |
| fprintf(fout, "\n"); |
| } |
| |
| /******************************************************************************/ |
| /* lmmin (main minimization routine) */ |
| /******************************************************************************/ |
| |
| void lmmin(const int n, double* x, const int m, const void* data, |
| void (*evaluate)(const double* par, const int m_dat, |
| const void* data, double* fvec, int* userbreak), |
| const lm_control_struct* C, lm_status_struct* S) |
| { |
| int j, i; |
| double actred, dirder, fnorm, fnorm1, gnorm, pnorm, prered, ratio, step, |
| sum, temp, temp1, temp2, temp3; |
| |
| /*** Initialize internal variables. ***/ |
| |
| int maxfev = C->patience * (n+1); |
| |
| int inner_success; /* flag for loop control */ |
| double lmpar = 0; /* Levenberg-Marquardt parameter */ |
| double delta = 0; |
| double xnorm = 0; |
| double eps = sqrt(MAX(C->epsilon, LM_MACHEP)); /* for forward differences */ |
| |
| int nout = C->n_maxpri == -1 ? n : MIN(C->n_maxpri, n); |
| |
| /* Reinterpret C->msgfile=NULL as stdout (which is unavailable for |
| compile-time initialization of lm_control_double and similar). */ |
| FILE* msgfile = C->msgfile ? C->msgfile : stdout; |
| |
| /*** Default status info; must be set before first return statement. ***/ |
| |
| S->outcome = 0; /* status code */ |
| S->userbreak = 0; |
| S->nfev = 0; /* function evaluation counter */ |
| |
| /*** Check input parameters for errors. ***/ |
| |
| if (n <= 0) { |
| fprintf(stderr, "lmmin: invalid number of parameters %i\n", n); |
| S->outcome = 10; |
| return; |
| } |
| if (m < n) { |
| fprintf(stderr, "lmmin: number of data points (%i) " |
| "smaller than number of parameters (%i)\n", |
| m, n); |
| S->outcome = 10; |
| return; |
| } |
| if (C->ftol < 0 || C->xtol < 0 || C->gtol < 0) { |
| fprintf(stderr, |
| "lmmin: negative tolerance (at least one of %g %g %g)\n", |
| C->ftol, C->xtol, C->gtol); |
| S->outcome = 10; |
| return; |
| } |
| if (maxfev <= 0) { |
| fprintf(stderr, "lmmin: nonpositive function evaluations limit %i\n", |
| maxfev); |
| S->outcome = 10; |
| return; |
| } |
| if (C->stepbound <= 0) { |
| fprintf(stderr, "lmmin: nonpositive stepbound %g\n", C->stepbound); |
| S->outcome = 10; |
| return; |
| } |
| if (C->scale_diag != 0 && C->scale_diag != 1) { |
| fprintf(stderr, "lmmin: logical variable scale_diag=%i, " |
| "should be 0 or 1\n", |
| C->scale_diag); |
| S->outcome = 10; |
| return; |
| } |
| |
| /*** Allocate work space. ***/ |
| |
| /* Allocate total workspace with just one system call */ |
| char* ws; |
| if ((ws = malloc((2*m + 5*n + m*n) * sizeof(double) + |
| n * sizeof(int))) == NULL) { |
| S->outcome = 9; |
| return; |
| } |
| |
| /* Assign workspace segments. */ |
| char* pws = ws; |
| double* fvec = (double*)pws; |
| pws += m * sizeof(double) / sizeof(char); |
| double* diag = (double*)pws; |
| pws += n * sizeof(double) / sizeof(char); |
| double* qtf = (double*)pws; |
| pws += n * sizeof(double) / sizeof(char); |
| double* fjac = (double*)pws; |
| pws += n * m * sizeof(double) / sizeof(char); |
| double* wa1 = (double*)pws; |
| pws += n * sizeof(double) / sizeof(char); |
| double* wa2 = (double*)pws; |
| pws += n * sizeof(double) / sizeof(char); |
| double* wa3 = (double*)pws; |
| pws += n * sizeof(double) / sizeof(char); |
| double* wf = (double*)pws; |
| pws += m * sizeof(double) / sizeof(char); |
| int* Pivot = (int*)pws; |
| pws += n * sizeof(int) / sizeof(char); |
| |
| /* Initialize diag. */ |
| if (!C->scale_diag) |
| for (j = 0; j < n; j++) |
| diag[j] = 1; |
| |
| /*** Evaluate function at starting point and calculate norm. ***/ |
| |
| if (C->verbosity) { |
| fprintf(msgfile, "lmmin start "); |
| lm_print_pars(nout, x, msgfile); |
| } |
| (*evaluate)(x, m, data, fvec, &(S->userbreak)); |
| if (C->verbosity > 4) |
| for (i = 0; i < m; ++i) |
| fprintf(msgfile, " fvec[%4i] = %18.8g\n", i, fvec[i]); |
| S->nfev = 1; |
| if (S->userbreak) |
| goto terminate; |
| fnorm = lm_enorm(m, fvec); |
| if (C->verbosity) |
| fprintf(msgfile, " fnorm = %18.8g\n", fnorm); |
| |
| if (!isfinite(fnorm)) { |
| S->outcome = 12; /* nan */ |
| goto terminate; |
| } else if (fnorm <= LM_DWARF) { |
| S->outcome = 0; /* sum of squares almost zero, nothing to do */ |
| goto terminate; |
| } |
| |
| /*** The outer loop: compute gradient, then descend. ***/ |
| |
| for (int outer = 0;; ++outer) { |
| |
| /** Calculate the Jacobian. **/ |
| for (j = 0; j < n; j++) { |
| temp = x[j]; |
| step = MAX(eps * eps, eps * fabs(temp)); |
| x[j] += step; /* replace temporarily */ |
| (*evaluate)(x, m, data, wf, &(S->userbreak)); |
| ++(S->nfev); |
| if (S->userbreak) |
| goto terminate; |
| for (i = 0; i < m; i++) |
| fjac[j*m+i] = (wf[i] - fvec[i]) / step; |
| x[j] = temp; /* restore */ |
| } |
| if (C->verbosity >= 10) { |
| /* print the entire matrix */ |
| printf("\nlmmin Jacobian\n"); |
| for (i = 0; i < m; i++) { |
| printf(" "); |
| for (j = 0; j < n; j++) |
| printf("%.5e ", fjac[j*m+i]); |
| printf("\n"); |
| } |
| } |
| |
| /** Compute the QR factorization of the Jacobian. **/ |
| |
| /* fjac is an m by n array. The upper n by n submatrix of fjac is made |
| * to contain an upper triangular matrix R with diagonal elements of |
| * nonincreasing magnitude such that |
| * |
| * P^T*(J^T*J)*P = R^T*R |
| * |
| * (NOTE: ^T stands for matrix transposition), |
| * |
| * where P is a permutation matrix and J is the final calculated |
| * Jacobian. Column j of P is column Pivot(j) of the identity matrix. |
| * The lower trapezoidal part of fjac contains information generated |
| * during the computation of R. |
| * |
| * Pivot is an integer array of length n. It defines a permutation |
| * matrix P such that jac*P = Q*R, where jac is the final calculated |
| * Jacobian, Q is orthogonal (not stored), and R is upper triangular |
| * with diagonal elements of nonincreasing magnitude. Column j of P |
| * is column Pivot(j) of the identity matrix. |
| */ |
| |
| lm_qrfac(m, n, fjac, Pivot, wa1, wa2, wa3); |
| /* return values are Pivot, wa1=rdiag, wa2=acnorm */ |
| |
| /** Form Q^T * fvec, and store first n components in qtf. **/ |
| for (i = 0; i < m; i++) |
| wf[i] = fvec[i]; |
| |
| for (j = 0; j < n; j++) { |
| temp3 = fjac[j*m+j]; |
| if (temp3 != 0) { |
| sum = 0; |
| for (i = j; i < m; i++) |
| sum += fjac[j*m+i] * wf[i]; |
| temp = -sum / temp3; |
| for (i = j; i < m; i++) |
| wf[i] += fjac[j*m+i] * temp; |
| } |
| fjac[j*m+j] = wa1[j]; |
| qtf[j] = wf[j]; |
| } |
| |
| /** Compute norm of scaled gradient and detect degeneracy. **/ |
| gnorm = 0; |
| for (j = 0; j < n; j++) { |
| if (wa2[Pivot[j]] == 0) |
| continue; |
| sum = 0; |
| for (i = 0; i <= j; i++) |
| sum += fjac[j*m+i] * qtf[i]; |
| gnorm = MAX(gnorm, fabs(sum / wa2[Pivot[j]] / fnorm)); |
| } |
| |
| if (gnorm <= C->gtol) { |
| S->outcome = 4; |
| goto terminate; |
| } |
| |
| /** Initialize or update diag and delta. **/ |
| if (!outer) { /* first iteration only */ |
| if (C->scale_diag) { |
| /* diag := norms of the columns of the initial Jacobian */ |
| for (j = 0; j < n; j++) |
| diag[j] = wa2[j] ? wa2[j] : 1; |
| /* xnorm := || D x || */ |
| for (j = 0; j < n; j++) |
| wa3[j] = diag[j] * x[j]; |
| xnorm = lm_enorm(n, wa3); |
| if (C->verbosity >= 2) { |
| fprintf(msgfile, "lmmin diag "); |
| lm_print_pars(nout, x, msgfile); // xnorm |
| fprintf(msgfile, " xnorm = %18.8g\n", xnorm); |
| } |
| /* Only now print the header for the loop table. */ |
| if (C->verbosity >= 3) { |
| fprintf(msgfile, " o i lmpar prered" |
| " ratio dirder delta" |
| " pnorm fnorm"); |
| for (i = 0; i < nout; ++i) |
| fprintf(msgfile, " p%i", i); |
| fprintf(msgfile, "\n"); |
| } |
| } else { |
| xnorm = lm_enorm(n, x); |
| } |
| if (!isfinite(xnorm)) { |
| S->outcome = 12; /* nan */ |
| goto terminate; |
| } |
| /* Initialize the step bound delta. */ |
| if (xnorm) |
| delta = C->stepbound * xnorm; |
| else |
| delta = C->stepbound; |
| } else { |
| if (C->scale_diag) { |
| for (j = 0; j < n; j++) |
| diag[j] = MAX(diag[j], wa2[j]); |
| } |
| } |
| |
| /** The inner loop. **/ |
| int inner = 0; |
| do { |
| |
| /** Determine the Levenberg-Marquardt parameter. **/ |
| lm_lmpar(n, fjac, m, Pivot, diag, qtf, delta, &lmpar, |
| wa1, wa2, wf, wa3); |
| /* used return values are fjac (partly), lmpar, wa1=x, wa3=diag*x */ |
| |
| /* Predict scaled reduction. */ |
| pnorm = lm_enorm(n, wa3); |
| if (!isfinite(pnorm)) { |
| S->outcome = 12; /* nan */ |
| goto terminate; |
| } |
| temp2 = lmpar * SQR(pnorm / fnorm); |
| for (j = 0; j < n; j++) { |
| wa3[j] = 0; |
| for (i = 0; i <= j; i++) |
| wa3[i] -= fjac[j*m+i] * wa1[Pivot[j]]; |
| } |
| temp1 = SQR(lm_enorm(n, wa3) / fnorm); |
| if (!isfinite(temp1)) { |
| S->outcome = 12; /* nan */ |
| goto terminate; |
| } |
| prered = temp1 + 2*temp2; |
| dirder = -temp1 + temp2; /* scaled directional derivative */ |
| |
| /* At first call, adjust the initial step bound. */ |
| if (!outer && pnorm < delta) |
| delta = pnorm; |
| |
| /** Evaluate the function at x + p. **/ |
| for (j = 0; j < n; j++) |
| wa2[j] = x[j] - wa1[j]; |
| (*evaluate)(wa2, m, data, wf, &(S->userbreak)); |
| ++(S->nfev); |
| if (S->userbreak) |
| goto terminate; |
| fnorm1 = lm_enorm(m, wf); |
| if (!isfinite(fnorm1)) { |
| S->outcome = 12; /* nan */ |
| goto terminate; |
| } |
| |
| /** Evaluate the scaled reduction. **/ |
| |
| /* Actual scaled reduction. */ |
| actred = 1 - SQR(fnorm1 / fnorm); |
| |
| /* Ratio of actual to predicted reduction. */ |
| ratio = prered ? actred / prered : 0; |
| |
| if (C->verbosity == 2) { |
| fprintf(msgfile, "lmmin (%i:%i) ", outer, inner); |
| lm_print_pars(nout, wa2, msgfile); // fnorm1, |
| } else if (C->verbosity >= 3) { |
| printf("%3i %2i %9.2g %9.2g %14.6g" |
| " %9.2g %10.3e %10.3e %21.15e", |
| outer, inner, lmpar, prered, ratio, |
| dirder, delta, pnorm, fnorm1); |
| for (i = 0; i < nout; ++i) |
| fprintf(msgfile, " %16.9g", wa2[i]); |
| fprintf(msgfile, "\n"); |
| } |
| |
| /* Update the step bound. */ |
| if (ratio <= 0.25) { |
| if (actred >= 0) |
| temp = 0.5; |
| else if (actred > -99) /* -99 = 1-1/0.1^2 */ |
| temp = MAX(dirder / (2*dirder + actred), 0.1); |
| else |
| temp = 0.1; |
| delta = temp * MIN(delta, pnorm / 0.1); |
| lmpar /= temp; |
| } else if (ratio >= 0.75) { |
| delta = 2 * pnorm; |
| lmpar *= 0.5; |
| } else if (!lmpar) { |
| delta = 2 * pnorm; |
| } |
| |
| /** On success, update solution, and test for convergence. **/ |
| |
| inner_success = ratio >= 1e-4; |
| if (inner_success) { |
| |
| /* Update x, fvec, and their norms. */ |
| if (C->scale_diag) { |
| for (j = 0; j < n; j++) { |
| x[j] = wa2[j]; |
| wa2[j] = diag[j] * x[j]; |
| } |
| } else { |
| for (j = 0; j < n; j++) |
| x[j] = wa2[j]; |
| } |
| for (i = 0; i < m; i++) |
| fvec[i] = wf[i]; |
| xnorm = lm_enorm(n, wa2); |
| if (!isfinite(xnorm)) { |
| S->outcome = 12; /* nan */ |
| goto terminate; |
| } |
| fnorm = fnorm1; |
| } |
| |
| /* Convergence tests. */ |
| S->outcome = 0; |
| if (fnorm <= LM_DWARF) |
| goto terminate; /* success: sum of squares almost zero */ |
| /* Test two criteria (both may be fulfilled). */ |
| if (fabs(actred) <= C->ftol && prered <= C->ftol && ratio <= 2) |
| S->outcome = 1; /* success: x almost stable */ |
| if (delta <= C->xtol * xnorm) |
| S->outcome += 2; /* success: sum of squares almost stable */ |
| if (S->outcome != 0) { |
| goto terminate; |
| } |
| |
| /** Tests for termination and stringent tolerances. **/ |
| if (S->nfev >= maxfev) { |
| S->outcome = 5; |
| goto terminate; |
| } |
| if (fabs(actred) <= LM_MACHEP && prered <= LM_MACHEP && |
| ratio <= 2) { |
| S->outcome = 6; |
| goto terminate; |
| } |
| if (delta <= LM_MACHEP * xnorm) { |
| S->outcome = 7; |
| goto terminate; |
| } |
| if (gnorm <= LM_MACHEP) { |
| S->outcome = 8; |
| goto terminate; |
| } |
| |
| /** End of the inner loop. Repeat if iteration unsuccessful. **/ |
| ++inner; |
| } while (!inner_success); |
| |
| }; /*** End of the outer loop. ***/ |
| |
| terminate: |
| S->fnorm = lm_enorm(m, fvec); |
| if (C->verbosity >= 2) |
| printf("lmmin outcome (%i) xnorm %g ftol %g xtol %g\n", S->outcome, |
| xnorm, C->ftol, C->xtol); |
| if (C->verbosity & 1) { |
| fprintf(msgfile, "lmmin final "); |
| lm_print_pars(nout, x, msgfile); // S->fnorm, |
| fprintf(msgfile, " fnorm = %18.8g\n", S->fnorm); |
| } |
| if (S->userbreak) /* user-requested break */ |
| S->outcome = 11; |
| |
| /*** Deallocate the workspace. ***/ |
| free(ws); |
| |
| } /*** lmmin. ***/ |
| |
| /******************************************************************************/ |
| /* lm_lmpar (determine Levenberg-Marquardt parameter) */ |
| /******************************************************************************/ |
| |
| void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot, |
| const double* diag, const double* qtb, const double delta, |
| double* par, double* x, double* Sdiag, double* aux, double* xdi) |
| /* Given an m by n matrix A, an n by n nonsingular diagonal matrix D, |
| * an m-vector b, and a positive number delta, the problem is to |
| * determine a parameter value par such that if x solves the system |
| * |
| * A*x = b and sqrt(par)*D*x = 0 |
| * |
| * in the least squares sense, and dxnorm is the Euclidean norm of D*x, |
| * then either par=0 and (dxnorm-delta) < 0.1*delta, or par>0 and |
| * abs(dxnorm-delta) < 0.1*delta. |
| * |
| * Using lm_qrsolv, this subroutine completes the solution of the |
| * problem if it is provided with the necessary information from the |
| * QR factorization, with column pivoting, of A. That is, if A*P = Q*R, |
| * where P is a permutation matrix, Q has orthogonal columns, and R is |
| * an upper triangular matrix with diagonal elements of nonincreasing |
| * magnitude, then lmpar expects the full upper triangle of R, the |
| * permutation matrix P, and the first n components of Q^T*b. On output |
| * lmpar also provides an upper triangular matrix S such that |
| * |
| * P^T*(A^T*A + par*D*D)*P = S^T*S. |
| * |
| * S is employed within lmpar and may be of separate interest. |
| * |
| * Only a few iterations are generally needed for convergence of the |
| * algorithm. If, however, the limit of 10 iterations is reached, then |
| * the output par will contain the best value obtained so far. |
| * |
| * Parameters: |
| * |
| * n is a positive integer INPUT variable set to the order of r. |
| * |
| * r is an n by n array. On INPUT the full upper triangle must contain |
| * the full upper triangle of the matrix R. On OUTPUT the full upper |
| * triangle is unaltered, and the strict lower triangle contains the |
| * strict upper triangle (transposed) of the upper triangular matrix S. |
| * |
| * ldr is a positive integer INPUT variable not less than n which |
| * specifies the leading dimension of the array R. |
| * |
| * Pivot is an integer INPUT array of length n which defines the |
| * permutation matrix P such that A*P = Q*R. Column j of P is column |
| * Pivot(j) of the identity matrix. |
| * |
| * diag is an INPUT array of length n which must contain the diagonal |
| * elements of the matrix D. |
| * |
| * qtb is an INPUT array of length n which must contain the first |
| * n elements of the vector Q^T*b. |
| * |
| * delta is a positive INPUT variable which specifies an upper bound |
| * on the Euclidean norm of D*x. |
| * |
| * par is a nonnegative variable. On INPUT par contains an initial |
| * estimate of the Levenberg-Marquardt parameter. On OUTPUT par |
| * contains the final estimate. |
| * |
| * x is an OUTPUT array of length n which contains the least-squares |
| * solution of the system A*x = b, sqrt(par)*D*x = 0, for the output par. |
| * |
| * Sdiag is an array of length n needed as workspace; on OUTPUT it |
| * contains the diagonal elements of the upper triangular matrix S. |
| * |
| * aux is a multi-purpose work array of length n. |
| * |
| * xdi is a work array of length n. On OUTPUT: diag[j] * x[j]. |
| * |
| */ |
| { |
| int i, iter, j, nsing; |
| double dxnorm, fp, fp_old, gnorm, parc, parl, paru; |
| double sum, temp; |
| static double p1 = 0.1; |
| |
| /*** Compute and store in x the Gauss-Newton direction. If the Jacobian |
| is rank-deficient, obtain a least-squares solution. ***/ |
| |
| nsing = n; |
| for (j = 0; j < n; j++) { |
| aux[j] = qtb[j]; |
| if (r[j*ldr+j] == 0 && nsing == n) |
| nsing = j; |
| if (nsing < n) |
| aux[j] = 0; |
| } |
| for (j = nsing-1; j >= 0; j--) { |
| aux[j] = aux[j] / r[j+ldr*j]; |
| temp = aux[j]; |
| for (i = 0; i < j; i++) |
| aux[i] -= r[j*ldr+i] * temp; |
| } |
| |
| for (j = 0; j < n; j++) |
| x[Pivot[j]] = aux[j]; |
| |
| /*** Initialize the iteration counter, evaluate the function at the origin, |
| and test for acceptance of the Gauss-Newton direction. ***/ |
| |
| for (j = 0; j < n; j++) |
| xdi[j] = diag[j] * x[j]; |
| dxnorm = lm_enorm(n, xdi); |
| fp = dxnorm - delta; |
| if (fp <= p1 * delta) { |
| #ifdef LMFIT_DEBUG_MESSAGES |
| printf("debug lmpar nsing=%d, n=%d, terminate[fp<=p1*del]\n", nsing, n); |
| #endif |
| *par = 0; |
| return; |
| } |
| |
| /*** If the Jacobian is not rank deficient, the Newton step provides a |
| lower bound, parl, for the zero of the function. Otherwise set this |
| bound to zero. ***/ |
| |
| parl = 0; |
| if (nsing >= n) { |
| for (j = 0; j < n; j++) |
| aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm; |
| |
| for (j = 0; j < n; j++) { |
| sum = 0; |
| for (i = 0; i < j; i++) |
| sum += r[j*ldr+i] * aux[i]; |
| aux[j] = (aux[j] - sum) / r[j+ldr*j]; |
| } |
| temp = lm_enorm(n, aux); |
| parl = fp / delta / temp / temp; |
| } |
| |
| /*** Calculate an upper bound, paru, for the zero of the function. ***/ |
| |
| for (j = 0; j < n; j++) { |
| sum = 0; |
| for (i = 0; i <= j; i++) |
| sum += r[j*ldr+i] * qtb[i]; |
| aux[j] = sum / diag[Pivot[j]]; |
| } |
| gnorm = lm_enorm(n, aux); |
| paru = gnorm / delta; |
| if (paru == 0) |
| paru = LM_DWARF / MIN(delta, p1); |
| |
| /*** If the input par lies outside of the interval (parl,paru), |
| set par to the closer endpoint. ***/ |
| |
| *par = MAX(*par, parl); |
| *par = MIN(*par, paru); |
| if (*par == 0) |
| *par = gnorm / dxnorm; |
| |
| /*** Iterate. ***/ |
| |
| for (iter = 0;; iter++) { |
| |
| /** Evaluate the function at the current value of par. **/ |
| if (*par == 0) |
| *par = MAX(LM_DWARF, 0.001 * paru); |
| temp = sqrt(*par); |
| for (j = 0; j < n; j++) |
| aux[j] = temp * diag[j]; |
| |
| lm_qrsolv(n, r, ldr, Pivot, aux, qtb, x, Sdiag, xdi); |
| /* return values are r, x, Sdiag */ |
| |
| for (j = 0; j < n; j++) |
| xdi[j] = diag[j] * x[j]; /* used as output */ |
| dxnorm = lm_enorm(n, xdi); |
| fp_old = fp; |
| fp = dxnorm - delta; |
| |
| /** If the function is small enough, accept the current value |
| of par. Also test for the exceptional cases where parl |
| is zero or the number of iterations has reached 10. **/ |
| if (fabs(fp) <= p1 * delta || |
| (parl == 0 && fp <= fp_old && fp_old < 0) || iter == 10) { |
| #ifdef LMFIT_DEBUG_MESSAGES |
| printf("debug lmpar nsing=%d, iter=%d, " |
| "par=%.4e [%.4e %.4e], delta=%.4e, fp=%.4e\n", |
| nsing, iter, *par, parl, paru, delta, fp); |
| #endif |
| break; /* the only exit from the iteration. */ |
| } |
| |
| /** Compute the Newton correction. **/ |
| for (j = 0; j < n; j++) |
| aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm; |
| |
| for (j = 0; j < n; j++) { |
| aux[j] = aux[j] / Sdiag[j]; |
| for (i = j+1; i < n; i++) |
| aux[i] -= r[j*ldr+i] * aux[j]; |
| } |
| temp = lm_enorm(n, aux); |
| parc = fp / delta / temp / temp; |
| |
| /** Depending on the sign of the function, update parl or paru. **/ |
| if (fp > 0) |
| parl = MAX(parl, *par); |
| else /* fp < 0 [the case fp==0 is precluded by the break condition] */ |
| paru = MIN(paru, *par); |
| |
| /** Compute an improved estimate for par. **/ |
| *par = MAX(parl, *par + parc); |
| } |
| |
| } /*** lm_lmpar. ***/ |
| |
| /******************************************************************************/ |
| /* lm_qrfac (QR factorization, from lapack) */ |
| /******************************************************************************/ |
| |
| void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag, |
| double* Acnorm, double* W) |
| /* |
| * This subroutine uses Householder transformations with column pivoting |
| * to compute a QR factorization of the m by n matrix A. That is, qrfac |
| * determines an orthogonal matrix Q, a permutation matrix P, and an |
| * upper trapezoidal matrix R with diagonal elements of nonincreasing |
| * magnitude, such that A*P = Q*R. The Householder transformation for |
| * column k, k = 1,2,...,n, is of the form |
| * |
| * I - 2*w*wT/|w|^2 |
| * |
| * where w has zeroes in the first k-1 positions. |
| * |
| * Parameters: |
| * |
| * m is an INPUT parameter set to the number of rows of A. |
| * |
| * n is an INPUT parameter set to the number of columns of A. |
| * |
| * A is an m by n array. On INPUT, A contains the matrix for which the |
| * QR factorization is to be computed. On OUTPUT the strict upper |
| * trapezoidal part of A contains the strict upper trapezoidal part |
| * of R, and the lower trapezoidal part of A contains a factored form |
| * of Q (the non-trivial elements of the vectors w described above). |
| * |
| * Pivot is an integer OUTPUT array of length n that describes the |
| * permutation matrix P. Column j of P is column Pivot(j) of the |
| * identity matrix. |
| * |
| * Rdiag is an OUTPUT array of length n which contains the diagonal |
| * elements of R. |
| * |
| * Acnorm is an OUTPUT array of length n which contains the norms of |
| * the corresponding columns of the input matrix A. If this information |
| * is not needed, then Acnorm can share storage with Rdiag. |
| * |
| * W is a work array of length n. |
| * |
| */ |
| { |
| int i, j, k, kmax; |
| double ajnorm, sum, temp; |
| |
| #ifdef LMFIT_DEBUG_MESSAGES |
| printf("debug qrfac\n"); |
| #endif |
| |
| /** Compute initial column norms; |
| initialize Pivot with identity permutation. ***/ |
| for (j = 0; j < n; j++) { |
| W[j] = Rdiag[j] = Acnorm[j] = lm_enorm(m, &A[j*m]); |
| Pivot[j] = j; |
| } |
| |
| /** Loop over columns of A. **/ |
| assert(n <= m); |
| for (j = 0; j < n; j++) { |
| |
| /** Bring the column of largest norm into the pivot position. **/ |
| kmax = j; |
| for (k = j+1; k < n; k++) |
| if (Rdiag[k] > Rdiag[kmax]) |
| kmax = k; |
| |
| if (kmax != j) { |
| /* Swap columns j and kmax. */ |
| k = Pivot[j]; |
| Pivot[j] = Pivot[kmax]; |
| Pivot[kmax] = k; |
| for (i = 0; i < m; i++) { |
| temp = A[j*m+i]; |
| A[j*m+i] = A[kmax*m+i]; |
| A[kmax*m+i] = temp; |
| } |
| /* Half-swap: Rdiag[j], W[j] won't be needed any further. */ |
| Rdiag[kmax] = Rdiag[j]; |
| W[kmax] = W[j]; |
| } |
| |
| /** Compute the Householder reflection vector w_j to reduce the |
| j-th column of A to a multiple of the j-th unit vector. **/ |
| ajnorm = lm_enorm(m-j, &A[j*m+j]); |
| if (ajnorm == 0) { |
| Rdiag[j] = 0; |
| continue; |
| } |
| |
| /* Let the partial column vector A[j][j:] contain w_j := e_j+-a_j/|a_j|, |
| where the sign +- is chosen to avoid cancellation in w_jj. */ |
| if (A[j*m+j] < 0) |
| ajnorm = -ajnorm; |
| for (i = j; i < m; i++) |
| A[j*m+i] /= ajnorm; |
| A[j*m+j] += 1; |
| |
| /** Apply the Householder transformation U_w := 1 - 2*w_j.w_j/|w_j|^2 |
| to the remaining columns, and update the norms. **/ |
| for (k = j+1; k < n; k++) { |
| /* Compute scalar product w_j * a_j. */ |
| sum = 0; |
| for (i = j; i < m; i++) |
| sum += A[j*m+i] * A[k*m+i]; |
| |
| /* Normalization is simplified by the coincidence |w_j|^2=2w_jj. */ |
| temp = sum / A[j*m+j]; |
| |
| /* Carry out transform U_w_j * a_k. */ |
| for (i = j; i < m; i++) |
| A[k*m+i] -= temp * A[j*m+i]; |
| |
| /* No idea what happens here. */ |
| if (Rdiag[k] != 0) { |
| temp = A[m*k+j] / Rdiag[k]; |
| if (fabs(temp) < 1) { |
| Rdiag[k] *= sqrt(1 - SQR(temp)); |
| temp = Rdiag[k] / W[k]; |
| } else |
| temp = 0; |
| if (temp == 0 || 0.05 * SQR(temp) <= LM_MACHEP) { |
| Rdiag[k] = lm_enorm(m-j-1, &A[m*k+j+1]); |
| W[k] = Rdiag[k]; |
| } |
| } |
| } |
| |
| Rdiag[j] = -ajnorm; |
| } |
| } /*** lm_qrfac. ***/ |
| |
| /******************************************************************************/ |
| /* lm_qrsolv (linear least-squares) */ |
| /******************************************************************************/ |
| |
| void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot, |
| const double* diag, const double* qtb, double* x, |
| double* Sdiag, double* W) |
| /* |
| * Given an m by n matrix A, an n by n diagonal matrix D, and an |
| * m-vector b, the problem is to determine an x which solves the |
| * system |
| * |
| * A*x = b and D*x = 0 |
| * |
| * in the least squares sense. |
| * |
| * This subroutine completes the solution of the problem if it is |
| * provided with the necessary information from the QR factorization, |
| * with column pivoting, of A. That is, if A*P = Q*R, where P is a |
| * permutation matrix, Q has orthogonal columns, and R is an upper |
| * triangular matrix with diagonal elements of nonincreasing magnitude, |
| * then qrsolv expects the full upper triangle of R, the permutation |
| * matrix P, and the first n components of Q^T*b. The system |
| * A*x = b, D*x = 0, is then equivalent to |
| * |
| * R*z = Q^T*b, P^T*D*P*z = 0, |
| * |
| * where x = P*z. If this system does not have full rank, then a least |
| * squares solution is obtained. On output qrsolv also provides an upper |
| * triangular matrix S such that |
| * |
| * P^T*(A^T*A + D*D)*P = S^T*S. |
| * |
| * S is computed within qrsolv and may be of separate interest. |
| * |
| * Parameters: |
| * |
| * n is a positive integer INPUT variable set to the order of R. |
| * |
| * r is an n by n array. On INPUT the full upper triangle must contain |
| * the full upper triangle of the matrix R. On OUTPUT the full upper |
| * triangle is unaltered, and the strict lower triangle contains the |
| * strict upper triangle (transposed) of the upper triangular matrix S. |
| * |
| * ldr is a positive integer INPUT variable not less than n which |
| * specifies the leading dimension of the array R. |
| * |
| * Pivot is an integer INPUT array of length n which defines the |
| * permutation matrix P such that A*P = Q*R. Column j of P is column |
| * Pivot(j) of the identity matrix. |
| * |
| * diag is an INPUT array of length n which must contain the diagonal |
| * elements of the matrix D. |
| * |
| * qtb is an INPUT array of length n which must contain the first |
| * n elements of the vector Q^T*b. |
| * |
| * x is an OUTPUT array of length n which contains the least-squares |
| * solution of the system A*x = b, D*x = 0. |
| * |
| * Sdiag is an OUTPUT array of length n which contains the diagonal |
| * elements of the upper triangular matrix S. |
| * |
| * W is a work array of length n. |
| * |
| */ |
| { |
| int i, kk, j, k, nsing; |
| double qtbpj, sum, temp; |
| double _sin, _cos, _tan, _cot; /* local variables, not functions */ |
| |
| /*** Copy R and Q^T*b to preserve input and initialize S. |
| In particular, save the diagonal elements of R in x. ***/ |
| |
| for (j = 0; j < n; j++) { |
| for (i = j; i < n; i++) |
| r[j*ldr+i] = r[i*ldr+j]; |
| x[j] = r[j*ldr+j]; |
| W[j] = qtb[j]; |
| } |
| |
| /*** Eliminate the diagonal matrix D using a Givens rotation. ***/ |
| |
| for (j = 0; j < n; j++) { |
| |
| /*** Prepare the row of D to be eliminated, locating the diagonal |
| element using P from the QR factorization. ***/ |
| |
| if (diag[Pivot[j]] != 0) { |
| for (k = j; k < n; k++) |
| Sdiag[k] = 0; |
| Sdiag[j] = diag[Pivot[j]]; |
| |
| /*** The transformations to eliminate the row of D modify only |
| a single element of Q^T*b beyond the first n, which is |
| initially 0. ***/ |
| |
| qtbpj = 0; |
| for (k = j; k < n; k++) { |
| |
| /** Determine a Givens rotation which eliminates the |
| appropriate element in the current row of D. **/ |
| if (Sdiag[k] == 0) |
| continue; |
| kk = k + ldr * k; |
| if (fabs(r[kk]) < fabs(Sdiag[k])) { |
| _cot = r[kk] / Sdiag[k]; |
| _sin = 1 / hypot(1, _cot); |
| _cos = _sin * _cot; |
| } else { |
| _tan = Sdiag[k] / r[kk]; |
| _cos = 1 / hypot(1, _tan); |
| _sin = _cos * _tan; |
| } |
| |
| /** Compute the modified diagonal element of R and |
| the modified element of (Q^T*b,0). **/ |
| r[kk] = _cos * r[kk] + _sin * Sdiag[k]; |
| temp = _cos * W[k] + _sin * qtbpj; |
| qtbpj = -_sin * W[k] + _cos * qtbpj; |
| W[k] = temp; |
| |
| /** Accumulate the tranformation in the row of S. **/ |
| for (i = k+1; i < n; i++) { |
| temp = _cos * r[k*ldr+i] + _sin * Sdiag[i]; |
| Sdiag[i] = -_sin * r[k*ldr+i] + _cos * Sdiag[i]; |
| r[k*ldr+i] = temp; |
| } |
| } |
| } |
| |
| /** Store the diagonal element of S and restore |
| the corresponding diagonal element of R. **/ |
| Sdiag[j] = r[j*ldr+j]; |
| r[j*ldr+j] = x[j]; |
| } |
| |
| /*** Solve the triangular system for z. If the system is singular, then |
| obtain a least-squares solution. ***/ |
| |
| nsing = n; |
| for (j = 0; j < n; j++) { |
| if (Sdiag[j] == 0 && nsing == n) |
| nsing = j; |
| if (nsing < n) |
| W[j] = 0; |
| } |
| |
| for (j = nsing-1; j >= 0; j--) { |
| sum = 0; |
| for (i = j+1; i < nsing; i++) |
| sum += r[j*ldr+i] * W[i]; |
| W[j] = (W[j] - sum) / Sdiag[j]; |
| } |
| |
| /*** Permute the components of z back to components of x. ***/ |
| |
| for (j = 0; j < n; j++) |
| x[Pivot[j]] = W[j]; |
| |
| } /*** lm_qrsolv. ***/ |
| |
| /******************************************************************************/ |
| /* lm_enorm (Euclidean norm) */ |
| /******************************************************************************/ |
| |
| double lm_enorm(int n, const double* x) |
| /* This function calculates the Euclidean norm of an n-vector x. |
| * |
| * The Euclidean norm is computed by accumulating the sum of squares |
| * in three different sums. The sums of squares for the small and large |
| * components are scaled so that no overflows occur. Non-destructive |
| * underflows are permitted. Underflows and overflows do not occur in |
| * the computation of the unscaled sum of squares for the intermediate |
| * components. The definitions of small, intermediate and large components |
| * depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main |
| * restrictions on these constants are that LM_SQRT_DWARF**2 not underflow |
| * and LM_SQRT_GIANT**2 not overflow. |
| * |
| * Parameters: |
| * |
| * n is a positive integer INPUT variable. |
| * |
| * x is an INPUT array of length n. |
| */ |
| { |
| int i; |
| double agiant, s1, s2, s3, xabs, x1max, x3max; |
| |
| s1 = 0; |
| s2 = 0; |
| s3 = 0; |
| x1max = 0; |
| x3max = 0; |
| agiant = LM_SQRT_GIANT / n; |
| |
| /** Sum squares. **/ |
| for (i = 0; i < n; i++) { |
| xabs = fabs(x[i]); |
| if (xabs > LM_SQRT_DWARF) { |
| if (xabs < agiant) { |
| s2 += SQR(xabs); |
| } else if (xabs > x1max) { |
| s1 = 1 + s1 * SQR(x1max / xabs); |
| x1max = xabs; |
| } else { |
| s1 += SQR(xabs / x1max); |
| } |
| } else if (xabs > x3max) { |
| s3 = 1 + s3 * SQR(x3max / xabs); |
| x3max = xabs; |
| } else if (xabs != 0) { |
| s3 += SQR(xabs / x3max); |
| } |
| } |
| |
| /** Calculate the norm. **/ |
| if (s1 != 0) |
| return x1max * sqrt(s1 + (s2 / x1max) / x1max); |
| else if (s2 != 0) |
| if (s2 >= x3max) |
| return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3))); |
| else |
| return sqrt(x3max * ((s2 / x3max) + (x3max * s3))); |
| else |
| return x3max * sqrt(s3); |
| |
| } /*** lm_enorm. ***/ |