001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math3.optimization.general; 018 019 import java.util.Arrays; 020 021 import org.apache.commons.math3.exception.ConvergenceException; 022 import org.apache.commons.math3.exception.util.LocalizedFormats; 023 import org.apache.commons.math3.optimization.PointVectorValuePair; 024 import org.apache.commons.math3.optimization.ConvergenceChecker; 025 import org.apache.commons.math3.linear.RealMatrix; 026 import org.apache.commons.math3.util.Precision; 027 import org.apache.commons.math3.util.FastMath; 028 029 030 /** 031 * This class solves a least squares problem using the Levenberg-Marquardt algorithm. 032 * 033 * <p>This implementation <em>should</em> work even for over-determined systems 034 * (i.e. systems having more point than equations). Over-determined systems 035 * are solved by ignoring the point which have the smallest impact according 036 * to their jacobian column norm. Only the rank of the matrix and some loop bounds 037 * are changed to implement this.</p> 038 * 039 * <p>The resolution engine is a simple translation of the MINPACK <a 040 * href="http://www.netlib.org/minpack/lmder.f">lmder</a> routine with minor 041 * changes. The changes include the over-determined resolution, the use of 042 * inherited convergence checker and the Q.R. decomposition which has been 043 * rewritten following the algorithm described in the 044 * P. Lascaux and R. Theodor book <i>Analyse numérique matricielle 045 * appliquée à l'art de l'ingénieur</i>, Masson 1986.</p> 046 * <p>The authors of the original fortran version are: 047 * <ul> 048 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 049 * <li>Burton S. Garbow</li> 050 * <li>Kenneth E. Hillstrom</li> 051 * <li>Jorge J. More</li> 052 * </ul> 053 * The redistribution policy for MINPACK is available <a 054 * href="http://www.netlib.org/minpack/disclaimer">here</a>, for convenience, it 055 * is reproduced below.</p> 056 * 057 * <table border="0" width="80%" cellpadding="10" align="center" bgcolor="#E0E0E0"> 058 * <tr><td> 059 * Minpack Copyright Notice (1999) University of Chicago. 060 * All rights reserved 061 * </td></tr> 062 * <tr><td> 063 * Redistribution and use in source and binary forms, with or without 064 * modification, are permitted provided that the following conditions 065 * are met: 066 * <ol> 067 * <li>Redistributions of source code must retain the above copyright 068 * notice, this list of conditions and the following disclaimer.</li> 069 * <li>Redistributions in binary form must reproduce the above 070 * copyright notice, this list of conditions and the following 071 * disclaimer in the documentation and/or other materials provided 072 * with the distribution.</li> 073 * <li>The end-user documentation included with the redistribution, if any, 074 * must include the following acknowledgment: 075 * <code>This product includes software developed by the University of 076 * Chicago, as Operator of Argonne National Laboratory.</code> 077 * Alternately, this acknowledgment may appear in the software itself, 078 * if and wherever such third-party acknowledgments normally appear.</li> 079 * <li><strong>WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" 080 * WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE 081 * UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND 082 * THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR 083 * IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES 084 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE 085 * OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY 086 * OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR 087 * USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF 088 * THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) 089 * DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION 090 * UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL 091 * BE CORRECTED.</strong></li> 092 * <li><strong>LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT 093 * HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF 094 * ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, 095 * INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF 096 * ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF 097 * PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER 098 * SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT 099 * (INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, 100 * EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE 101 * POSSIBILITY OF SUCH LOSS OR DAMAGES.</strong></li> 102 * <ol></td></tr> 103 * </table> 104 * @version $Id: LevenbergMarquardtOptimizer.java 1423555 2012-12-18 18:02:52Z erans $ 105 * @deprecated As of 3.1 (to be removed in 4.0). 106 * @since 2.0 107 * 108 */ 109 @Deprecated 110 public class LevenbergMarquardtOptimizer extends AbstractLeastSquaresOptimizer { 111 /** Number of solved point. */ 112 private int solvedCols; 113 /** Diagonal elements of the R matrix in the Q.R. decomposition. */ 114 private double[] diagR; 115 /** Norms of the columns of the jacobian matrix. */ 116 private double[] jacNorm; 117 /** Coefficients of the Householder transforms vectors. */ 118 private double[] beta; 119 /** Columns permutation array. */ 120 private int[] permutation; 121 /** Rank of the jacobian matrix. */ 122 private int rank; 123 /** Levenberg-Marquardt parameter. */ 124 private double lmPar; 125 /** Parameters evolution direction associated with lmPar. */ 126 private double[] lmDir; 127 /** Positive input variable used in determining the initial step bound. */ 128 private final double initialStepBoundFactor; 129 /** Desired relative error in the sum of squares. */ 130 private final double costRelativeTolerance; 131 /** Desired relative error in the approximate solution parameters. */ 132 private final double parRelativeTolerance; 133 /** Desired max cosine on the orthogonality between the function vector 134 * and the columns of the jacobian. */ 135 private final double orthoTolerance; 136 /** Threshold for QR ranking. */ 137 private final double qrRankingThreshold; 138 /** Weighted residuals. */ 139 private double[] weightedResidual; 140 /** Weighted Jacobian. */ 141 private double[][] weightedJacobian; 142 143 /** 144 * Build an optimizer for least squares problems with default values 145 * for all the tuning parameters (see the {@link 146 * #LevenbergMarquardtOptimizer(double,double,double,double,double) 147 * other contructor}. 148 * The default values for the algorithm settings are: 149 * <ul> 150 * <li>Initial step bound factor: 100</li> 151 * <li>Cost relative tolerance: 1e-10</li> 152 * <li>Parameters relative tolerance: 1e-10</li> 153 * <li>Orthogonality tolerance: 1e-10</li> 154 * <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li> 155 * </ul> 156 */ 157 public LevenbergMarquardtOptimizer() { 158 this(100, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN); 159 } 160 161 /** 162 * Constructor that allows the specification of a custom convergence 163 * checker. 164 * Note that all the usual convergence checks will be <em>disabled</em>. 165 * The default values for the algorithm settings are: 166 * <ul> 167 * <li>Initial step bound factor: 100</li> 168 * <li>Cost relative tolerance: 1e-10</li> 169 * <li>Parameters relative tolerance: 1e-10</li> 170 * <li>Orthogonality tolerance: 1e-10</li> 171 * <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li> 172 * </ul> 173 * 174 * @param checker Convergence checker. 175 */ 176 public LevenbergMarquardtOptimizer(ConvergenceChecker<PointVectorValuePair> checker) { 177 this(100, checker, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN); 178 } 179 180 /** 181 * Constructor that allows the specification of a custom convergence 182 * checker, in addition to the standard ones. 183 * 184 * @param initialStepBoundFactor Positive input variable used in 185 * determining the initial step bound. This bound is set to the 186 * product of initialStepBoundFactor and the euclidean norm of 187 * {@code diag * x} if non-zero, or else to {@code initialStepBoundFactor} 188 * itself. In most cases factor should lie in the interval 189 * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value. 190 * @param checker Convergence checker. 191 * @param costRelativeTolerance Desired relative error in the sum of 192 * squares. 193 * @param parRelativeTolerance Desired relative error in the approximate 194 * solution parameters. 195 * @param orthoTolerance Desired max cosine on the orthogonality between 196 * the function vector and the columns of the Jacobian. 197 * @param threshold Desired threshold for QR ranking. If the squared norm 198 * of a column vector is smaller or equal to this threshold during QR 199 * decomposition, it is considered to be a zero vector and hence the rank 200 * of the matrix is reduced. 201 */ 202 public LevenbergMarquardtOptimizer(double initialStepBoundFactor, 203 ConvergenceChecker<PointVectorValuePair> checker, 204 double costRelativeTolerance, 205 double parRelativeTolerance, 206 double orthoTolerance, 207 double threshold) { 208 super(checker); 209 this.initialStepBoundFactor = initialStepBoundFactor; 210 this.costRelativeTolerance = costRelativeTolerance; 211 this.parRelativeTolerance = parRelativeTolerance; 212 this.orthoTolerance = orthoTolerance; 213 this.qrRankingThreshold = threshold; 214 } 215 216 /** 217 * Build an optimizer for least squares problems with default values 218 * for some of the tuning parameters (see the {@link 219 * #LevenbergMarquardtOptimizer(double,double,double,double,double) 220 * other contructor}. 221 * The default values for the algorithm settings are: 222 * <ul> 223 * <li>Initial step bound factor}: 100</li> 224 * <li>QR ranking threshold}: {@link Precision#SAFE_MIN}</li> 225 * </ul> 226 * 227 * @param costRelativeTolerance Desired relative error in the sum of 228 * squares. 229 * @param parRelativeTolerance Desired relative error in the approximate 230 * solution parameters. 231 * @param orthoTolerance Desired max cosine on the orthogonality between 232 * the function vector and the columns of the Jacobian. 233 */ 234 public LevenbergMarquardtOptimizer(double costRelativeTolerance, 235 double parRelativeTolerance, 236 double orthoTolerance) { 237 this(100, 238 costRelativeTolerance, parRelativeTolerance, orthoTolerance, 239 Precision.SAFE_MIN); 240 } 241 242 /** 243 * The arguments control the behaviour of the default convergence checking 244 * procedure. 245 * Additional criteria can defined through the setting of a {@link 246 * ConvergenceChecker}. 247 * 248 * @param initialStepBoundFactor Positive input variable used in 249 * determining the initial step bound. This bound is set to the 250 * product of initialStepBoundFactor and the euclidean norm of 251 * {@code diag * x} if non-zero, or else to {@code initialStepBoundFactor} 252 * itself. In most cases factor should lie in the interval 253 * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value. 254 * @param costRelativeTolerance Desired relative error in the sum of 255 * squares. 256 * @param parRelativeTolerance Desired relative error in the approximate 257 * solution parameters. 258 * @param orthoTolerance Desired max cosine on the orthogonality between 259 * the function vector and the columns of the Jacobian. 260 * @param threshold Desired threshold for QR ranking. If the squared norm 261 * of a column vector is smaller or equal to this threshold during QR 262 * decomposition, it is considered to be a zero vector and hence the rank 263 * of the matrix is reduced. 264 */ 265 public LevenbergMarquardtOptimizer(double initialStepBoundFactor, 266 double costRelativeTolerance, 267 double parRelativeTolerance, 268 double orthoTolerance, 269 double threshold) { 270 super(null); // No custom convergence criterion. 271 this.initialStepBoundFactor = initialStepBoundFactor; 272 this.costRelativeTolerance = costRelativeTolerance; 273 this.parRelativeTolerance = parRelativeTolerance; 274 this.orthoTolerance = orthoTolerance; 275 this.qrRankingThreshold = threshold; 276 } 277 278 /** {@inheritDoc} */ 279 @Override 280 protected PointVectorValuePair doOptimize() { 281 final int nR = getTarget().length; // Number of observed data. 282 final double[] currentPoint = getStartPoint(); 283 final int nC = currentPoint.length; // Number of parameters. 284 285 // arrays shared with the other private methods 286 solvedCols = FastMath.min(nR, nC); 287 diagR = new double[nC]; 288 jacNorm = new double[nC]; 289 beta = new double[nC]; 290 permutation = new int[nC]; 291 lmDir = new double[nC]; 292 293 // local point 294 double delta = 0; 295 double xNorm = 0; 296 double[] diag = new double[nC]; 297 double[] oldX = new double[nC]; 298 double[] oldRes = new double[nR]; 299 double[] oldObj = new double[nR]; 300 double[] qtf = new double[nR]; 301 double[] work1 = new double[nC]; 302 double[] work2 = new double[nC]; 303 double[] work3 = new double[nC]; 304 305 final RealMatrix weightMatrixSqrt = getWeightSquareRoot(); 306 307 // Evaluate the function at the starting point and calculate its norm. 308 double[] currentObjective = computeObjectiveValue(currentPoint); 309 double[] currentResiduals = computeResiduals(currentObjective); 310 PointVectorValuePair current = new PointVectorValuePair(currentPoint, currentObjective); 311 double currentCost = computeCost(currentResiduals); 312 313 // Outer loop. 314 lmPar = 0; 315 boolean firstIteration = true; 316 int iter = 0; 317 final ConvergenceChecker<PointVectorValuePair> checker = getConvergenceChecker(); 318 while (true) { 319 ++iter; 320 final PointVectorValuePair previous = current; 321 322 // QR decomposition of the jacobian matrix 323 qrDecomposition(computeWeightedJacobian(currentPoint)); 324 325 weightedResidual = weightMatrixSqrt.operate(currentResiduals); 326 for (int i = 0; i < nR; i++) { 327 qtf[i] = weightedResidual[i]; 328 } 329 330 // compute Qt.res 331 qTy(qtf); 332 333 // now we don't need Q anymore, 334 // so let jacobian contain the R matrix with its diagonal elements 335 for (int k = 0; k < solvedCols; ++k) { 336 int pk = permutation[k]; 337 weightedJacobian[k][pk] = diagR[pk]; 338 } 339 340 if (firstIteration) { 341 // scale the point according to the norms of the columns 342 // of the initial jacobian 343 xNorm = 0; 344 for (int k = 0; k < nC; ++k) { 345 double dk = jacNorm[k]; 346 if (dk == 0) { 347 dk = 1.0; 348 } 349 double xk = dk * currentPoint[k]; 350 xNorm += xk * xk; 351 diag[k] = dk; 352 } 353 xNorm = FastMath.sqrt(xNorm); 354 355 // initialize the step bound delta 356 delta = (xNorm == 0) ? initialStepBoundFactor : (initialStepBoundFactor * xNorm); 357 } 358 359 // check orthogonality between function vector and jacobian columns 360 double maxCosine = 0; 361 if (currentCost != 0) { 362 for (int j = 0; j < solvedCols; ++j) { 363 int pj = permutation[j]; 364 double s = jacNorm[pj]; 365 if (s != 0) { 366 double sum = 0; 367 for (int i = 0; i <= j; ++i) { 368 sum += weightedJacobian[i][pj] * qtf[i]; 369 } 370 maxCosine = FastMath.max(maxCosine, FastMath.abs(sum) / (s * currentCost)); 371 } 372 } 373 } 374 if (maxCosine <= orthoTolerance) { 375 // Convergence has been reached. 376 setCost(currentCost); 377 // Update (deprecated) "point" field. 378 point = current.getPoint(); 379 return current; 380 } 381 382 // rescale if necessary 383 for (int j = 0; j < nC; ++j) { 384 diag[j] = FastMath.max(diag[j], jacNorm[j]); 385 } 386 387 // Inner loop. 388 for (double ratio = 0; ratio < 1.0e-4;) { 389 390 // save the state 391 for (int j = 0; j < solvedCols; ++j) { 392 int pj = permutation[j]; 393 oldX[pj] = currentPoint[pj]; 394 } 395 final double previousCost = currentCost; 396 double[] tmpVec = weightedResidual; 397 weightedResidual = oldRes; 398 oldRes = tmpVec; 399 tmpVec = currentObjective; 400 currentObjective = oldObj; 401 oldObj = tmpVec; 402 403 // determine the Levenberg-Marquardt parameter 404 determineLMParameter(qtf, delta, diag, work1, work2, work3); 405 406 // compute the new point and the norm of the evolution direction 407 double lmNorm = 0; 408 for (int j = 0; j < solvedCols; ++j) { 409 int pj = permutation[j]; 410 lmDir[pj] = -lmDir[pj]; 411 currentPoint[pj] = oldX[pj] + lmDir[pj]; 412 double s = diag[pj] * lmDir[pj]; 413 lmNorm += s * s; 414 } 415 lmNorm = FastMath.sqrt(lmNorm); 416 // on the first iteration, adjust the initial step bound. 417 if (firstIteration) { 418 delta = FastMath.min(delta, lmNorm); 419 } 420 421 // Evaluate the function at x + p and calculate its norm. 422 currentObjective = computeObjectiveValue(currentPoint); 423 currentResiduals = computeResiduals(currentObjective); 424 current = new PointVectorValuePair(currentPoint, currentObjective); 425 currentCost = computeCost(currentResiduals); 426 427 // compute the scaled actual reduction 428 double actRed = -1.0; 429 if (0.1 * currentCost < previousCost) { 430 double r = currentCost / previousCost; 431 actRed = 1.0 - r * r; 432 } 433 434 // compute the scaled predicted reduction 435 // and the scaled directional derivative 436 for (int j = 0; j < solvedCols; ++j) { 437 int pj = permutation[j]; 438 double dirJ = lmDir[pj]; 439 work1[j] = 0; 440 for (int i = 0; i <= j; ++i) { 441 work1[i] += weightedJacobian[i][pj] * dirJ; 442 } 443 } 444 double coeff1 = 0; 445 for (int j = 0; j < solvedCols; ++j) { 446 coeff1 += work1[j] * work1[j]; 447 } 448 double pc2 = previousCost * previousCost; 449 coeff1 = coeff1 / pc2; 450 double coeff2 = lmPar * lmNorm * lmNorm / pc2; 451 double preRed = coeff1 + 2 * coeff2; 452 double dirDer = -(coeff1 + coeff2); 453 454 // ratio of the actual to the predicted reduction 455 ratio = (preRed == 0) ? 0 : (actRed / preRed); 456 457 // update the step bound 458 if (ratio <= 0.25) { 459 double tmp = 460 (actRed < 0) ? (0.5 * dirDer / (dirDer + 0.5 * actRed)) : 0.5; 461 if ((0.1 * currentCost >= previousCost) || (tmp < 0.1)) { 462 tmp = 0.1; 463 } 464 delta = tmp * FastMath.min(delta, 10.0 * lmNorm); 465 lmPar /= tmp; 466 } else if ((lmPar == 0) || (ratio >= 0.75)) { 467 delta = 2 * lmNorm; 468 lmPar *= 0.5; 469 } 470 471 // test for successful iteration. 472 if (ratio >= 1.0e-4) { 473 // successful iteration, update the norm 474 firstIteration = false; 475 xNorm = 0; 476 for (int k = 0; k < nC; ++k) { 477 double xK = diag[k] * currentPoint[k]; 478 xNorm += xK * xK; 479 } 480 xNorm = FastMath.sqrt(xNorm); 481 482 // tests for convergence. 483 if (checker != null) { 484 // we use the vectorial convergence checker 485 if (checker.converged(iter, previous, current)) { 486 setCost(currentCost); 487 // Update (deprecated) "point" field. 488 point = current.getPoint(); 489 return current; 490 } 491 } 492 } else { 493 // failed iteration, reset the previous values 494 currentCost = previousCost; 495 for (int j = 0; j < solvedCols; ++j) { 496 int pj = permutation[j]; 497 currentPoint[pj] = oldX[pj]; 498 } 499 tmpVec = weightedResidual; 500 weightedResidual = oldRes; 501 oldRes = tmpVec; 502 tmpVec = currentObjective; 503 currentObjective = oldObj; 504 oldObj = tmpVec; 505 // Reset "current" to previous values. 506 current = new PointVectorValuePair(currentPoint, currentObjective); 507 } 508 509 // Default convergence criteria. 510 if ((FastMath.abs(actRed) <= costRelativeTolerance && 511 preRed <= costRelativeTolerance && 512 ratio <= 2.0) || 513 delta <= parRelativeTolerance * xNorm) { 514 setCost(currentCost); 515 // Update (deprecated) "point" field. 516 point = current.getPoint(); 517 return current; 518 } 519 520 // tests for termination and stringent tolerances 521 // (2.2204e-16 is the machine epsilon for IEEE754) 522 if ((FastMath.abs(actRed) <= 2.2204e-16) && (preRed <= 2.2204e-16) && (ratio <= 2.0)) { 523 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_COST_RELATIVE_TOLERANCE, 524 costRelativeTolerance); 525 } else if (delta <= 2.2204e-16 * xNorm) { 526 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_PARAMETERS_RELATIVE_TOLERANCE, 527 parRelativeTolerance); 528 } else if (maxCosine <= 2.2204e-16) { 529 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_ORTHOGONALITY_TOLERANCE, 530 orthoTolerance); 531 } 532 } 533 } 534 } 535 536 /** 537 * Determine the Levenberg-Marquardt parameter. 538 * <p>This implementation is a translation in Java of the MINPACK 539 * <a href="http://www.netlib.org/minpack/lmpar.f">lmpar</a> 540 * routine.</p> 541 * <p>This method sets the lmPar and lmDir attributes.</p> 542 * <p>The authors of the original fortran function are:</p> 543 * <ul> 544 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 545 * <li>Burton S. Garbow</li> 546 * <li>Kenneth E. Hillstrom</li> 547 * <li>Jorge J. More</li> 548 * </ul> 549 * <p>Luc Maisonobe did the Java translation.</p> 550 * 551 * @param qy array containing qTy 552 * @param delta upper bound on the euclidean norm of diagR * lmDir 553 * @param diag diagonal matrix 554 * @param work1 work array 555 * @param work2 work array 556 * @param work3 work array 557 */ 558 private void determineLMParameter(double[] qy, double delta, double[] diag, 559 double[] work1, double[] work2, double[] work3) { 560 final int nC = weightedJacobian[0].length; 561 562 // compute and store in x the gauss-newton direction, if the 563 // jacobian is rank-deficient, obtain a least squares solution 564 for (int j = 0; j < rank; ++j) { 565 lmDir[permutation[j]] = qy[j]; 566 } 567 for (int j = rank; j < nC; ++j) { 568 lmDir[permutation[j]] = 0; 569 } 570 for (int k = rank - 1; k >= 0; --k) { 571 int pk = permutation[k]; 572 double ypk = lmDir[pk] / diagR[pk]; 573 for (int i = 0; i < k; ++i) { 574 lmDir[permutation[i]] -= ypk * weightedJacobian[i][pk]; 575 } 576 lmDir[pk] = ypk; 577 } 578 579 // evaluate the function at the origin, and test 580 // for acceptance of the Gauss-Newton direction 581 double dxNorm = 0; 582 for (int j = 0; j < solvedCols; ++j) { 583 int pj = permutation[j]; 584 double s = diag[pj] * lmDir[pj]; 585 work1[pj] = s; 586 dxNorm += s * s; 587 } 588 dxNorm = FastMath.sqrt(dxNorm); 589 double fp = dxNorm - delta; 590 if (fp <= 0.1 * delta) { 591 lmPar = 0; 592 return; 593 } 594 595 // if the jacobian is not rank deficient, the Newton step provides 596 // a lower bound, parl, for the zero of the function, 597 // otherwise set this bound to zero 598 double sum2; 599 double parl = 0; 600 if (rank == solvedCols) { 601 for (int j = 0; j < solvedCols; ++j) { 602 int pj = permutation[j]; 603 work1[pj] *= diag[pj] / dxNorm; 604 } 605 sum2 = 0; 606 for (int j = 0; j < solvedCols; ++j) { 607 int pj = permutation[j]; 608 double sum = 0; 609 for (int i = 0; i < j; ++i) { 610 sum += weightedJacobian[i][pj] * work1[permutation[i]]; 611 } 612 double s = (work1[pj] - sum) / diagR[pj]; 613 work1[pj] = s; 614 sum2 += s * s; 615 } 616 parl = fp / (delta * sum2); 617 } 618 619 // calculate an upper bound, paru, for the zero of the function 620 sum2 = 0; 621 for (int j = 0; j < solvedCols; ++j) { 622 int pj = permutation[j]; 623 double sum = 0; 624 for (int i = 0; i <= j; ++i) { 625 sum += weightedJacobian[i][pj] * qy[i]; 626 } 627 sum /= diag[pj]; 628 sum2 += sum * sum; 629 } 630 double gNorm = FastMath.sqrt(sum2); 631 double paru = gNorm / delta; 632 if (paru == 0) { 633 // 2.2251e-308 is the smallest positive real for IEE754 634 paru = 2.2251e-308 / FastMath.min(delta, 0.1); 635 } 636 637 // if the input par lies outside of the interval (parl,paru), 638 // set par to the closer endpoint 639 lmPar = FastMath.min(paru, FastMath.max(lmPar, parl)); 640 if (lmPar == 0) { 641 lmPar = gNorm / dxNorm; 642 } 643 644 for (int countdown = 10; countdown >= 0; --countdown) { 645 646 // evaluate the function at the current value of lmPar 647 if (lmPar == 0) { 648 lmPar = FastMath.max(2.2251e-308, 0.001 * paru); 649 } 650 double sPar = FastMath.sqrt(lmPar); 651 for (int j = 0; j < solvedCols; ++j) { 652 int pj = permutation[j]; 653 work1[pj] = sPar * diag[pj]; 654 } 655 determineLMDirection(qy, work1, work2, work3); 656 657 dxNorm = 0; 658 for (int j = 0; j < solvedCols; ++j) { 659 int pj = permutation[j]; 660 double s = diag[pj] * lmDir[pj]; 661 work3[pj] = s; 662 dxNorm += s * s; 663 } 664 dxNorm = FastMath.sqrt(dxNorm); 665 double previousFP = fp; 666 fp = dxNorm - delta; 667 668 // if the function is small enough, accept the current value 669 // of lmPar, also test for the exceptional cases where parl is zero 670 if ((FastMath.abs(fp) <= 0.1 * delta) || 671 ((parl == 0) && (fp <= previousFP) && (previousFP < 0))) { 672 return; 673 } 674 675 // compute the Newton correction 676 for (int j = 0; j < solvedCols; ++j) { 677 int pj = permutation[j]; 678 work1[pj] = work3[pj] * diag[pj] / dxNorm; 679 } 680 for (int j = 0; j < solvedCols; ++j) { 681 int pj = permutation[j]; 682 work1[pj] /= work2[j]; 683 double tmp = work1[pj]; 684 for (int i = j + 1; i < solvedCols; ++i) { 685 work1[permutation[i]] -= weightedJacobian[i][pj] * tmp; 686 } 687 } 688 sum2 = 0; 689 for (int j = 0; j < solvedCols; ++j) { 690 double s = work1[permutation[j]]; 691 sum2 += s * s; 692 } 693 double correction = fp / (delta * sum2); 694 695 // depending on the sign of the function, update parl or paru. 696 if (fp > 0) { 697 parl = FastMath.max(parl, lmPar); 698 } else if (fp < 0) { 699 paru = FastMath.min(paru, lmPar); 700 } 701 702 // compute an improved estimate for lmPar 703 lmPar = FastMath.max(parl, lmPar + correction); 704 705 } 706 } 707 708 /** 709 * Solve a*x = b and d*x = 0 in the least squares sense. 710 * <p>This implementation is a translation in Java of the MINPACK 711 * <a href="http://www.netlib.org/minpack/qrsolv.f">qrsolv</a> 712 * routine.</p> 713 * <p>This method sets the lmDir and lmDiag attributes.</p> 714 * <p>The authors of the original fortran function are:</p> 715 * <ul> 716 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 717 * <li>Burton S. Garbow</li> 718 * <li>Kenneth E. Hillstrom</li> 719 * <li>Jorge J. More</li> 720 * </ul> 721 * <p>Luc Maisonobe did the Java translation.</p> 722 * 723 * @param qy array containing qTy 724 * @param diag diagonal matrix 725 * @param lmDiag diagonal elements associated with lmDir 726 * @param work work array 727 */ 728 private void determineLMDirection(double[] qy, double[] diag, 729 double[] lmDiag, double[] work) { 730 731 // copy R and Qty to preserve input and initialize s 732 // in particular, save the diagonal elements of R in lmDir 733 for (int j = 0; j < solvedCols; ++j) { 734 int pj = permutation[j]; 735 for (int i = j + 1; i < solvedCols; ++i) { 736 weightedJacobian[i][pj] = weightedJacobian[j][permutation[i]]; 737 } 738 lmDir[j] = diagR[pj]; 739 work[j] = qy[j]; 740 } 741 742 // eliminate the diagonal matrix d using a Givens rotation 743 for (int j = 0; j < solvedCols; ++j) { 744 745 // prepare the row of d to be eliminated, locating the 746 // diagonal element using p from the Q.R. factorization 747 int pj = permutation[j]; 748 double dpj = diag[pj]; 749 if (dpj != 0) { 750 Arrays.fill(lmDiag, j + 1, lmDiag.length, 0); 751 } 752 lmDiag[j] = dpj; 753 754 // the transformations to eliminate the row of d 755 // modify only a single element of Qty 756 // beyond the first n, which is initially zero. 757 double qtbpj = 0; 758 for (int k = j; k < solvedCols; ++k) { 759 int pk = permutation[k]; 760 761 // determine a Givens rotation which eliminates the 762 // appropriate element in the current row of d 763 if (lmDiag[k] != 0) { 764 765 final double sin; 766 final double cos; 767 double rkk = weightedJacobian[k][pk]; 768 if (FastMath.abs(rkk) < FastMath.abs(lmDiag[k])) { 769 final double cotan = rkk / lmDiag[k]; 770 sin = 1.0 / FastMath.sqrt(1.0 + cotan * cotan); 771 cos = sin * cotan; 772 } else { 773 final double tan = lmDiag[k] / rkk; 774 cos = 1.0 / FastMath.sqrt(1.0 + tan * tan); 775 sin = cos * tan; 776 } 777 778 // compute the modified diagonal element of R and 779 // the modified element of (Qty,0) 780 weightedJacobian[k][pk] = cos * rkk + sin * lmDiag[k]; 781 final double temp = cos * work[k] + sin * qtbpj; 782 qtbpj = -sin * work[k] + cos * qtbpj; 783 work[k] = temp; 784 785 // accumulate the tranformation in the row of s 786 for (int i = k + 1; i < solvedCols; ++i) { 787 double rik = weightedJacobian[i][pk]; 788 final double temp2 = cos * rik + sin * lmDiag[i]; 789 lmDiag[i] = -sin * rik + cos * lmDiag[i]; 790 weightedJacobian[i][pk] = temp2; 791 } 792 } 793 } 794 795 // store the diagonal element of s and restore 796 // the corresponding diagonal element of R 797 lmDiag[j] = weightedJacobian[j][permutation[j]]; 798 weightedJacobian[j][permutation[j]] = lmDir[j]; 799 } 800 801 // solve the triangular system for z, if the system is 802 // singular, then obtain a least squares solution 803 int nSing = solvedCols; 804 for (int j = 0; j < solvedCols; ++j) { 805 if ((lmDiag[j] == 0) && (nSing == solvedCols)) { 806 nSing = j; 807 } 808 if (nSing < solvedCols) { 809 work[j] = 0; 810 } 811 } 812 if (nSing > 0) { 813 for (int j = nSing - 1; j >= 0; --j) { 814 int pj = permutation[j]; 815 double sum = 0; 816 for (int i = j + 1; i < nSing; ++i) { 817 sum += weightedJacobian[i][pj] * work[i]; 818 } 819 work[j] = (work[j] - sum) / lmDiag[j]; 820 } 821 } 822 823 // permute the components of z back to components of lmDir 824 for (int j = 0; j < lmDir.length; ++j) { 825 lmDir[permutation[j]] = work[j]; 826 } 827 } 828 829 /** 830 * Decompose a matrix A as A.P = Q.R using Householder transforms. 831 * <p>As suggested in the P. Lascaux and R. Theodor book 832 * <i>Analyse numérique matricielle appliquée à 833 * l'art de l'ingénieur</i> (Masson, 1986), instead of representing 834 * the Householder transforms with u<sub>k</sub> unit vectors such that: 835 * <pre> 836 * H<sub>k</sub> = I - 2u<sub>k</sub>.u<sub>k</sub><sup>t</sup> 837 * </pre> 838 * we use <sub>k</sub> non-unit vectors such that: 839 * <pre> 840 * H<sub>k</sub> = I - beta<sub>k</sub>v<sub>k</sub>.v<sub>k</sub><sup>t</sup> 841 * </pre> 842 * where v<sub>k</sub> = a<sub>k</sub> - alpha<sub>k</sub> e<sub>k</sub>. 843 * The beta<sub>k</sub> coefficients are provided upon exit as recomputing 844 * them from the v<sub>k</sub> vectors would be costly.</p> 845 * <p>This decomposition handles rank deficient cases since the tranformations 846 * are performed in non-increasing columns norms order thanks to columns 847 * pivoting. The diagonal elements of the R matrix are therefore also in 848 * non-increasing absolute values order.</p> 849 * 850 * @param jacobian Weighted Jacobian matrix at the current point. 851 * @exception ConvergenceException if the decomposition cannot be performed 852 */ 853 private void qrDecomposition(RealMatrix jacobian) throws ConvergenceException { 854 // Code in this class assumes that the weighted Jacobian is -(W^(1/2) J), 855 // hence the multiplication by -1. 856 weightedJacobian = jacobian.scalarMultiply(-1).getData(); 857 858 final int nR = weightedJacobian.length; 859 final int nC = weightedJacobian[0].length; 860 861 // initializations 862 for (int k = 0; k < nC; ++k) { 863 permutation[k] = k; 864 double norm2 = 0; 865 for (int i = 0; i < nR; ++i) { 866 double akk = weightedJacobian[i][k]; 867 norm2 += akk * akk; 868 } 869 jacNorm[k] = FastMath.sqrt(norm2); 870 } 871 872 // transform the matrix column after column 873 for (int k = 0; k < nC; ++k) { 874 875 // select the column with the greatest norm on active components 876 int nextColumn = -1; 877 double ak2 = Double.NEGATIVE_INFINITY; 878 for (int i = k; i < nC; ++i) { 879 double norm2 = 0; 880 for (int j = k; j < nR; ++j) { 881 double aki = weightedJacobian[j][permutation[i]]; 882 norm2 += aki * aki; 883 } 884 if (Double.isInfinite(norm2) || Double.isNaN(norm2)) { 885 throw new ConvergenceException(LocalizedFormats.UNABLE_TO_PERFORM_QR_DECOMPOSITION_ON_JACOBIAN, 886 nR, nC); 887 } 888 if (norm2 > ak2) { 889 nextColumn = i; 890 ak2 = norm2; 891 } 892 } 893 if (ak2 <= qrRankingThreshold) { 894 rank = k; 895 return; 896 } 897 int pk = permutation[nextColumn]; 898 permutation[nextColumn] = permutation[k]; 899 permutation[k] = pk; 900 901 // choose alpha such that Hk.u = alpha ek 902 double akk = weightedJacobian[k][pk]; 903 double alpha = (akk > 0) ? -FastMath.sqrt(ak2) : FastMath.sqrt(ak2); 904 double betak = 1.0 / (ak2 - akk * alpha); 905 beta[pk] = betak; 906 907 // transform the current column 908 diagR[pk] = alpha; 909 weightedJacobian[k][pk] -= alpha; 910 911 // transform the remaining columns 912 for (int dk = nC - 1 - k; dk > 0; --dk) { 913 double gamma = 0; 914 for (int j = k; j < nR; ++j) { 915 gamma += weightedJacobian[j][pk] * weightedJacobian[j][permutation[k + dk]]; 916 } 917 gamma *= betak; 918 for (int j = k; j < nR; ++j) { 919 weightedJacobian[j][permutation[k + dk]] -= gamma * weightedJacobian[j][pk]; 920 } 921 } 922 } 923 rank = solvedCols; 924 } 925 926 /** 927 * Compute the product Qt.y for some Q.R. decomposition. 928 * 929 * @param y vector to multiply (will be overwritten with the result) 930 */ 931 private void qTy(double[] y) { 932 final int nR = weightedJacobian.length; 933 final int nC = weightedJacobian[0].length; 934 935 for (int k = 0; k < nC; ++k) { 936 int pk = permutation[k]; 937 double gamma = 0; 938 for (int i = k; i < nR; ++i) { 939 gamma += weightedJacobian[i][pk] * y[i]; 940 } 941 gamma *= beta[pk]; 942 for (int i = k; i < nR; ++i) { 943 y[i] -= gamma * weightedJacobian[i][pk]; 944 } 945 } 946 } 947 }