001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.math3.optimization.general; 019 020 import org.apache.commons.math3.analysis.DifferentiableMultivariateVectorFunction; 021 import org.apache.commons.math3.analysis.FunctionUtils; 022 import org.apache.commons.math3.analysis.differentiation.DerivativeStructure; 023 import org.apache.commons.math3.analysis.differentiation.MultivariateDifferentiableVectorFunction; 024 import org.apache.commons.math3.exception.DimensionMismatchException; 025 import org.apache.commons.math3.exception.NumberIsTooSmallException; 026 import org.apache.commons.math3.exception.util.LocalizedFormats; 027 import org.apache.commons.math3.linear.ArrayRealVector; 028 import org.apache.commons.math3.linear.RealMatrix; 029 import org.apache.commons.math3.linear.DecompositionSolver; 030 import org.apache.commons.math3.linear.MatrixUtils; 031 import org.apache.commons.math3.linear.QRDecomposition; 032 import org.apache.commons.math3.linear.EigenDecomposition; 033 import org.apache.commons.math3.optimization.OptimizationData; 034 import org.apache.commons.math3.optimization.InitialGuess; 035 import org.apache.commons.math3.optimization.Target; 036 import org.apache.commons.math3.optimization.Weight; 037 import org.apache.commons.math3.optimization.ConvergenceChecker; 038 import org.apache.commons.math3.optimization.DifferentiableMultivariateVectorOptimizer; 039 import org.apache.commons.math3.optimization.PointVectorValuePair; 040 import org.apache.commons.math3.optimization.direct.BaseAbstractMultivariateVectorOptimizer; 041 import org.apache.commons.math3.util.FastMath; 042 043 /** 044 * Base class for implementing least squares optimizers. 045 * It handles the boilerplate methods associated to thresholds settings, 046 * Jacobian and error estimation. 047 * <br/> 048 * This class constructs the Jacobian matrix of the function argument in method 049 * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,MultivariateVectorFunction,OptimizationData[]) 050 * optimize} and assumes that the rows of that matrix iterate on the model 051 * functions while the columns iterate on the parameters; thus, the numbers 052 * of rows is equal to the dimension of the 053 * {@link org.apache.commons.math3.optimization.Target Target} while 054 * the number of columns is equal to the dimension of the 055 * {@link org.apache.commons.math3.optimization.InitialGuess InitialGuess}. 056 * 057 * @version $Id: AbstractLeastSquaresOptimizer.java 1422313 2012-12-15 18:53:41Z psteitz $ 058 * @deprecated As of 3.1 (to be removed in 4.0). 059 * @since 1.2 060 */ 061 @Deprecated 062 public abstract class AbstractLeastSquaresOptimizer 063 extends BaseAbstractMultivariateVectorOptimizer<DifferentiableMultivariateVectorFunction> 064 implements DifferentiableMultivariateVectorOptimizer { 065 /** 066 * Singularity threshold (cf. {@link #getCovariances(double)}). 067 * @deprecated As of 3.1. 068 */ 069 @Deprecated 070 private static final double DEFAULT_SINGULARITY_THRESHOLD = 1e-14; 071 /** 072 * Jacobian matrix of the weighted residuals. 073 * This matrix is in canonical form just after the calls to 074 * {@link #updateJacobian()}, but may be modified by the solver 075 * in the derived class (the {@link LevenbergMarquardtOptimizer 076 * Levenberg-Marquardt optimizer} does this). 077 * @deprecated As of 3.1. To be removed in 4.0. Please use 078 * {@link #computeWeightedJacobian(double[])} instead. 079 */ 080 @Deprecated 081 protected double[][] weightedResidualJacobian; 082 /** Number of columns of the jacobian matrix. 083 * @deprecated As of 3.1. 084 */ 085 @Deprecated 086 protected int cols; 087 /** Number of rows of the jacobian matrix. 088 * @deprecated As of 3.1. 089 */ 090 @Deprecated 091 protected int rows; 092 /** Current point. 093 * @deprecated As of 3.1. 094 */ 095 @Deprecated 096 protected double[] point; 097 /** Current objective function value. 098 * @deprecated As of 3.1. 099 */ 100 @Deprecated 101 protected double[] objective; 102 /** Weighted residuals 103 * @deprecated As of 3.1. 104 */ 105 @Deprecated 106 protected double[] weightedResiduals; 107 /** Cost value (square root of the sum of the residuals). 108 * @deprecated As of 3.1. Field to become "private" in 4.0. 109 * Please use {@link #setCost(double)}. 110 */ 111 @Deprecated 112 protected double cost; 113 /** Objective function derivatives. */ 114 private MultivariateDifferentiableVectorFunction jF; 115 /** Number of evaluations of the Jacobian. */ 116 private int jacobianEvaluations; 117 /** Square-root of the weight matrix. */ 118 private RealMatrix weightMatrixSqrt; 119 120 /** 121 * Simple constructor with default settings. 122 * The convergence check is set to a {@link 123 * org.apache.commons.math3.optimization.SimpleVectorValueChecker}. 124 * @deprecated See {@link org.apache.commons.math3.optimization.SimpleValueChecker#SimpleValueChecker()} 125 */ 126 @Deprecated 127 protected AbstractLeastSquaresOptimizer() {} 128 129 /** 130 * @param checker Convergence checker. 131 */ 132 protected AbstractLeastSquaresOptimizer(ConvergenceChecker<PointVectorValuePair> checker) { 133 super(checker); 134 } 135 136 /** 137 * @return the number of evaluations of the Jacobian function. 138 */ 139 public int getJacobianEvaluations() { 140 return jacobianEvaluations; 141 } 142 143 /** 144 * Update the jacobian matrix. 145 * 146 * @throws DimensionMismatchException if the Jacobian dimension does not 147 * match problem dimension. 148 * @deprecated As of 3.1. Please use {@link #computeWeightedJacobian(double[])} 149 * instead. 150 */ 151 @Deprecated 152 protected void updateJacobian() { 153 final RealMatrix weightedJacobian = computeWeightedJacobian(point); 154 weightedResidualJacobian = weightedJacobian.scalarMultiply(-1).getData(); 155 } 156 157 /** 158 * Computes the Jacobian matrix. 159 * 160 * @param params Model parameters at which to compute the Jacobian. 161 * @return the weighted Jacobian: W<sup>1/2</sup> J. 162 * @throws DimensionMismatchException if the Jacobian dimension does not 163 * match problem dimension. 164 * @since 3.1 165 */ 166 protected RealMatrix computeWeightedJacobian(double[] params) { 167 ++jacobianEvaluations; 168 169 final DerivativeStructure[] dsPoint = new DerivativeStructure[params.length]; 170 final int nC = params.length; 171 for (int i = 0; i < nC; ++i) { 172 dsPoint[i] = new DerivativeStructure(nC, 1, i, params[i]); 173 } 174 final DerivativeStructure[] dsValue = jF.value(dsPoint); 175 final int nR = getTarget().length; 176 if (dsValue.length != nR) { 177 throw new DimensionMismatchException(dsValue.length, nR); 178 } 179 final double[][] jacobianData = new double[nR][nC]; 180 for (int i = 0; i < nR; ++i) { 181 int[] orders = new int[nC]; 182 for (int j = 0; j < nC; ++j) { 183 orders[j] = 1; 184 jacobianData[i][j] = dsValue[i].getPartialDerivative(orders); 185 orders[j] = 0; 186 } 187 } 188 189 return weightMatrixSqrt.multiply(MatrixUtils.createRealMatrix(jacobianData)); 190 } 191 192 /** 193 * Update the residuals array and cost function value. 194 * @throws DimensionMismatchException if the dimension does not match the 195 * problem dimension. 196 * @throws org.apache.commons.math3.exception.TooManyEvaluationsException 197 * if the maximal number of evaluations is exceeded. 198 * @deprecated As of 3.1. Please use {@link #computeResiduals(double[])}, 199 * {@link #computeObjectiveValue(double[])}, {@link #computeCost(double[])} 200 * and {@link #setCost(double)} instead. 201 */ 202 @Deprecated 203 protected void updateResidualsAndCost() { 204 objective = computeObjectiveValue(point); 205 final double[] res = computeResiduals(objective); 206 207 // Compute cost. 208 cost = computeCost(res); 209 210 // Compute weighted residuals. 211 final ArrayRealVector residuals = new ArrayRealVector(res); 212 weightedResiduals = weightMatrixSqrt.operate(residuals).toArray(); 213 } 214 215 /** 216 * Computes the cost. 217 * 218 * @param residuals Residuals. 219 * @return the cost. 220 * @see #computeResiduals(double[]) 221 * @since 3.1 222 */ 223 protected double computeCost(double[] residuals) { 224 final ArrayRealVector r = new ArrayRealVector(residuals); 225 return FastMath.sqrt(r.dotProduct(getWeight().operate(r))); 226 } 227 228 /** 229 * Get the Root Mean Square value. 230 * Get the Root Mean Square value, i.e. the root of the arithmetic 231 * mean of the square of all weighted residuals. This is related to the 232 * criterion that is minimized by the optimizer as follows: if 233 * <em>c</em> if the criterion, and <em>n</em> is the number of 234 * measurements, then the RMS is <em>sqrt (c/n)</em>. 235 * 236 * @return RMS value 237 */ 238 public double getRMS() { 239 return FastMath.sqrt(getChiSquare() / rows); 240 } 241 242 /** 243 * Get a Chi-Square-like value assuming the N residuals follow N 244 * distinct normal distributions centered on 0 and whose variances are 245 * the reciprocal of the weights. 246 * @return chi-square value 247 */ 248 public double getChiSquare() { 249 return cost * cost; 250 } 251 252 /** 253 * Gets the square-root of the weight matrix. 254 * 255 * @return the square-root of the weight matrix. 256 * @since 3.1 257 */ 258 public RealMatrix getWeightSquareRoot() { 259 return weightMatrixSqrt.copy(); 260 } 261 262 /** 263 * Sets the cost. 264 * 265 * @param cost Cost value. 266 * @since 3.1 267 */ 268 protected void setCost(double cost) { 269 this.cost = cost; 270 } 271 272 /** 273 * Get the covariance matrix of the optimized parameters. 274 * 275 * @return the covariance matrix. 276 * @throws org.apache.commons.math3.linear.SingularMatrixException 277 * if the covariance matrix cannot be computed (singular problem). 278 * @see #getCovariances(double) 279 * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)} 280 * instead. 281 */ 282 @Deprecated 283 public double[][] getCovariances() { 284 return getCovariances(DEFAULT_SINGULARITY_THRESHOLD); 285 } 286 287 /** 288 * Get the covariance matrix of the optimized parameters. 289 * <br/> 290 * Note that this operation involves the inversion of the 291 * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the 292 * Jacobian matrix. 293 * The {@code threshold} parameter is a way for the caller to specify 294 * that the result of this computation should be considered meaningless, 295 * and thus trigger an exception. 296 * 297 * @param threshold Singularity threshold. 298 * @return the covariance matrix. 299 * @throws org.apache.commons.math3.linear.SingularMatrixException 300 * if the covariance matrix cannot be computed (singular problem). 301 * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)} 302 * instead. 303 */ 304 @Deprecated 305 public double[][] getCovariances(double threshold) { 306 return computeCovariances(point, threshold); 307 } 308 309 /** 310 * Get the covariance matrix of the optimized parameters. 311 * <br/> 312 * Note that this operation involves the inversion of the 313 * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the 314 * Jacobian matrix. 315 * The {@code threshold} parameter is a way for the caller to specify 316 * that the result of this computation should be considered meaningless, 317 * and thus trigger an exception. 318 * 319 * @param params Model parameters. 320 * @param threshold Singularity threshold. 321 * @return the covariance matrix. 322 * @throws org.apache.commons.math3.linear.SingularMatrixException 323 * if the covariance matrix cannot be computed (singular problem). 324 * @since 3.1 325 */ 326 public double[][] computeCovariances(double[] params, 327 double threshold) { 328 // Set up the Jacobian. 329 final RealMatrix j = computeWeightedJacobian(params); 330 331 // Compute transpose(J)J. 332 final RealMatrix jTj = j.transpose().multiply(j); 333 334 // Compute the covariances matrix. 335 final DecompositionSolver solver 336 = new QRDecomposition(jTj, threshold).getSolver(); 337 return solver.getInverse().getData(); 338 } 339 340 /** 341 * <p> 342 * Returns an estimate of the standard deviation of each parameter. The 343 * returned values are the so-called (asymptotic) standard errors on the 344 * parameters, defined as {@code sd(a[i]) = sqrt(S / (n - m) * C[i][i])}, 345 * where {@code a[i]} is the optimized value of the {@code i}-th parameter, 346 * {@code S} is the minimized value of the sum of squares objective function 347 * (as returned by {@link #getChiSquare()}), {@code n} is the number of 348 * observations, {@code m} is the number of parameters and {@code C} is the 349 * covariance matrix. 350 * </p> 351 * <p> 352 * See also 353 * <a href="http://en.wikipedia.org/wiki/Least_squares">Wikipedia</a>, 354 * or 355 * <a href="http://mathworld.wolfram.com/LeastSquaresFitting.html">MathWorld</a>, 356 * equations (34) and (35) for a particular case. 357 * </p> 358 * 359 * @return an estimate of the standard deviation of the optimized parameters 360 * @throws org.apache.commons.math3.linear.SingularMatrixException 361 * if the covariance matrix cannot be computed. 362 * @throws NumberIsTooSmallException if the number of degrees of freedom is not 363 * positive, i.e. the number of measurements is less or equal to the number of 364 * parameters. 365 * @deprecated as of version 3.1, {@link #computeSigma(double[],double)} should be used 366 * instead. It should be emphasized that {@code guessParametersErrors} and 367 * {@code computeSigma} are <em>not</em> strictly equivalent. 368 */ 369 @Deprecated 370 public double[] guessParametersErrors() { 371 if (rows <= cols) { 372 throw new NumberIsTooSmallException(LocalizedFormats.NO_DEGREES_OF_FREEDOM, 373 rows, cols, false); 374 } 375 double[] errors = new double[cols]; 376 final double c = FastMath.sqrt(getChiSquare() / (rows - cols)); 377 double[][] covar = computeCovariances(point, 1e-14); 378 for (int i = 0; i < errors.length; ++i) { 379 errors[i] = FastMath.sqrt(covar[i][i]) * c; 380 } 381 return errors; 382 } 383 384 /** 385 * Computes an estimate of the standard deviation of the parameters. The 386 * returned values are the square root of the diagonal coefficients of the 387 * covariance matrix, {@code sd(a[i]) ~= sqrt(C[i][i])}, where {@code a[i]} 388 * is the optimized value of the {@code i}-th parameter, and {@code C} is 389 * the covariance matrix. 390 * 391 * @param params Model parameters. 392 * @param covarianceSingularityThreshold Singularity threshold (see 393 * {@link #computeCovariances(double[],double) computeCovariances}). 394 * @return an estimate of the standard deviation of the optimized parameters 395 * @throws org.apache.commons.math3.linear.SingularMatrixException 396 * if the covariance matrix cannot be computed. 397 * @since 3.1 398 */ 399 public double[] computeSigma(double[] params, 400 double covarianceSingularityThreshold) { 401 final int nC = params.length; 402 final double[] sig = new double[nC]; 403 final double[][] cov = computeCovariances(params, covarianceSingularityThreshold); 404 for (int i = 0; i < nC; ++i) { 405 sig[i] = FastMath.sqrt(cov[i][i]); 406 } 407 return sig; 408 } 409 410 /** {@inheritDoc} 411 * @deprecated As of 3.1. Please use 412 * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,MultivariateVectorFunction,OptimizationData[]) 413 * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)} 414 * instead. 415 */ 416 @Override 417 @Deprecated 418 public PointVectorValuePair optimize(int maxEval, 419 final DifferentiableMultivariateVectorFunction f, 420 final double[] target, final double[] weights, 421 final double[] startPoint) { 422 return optimizeInternal(maxEval, 423 FunctionUtils.toMultivariateDifferentiableVectorFunction(f), 424 new Target(target), 425 new Weight(weights), 426 new InitialGuess(startPoint)); 427 } 428 429 /** 430 * Optimize an objective function. 431 * Optimization is considered to be a weighted least-squares minimization. 432 * The cost function to be minimized is 433 * <code>∑weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code> 434 * 435 * @param f Objective function. 436 * @param target Target value for the objective functions at optimum. 437 * @param weights Weights for the least squares cost computation. 438 * @param startPoint Start point for optimization. 439 * @return the point/value pair giving the optimal value for objective 440 * function. 441 * @param maxEval Maximum number of function evaluations. 442 * @throws org.apache.commons.math3.exception.DimensionMismatchException 443 * if the start point dimension is wrong. 444 * @throws org.apache.commons.math3.exception.TooManyEvaluationsException 445 * if the maximal number of evaluations is exceeded. 446 * @throws org.apache.commons.math3.exception.NullArgumentException if 447 * any argument is {@code null}. 448 * @deprecated As of 3.1. Please use 449 * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,MultivariateVectorFunction,OptimizationData[]) 450 * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)} 451 * instead. 452 */ 453 @Deprecated 454 public PointVectorValuePair optimize(final int maxEval, 455 final MultivariateDifferentiableVectorFunction f, 456 final double[] target, final double[] weights, 457 final double[] startPoint) { 458 return optimizeInternal(maxEval, f, 459 new Target(target), 460 new Weight(weights), 461 new InitialGuess(startPoint)); 462 } 463 464 /** 465 * Optimize an objective function. 466 * Optimization is considered to be a weighted least-squares minimization. 467 * The cost function to be minimized is 468 * <code>∑weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code> 469 * 470 * @param maxEval Allowed number of evaluations of the objective function. 471 * @param f Objective function. 472 * @param optData Optimization data. The following data will be looked for: 473 * <ul> 474 * <li>{@link Target}</li> 475 * <li>{@link Weight}</li> 476 * <li>{@link InitialGuess}</li> 477 * </ul> 478 * @return the point/value pair giving the optimal value of the objective 479 * function. 480 * @throws org.apache.commons.math3.exception.TooManyEvaluationsException if 481 * the maximal number of evaluations is exceeded. 482 * @throws DimensionMismatchException if the target, and weight arguments 483 * have inconsistent dimensions. 484 * @see BaseAbstractMultivariateVectorOptimizer#optimizeInternal(int,MultivariateVectorFunction,OptimizationData[]) 485 * @since 3.1 486 * @deprecated As of 3.1. Override is necessary only until this class's generic 487 * argument is changed to {@code MultivariateDifferentiableVectorFunction}. 488 */ 489 @Deprecated 490 protected PointVectorValuePair optimizeInternal(final int maxEval, 491 final MultivariateDifferentiableVectorFunction f, 492 OptimizationData... optData) { 493 // XXX Conversion will be removed when the generic argument of the 494 // base class becomes "MultivariateDifferentiableVectorFunction". 495 return super.optimizeInternal(maxEval, FunctionUtils.toDifferentiableMultivariateVectorFunction(f), optData); 496 } 497 498 /** {@inheritDoc} */ 499 @Override 500 protected void setUp() { 501 super.setUp(); 502 503 // Reset counter. 504 jacobianEvaluations = 0; 505 506 // Square-root of the weight matrix. 507 weightMatrixSqrt = squareRoot(getWeight()); 508 509 // Store least squares problem characteristics. 510 // XXX The conversion won't be necessary when the generic argument of 511 // the base class becomes "MultivariateDifferentiableVectorFunction". 512 // XXX "jF" is not strictly necessary anymore but is currently more 513 // efficient than converting the value returned from "getObjectiveFunction()" 514 // every time it is used. 515 jF = FunctionUtils.toMultivariateDifferentiableVectorFunction((DifferentiableMultivariateVectorFunction) getObjectiveFunction()); 516 517 // Arrays shared with "private" and "protected" methods. 518 point = getStartPoint(); 519 rows = getTarget().length; 520 cols = point.length; 521 } 522 523 /** 524 * Computes the residuals. 525 * The residual is the difference between the observed (target) 526 * values and the model (objective function) value. 527 * There is one residual for each element of the vector-valued 528 * function. 529 * 530 * @param objectiveValue Value of the the objective function. This is 531 * the value returned from a call to 532 * {@link #computeObjectiveValue(double[]) computeObjectiveValue} 533 * (whose array argument contains the model parameters). 534 * @return the residuals. 535 * @throws DimensionMismatchException if {@code params} has a wrong 536 * length. 537 * @since 3.1 538 */ 539 protected double[] computeResiduals(double[] objectiveValue) { 540 final double[] target = getTarget(); 541 if (objectiveValue.length != target.length) { 542 throw new DimensionMismatchException(target.length, 543 objectiveValue.length); 544 } 545 546 final double[] residuals = new double[target.length]; 547 for (int i = 0; i < target.length; i++) { 548 residuals[i] = target[i] - objectiveValue[i]; 549 } 550 551 return residuals; 552 } 553 554 /** 555 * Computes the square-root of the weight matrix. 556 * 557 * @param m Symmetric, positive-definite (weight) matrix. 558 * @return the square-root of the weight matrix. 559 */ 560 private RealMatrix squareRoot(RealMatrix m) { 561 final EigenDecomposition dec = new EigenDecomposition(m); 562 return dec.getSquareRoot(); 563 } 564 }