001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math3.optimization.general;
019    
020    import org.apache.commons.math3.analysis.DifferentiableMultivariateVectorFunction;
021    import org.apache.commons.math3.analysis.FunctionUtils;
022    import org.apache.commons.math3.analysis.differentiation.DerivativeStructure;
023    import org.apache.commons.math3.analysis.differentiation.MultivariateDifferentiableVectorFunction;
024    import org.apache.commons.math3.exception.DimensionMismatchException;
025    import org.apache.commons.math3.exception.NumberIsTooSmallException;
026    import org.apache.commons.math3.exception.util.LocalizedFormats;
027    import org.apache.commons.math3.linear.ArrayRealVector;
028    import org.apache.commons.math3.linear.RealMatrix;
029    import org.apache.commons.math3.linear.DecompositionSolver;
030    import org.apache.commons.math3.linear.MatrixUtils;
031    import org.apache.commons.math3.linear.QRDecomposition;
032    import org.apache.commons.math3.linear.EigenDecomposition;
033    import org.apache.commons.math3.optimization.OptimizationData;
034    import org.apache.commons.math3.optimization.InitialGuess;
035    import org.apache.commons.math3.optimization.Target;
036    import org.apache.commons.math3.optimization.Weight;
037    import org.apache.commons.math3.optimization.ConvergenceChecker;
038    import org.apache.commons.math3.optimization.DifferentiableMultivariateVectorOptimizer;
039    import org.apache.commons.math3.optimization.PointVectorValuePair;
040    import org.apache.commons.math3.optimization.direct.BaseAbstractMultivariateVectorOptimizer;
041    import org.apache.commons.math3.util.FastMath;
042    
043    /**
044     * Base class for implementing least squares optimizers.
045     * It handles the boilerplate methods associated to thresholds settings,
046     * Jacobian and error estimation.
047     * <br/>
048     * This class constructs the Jacobian matrix of the function argument in method
049     * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,MultivariateVectorFunction,OptimizationData[])
050     * optimize} and assumes that the rows of that matrix iterate on the model
051     * functions while the columns iterate on the parameters; thus, the numbers
052     * of rows is equal to the dimension of the
053     * {@link org.apache.commons.math3.optimization.Target Target} while
054     * the number of columns is equal to the dimension of the
055     * {@link org.apache.commons.math3.optimization.InitialGuess InitialGuess}.
056     *
057     * @version $Id: AbstractLeastSquaresOptimizer.java 1422313 2012-12-15 18:53:41Z psteitz $
058     * @deprecated As of 3.1 (to be removed in 4.0).
059     * @since 1.2
060     */
061    @Deprecated
062    public abstract class AbstractLeastSquaresOptimizer
063        extends BaseAbstractMultivariateVectorOptimizer<DifferentiableMultivariateVectorFunction>
064        implements DifferentiableMultivariateVectorOptimizer {
065        /**
066         * Singularity threshold (cf. {@link #getCovariances(double)}).
067         * @deprecated As of 3.1.
068         */
069        @Deprecated
070        private static final double DEFAULT_SINGULARITY_THRESHOLD = 1e-14;
071        /**
072         * Jacobian matrix of the weighted residuals.
073         * This matrix is in canonical form just after the calls to
074         * {@link #updateJacobian()}, but may be modified by the solver
075         * in the derived class (the {@link LevenbergMarquardtOptimizer
076         * Levenberg-Marquardt optimizer} does this).
077         * @deprecated As of 3.1. To be removed in 4.0. Please use
078         * {@link #computeWeightedJacobian(double[])} instead.
079         */
080        @Deprecated
081        protected double[][] weightedResidualJacobian;
082        /** Number of columns of the jacobian matrix.
083         * @deprecated As of 3.1.
084         */
085        @Deprecated
086        protected int cols;
087        /** Number of rows of the jacobian matrix.
088         * @deprecated As of 3.1.
089         */
090        @Deprecated
091        protected int rows;
092        /** Current point.
093         * @deprecated As of 3.1.
094         */
095        @Deprecated
096        protected double[] point;
097        /** Current objective function value.
098         * @deprecated As of 3.1.
099         */
100        @Deprecated
101        protected double[] objective;
102        /** Weighted residuals
103         * @deprecated As of 3.1.
104         */
105        @Deprecated
106        protected double[] weightedResiduals;
107        /** Cost value (square root of the sum of the residuals).
108         * @deprecated As of 3.1. Field to become "private" in 4.0.
109         * Please use {@link #setCost(double)}.
110         */
111        @Deprecated
112        protected double cost;
113        /** Objective function derivatives. */
114        private MultivariateDifferentiableVectorFunction jF;
115        /** Number of evaluations of the Jacobian. */
116        private int jacobianEvaluations;
117        /** Square-root of the weight matrix. */
118        private RealMatrix weightMatrixSqrt;
119    
120        /**
121         * Simple constructor with default settings.
122         * The convergence check is set to a {@link
123         * org.apache.commons.math3.optimization.SimpleVectorValueChecker}.
124         * @deprecated See {@link org.apache.commons.math3.optimization.SimpleValueChecker#SimpleValueChecker()}
125         */
126        @Deprecated
127        protected AbstractLeastSquaresOptimizer() {}
128    
129        /**
130         * @param checker Convergence checker.
131         */
132        protected AbstractLeastSquaresOptimizer(ConvergenceChecker<PointVectorValuePair> checker) {
133            super(checker);
134        }
135    
136        /**
137         * @return the number of evaluations of the Jacobian function.
138         */
139        public int getJacobianEvaluations() {
140            return jacobianEvaluations;
141        }
142    
143        /**
144         * Update the jacobian matrix.
145         *
146         * @throws DimensionMismatchException if the Jacobian dimension does not
147         * match problem dimension.
148         * @deprecated As of 3.1. Please use {@link #computeWeightedJacobian(double[])}
149         * instead.
150         */
151        @Deprecated
152        protected void updateJacobian() {
153            final RealMatrix weightedJacobian = computeWeightedJacobian(point);
154            weightedResidualJacobian = weightedJacobian.scalarMultiply(-1).getData();
155        }
156    
157        /**
158         * Computes the Jacobian matrix.
159         *
160         * @param params Model parameters at which to compute the Jacobian.
161         * @return the weighted Jacobian: W<sup>1/2</sup> J.
162         * @throws DimensionMismatchException if the Jacobian dimension does not
163         * match problem dimension.
164         * @since 3.1
165         */
166        protected RealMatrix computeWeightedJacobian(double[] params) {
167            ++jacobianEvaluations;
168    
169            final DerivativeStructure[] dsPoint = new DerivativeStructure[params.length];
170            final int nC = params.length;
171            for (int i = 0; i < nC; ++i) {
172                dsPoint[i] = new DerivativeStructure(nC, 1, i, params[i]);
173            }
174            final DerivativeStructure[] dsValue = jF.value(dsPoint);
175            final int nR = getTarget().length;
176            if (dsValue.length != nR) {
177                throw new DimensionMismatchException(dsValue.length, nR);
178            }
179            final double[][] jacobianData = new double[nR][nC];
180            for (int i = 0; i < nR; ++i) {
181                int[] orders = new int[nC];
182                for (int j = 0; j < nC; ++j) {
183                    orders[j] = 1;
184                    jacobianData[i][j] = dsValue[i].getPartialDerivative(orders);
185                    orders[j] = 0;
186                }
187            }
188    
189            return weightMatrixSqrt.multiply(MatrixUtils.createRealMatrix(jacobianData));
190        }
191    
192        /**
193         * Update the residuals array and cost function value.
194         * @throws DimensionMismatchException if the dimension does not match the
195         * problem dimension.
196         * @throws org.apache.commons.math3.exception.TooManyEvaluationsException
197         * if the maximal number of evaluations is exceeded.
198         * @deprecated As of 3.1. Please use {@link #computeResiduals(double[])},
199         * {@link #computeObjectiveValue(double[])}, {@link #computeCost(double[])}
200         * and {@link #setCost(double)} instead.
201         */
202        @Deprecated
203        protected void updateResidualsAndCost() {
204            objective = computeObjectiveValue(point);
205            final double[] res = computeResiduals(objective);
206    
207            // Compute cost.
208            cost = computeCost(res);
209    
210            // Compute weighted residuals.
211            final ArrayRealVector residuals = new ArrayRealVector(res);
212            weightedResiduals = weightMatrixSqrt.operate(residuals).toArray();
213        }
214    
215        /**
216         * Computes the cost.
217         *
218         * @param residuals Residuals.
219         * @return the cost.
220         * @see #computeResiduals(double[])
221         * @since 3.1
222         */
223        protected double computeCost(double[] residuals) {
224            final ArrayRealVector r = new ArrayRealVector(residuals);
225            return FastMath.sqrt(r.dotProduct(getWeight().operate(r)));
226        }
227    
228        /**
229         * Get the Root Mean Square value.
230         * Get the Root Mean Square value, i.e. the root of the arithmetic
231         * mean of the square of all weighted residuals. This is related to the
232         * criterion that is minimized by the optimizer as follows: if
233         * <em>c</em> if the criterion, and <em>n</em> is the number of
234         * measurements, then the RMS is <em>sqrt (c/n)</em>.
235         *
236         * @return RMS value
237         */
238        public double getRMS() {
239            return FastMath.sqrt(getChiSquare() / rows);
240        }
241    
242        /**
243         * Get a Chi-Square-like value assuming the N residuals follow N
244         * distinct normal distributions centered on 0 and whose variances are
245         * the reciprocal of the weights.
246         * @return chi-square value
247         */
248        public double getChiSquare() {
249            return cost * cost;
250        }
251    
252        /**
253         * Gets the square-root of the weight matrix.
254         *
255         * @return the square-root of the weight matrix.
256         * @since 3.1
257         */
258        public RealMatrix getWeightSquareRoot() {
259            return weightMatrixSqrt.copy();
260        }
261    
262        /**
263         * Sets the cost.
264         *
265         * @param cost Cost value.
266         * @since 3.1
267         */
268        protected void setCost(double cost) {
269            this.cost = cost;
270        }
271    
272        /**
273         * Get the covariance matrix of the optimized parameters.
274         *
275         * @return the covariance matrix.
276         * @throws org.apache.commons.math3.linear.SingularMatrixException
277         * if the covariance matrix cannot be computed (singular problem).
278         * @see #getCovariances(double)
279         * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)}
280         * instead.
281         */
282        @Deprecated
283        public double[][] getCovariances() {
284            return getCovariances(DEFAULT_SINGULARITY_THRESHOLD);
285        }
286    
287        /**
288         * Get the covariance matrix of the optimized parameters.
289         * <br/>
290         * Note that this operation involves the inversion of the
291         * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the
292         * Jacobian matrix.
293         * The {@code threshold} parameter is a way for the caller to specify
294         * that the result of this computation should be considered meaningless,
295         * and thus trigger an exception.
296         *
297         * @param threshold Singularity threshold.
298         * @return the covariance matrix.
299         * @throws org.apache.commons.math3.linear.SingularMatrixException
300         * if the covariance matrix cannot be computed (singular problem).
301         * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)}
302         * instead.
303         */
304        @Deprecated
305        public double[][] getCovariances(double threshold) {
306            return computeCovariances(point, threshold);
307        }
308    
309        /**
310         * Get the covariance matrix of the optimized parameters.
311         * <br/>
312         * Note that this operation involves the inversion of the
313         * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the
314         * Jacobian matrix.
315         * The {@code threshold} parameter is a way for the caller to specify
316         * that the result of this computation should be considered meaningless,
317         * and thus trigger an exception.
318         *
319         * @param params Model parameters.
320         * @param threshold Singularity threshold.
321         * @return the covariance matrix.
322         * @throws org.apache.commons.math3.linear.SingularMatrixException
323         * if the covariance matrix cannot be computed (singular problem).
324         * @since 3.1
325         */
326        public double[][] computeCovariances(double[] params,
327                                             double threshold) {
328            // Set up the Jacobian.
329            final RealMatrix j = computeWeightedJacobian(params);
330    
331            // Compute transpose(J)J.
332            final RealMatrix jTj = j.transpose().multiply(j);
333    
334            // Compute the covariances matrix.
335            final DecompositionSolver solver
336                = new QRDecomposition(jTj, threshold).getSolver();
337            return solver.getInverse().getData();
338        }
339    
340        /**
341         * <p>
342         * Returns an estimate of the standard deviation of each parameter. The
343         * returned values are the so-called (asymptotic) standard errors on the
344         * parameters, defined as {@code sd(a[i]) = sqrt(S / (n - m) * C[i][i])},
345         * where {@code a[i]} is the optimized value of the {@code i}-th parameter,
346         * {@code S} is the minimized value of the sum of squares objective function
347         * (as returned by {@link #getChiSquare()}), {@code n} is the number of
348         * observations, {@code m} is the number of parameters and {@code C} is the
349         * covariance matrix.
350         * </p>
351         * <p>
352         * See also
353         * <a href="http://en.wikipedia.org/wiki/Least_squares">Wikipedia</a>,
354         * or
355         * <a href="http://mathworld.wolfram.com/LeastSquaresFitting.html">MathWorld</a>,
356         * equations (34) and (35) for a particular case.
357         * </p>
358         *
359         * @return an estimate of the standard deviation of the optimized parameters
360         * @throws org.apache.commons.math3.linear.SingularMatrixException
361         * if the covariance matrix cannot be computed.
362         * @throws NumberIsTooSmallException if the number of degrees of freedom is not
363         * positive, i.e. the number of measurements is less or equal to the number of
364         * parameters.
365         * @deprecated as of version 3.1, {@link #computeSigma(double[],double)} should be used
366         * instead. It should be emphasized that {@code guessParametersErrors} and
367         * {@code computeSigma} are <em>not</em> strictly equivalent.
368         */
369        @Deprecated
370        public double[] guessParametersErrors() {
371            if (rows <= cols) {
372                throw new NumberIsTooSmallException(LocalizedFormats.NO_DEGREES_OF_FREEDOM,
373                                                    rows, cols, false);
374            }
375            double[] errors = new double[cols];
376            final double c = FastMath.sqrt(getChiSquare() / (rows - cols));
377            double[][] covar = computeCovariances(point, 1e-14);
378            for (int i = 0; i < errors.length; ++i) {
379                errors[i] = FastMath.sqrt(covar[i][i]) * c;
380            }
381            return errors;
382        }
383    
384        /**
385         * Computes an estimate of the standard deviation of the parameters. The
386         * returned values are the square root of the diagonal coefficients of the
387         * covariance matrix, {@code sd(a[i]) ~= sqrt(C[i][i])}, where {@code a[i]}
388         * is the optimized value of the {@code i}-th parameter, and {@code C} is
389         * the covariance matrix.
390         *
391         * @param params Model parameters.
392         * @param covarianceSingularityThreshold Singularity threshold (see
393         * {@link #computeCovariances(double[],double) computeCovariances}).
394         * @return an estimate of the standard deviation of the optimized parameters
395         * @throws org.apache.commons.math3.linear.SingularMatrixException
396         * if the covariance matrix cannot be computed.
397         * @since 3.1
398         */
399        public double[] computeSigma(double[] params,
400                                     double covarianceSingularityThreshold) {
401            final int nC = params.length;
402            final double[] sig = new double[nC];
403            final double[][] cov = computeCovariances(params, covarianceSingularityThreshold);
404            for (int i = 0; i < nC; ++i) {
405                sig[i] = FastMath.sqrt(cov[i][i]);
406            }
407            return sig;
408        }
409    
410        /** {@inheritDoc}
411         * @deprecated As of 3.1. Please use
412         * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,MultivariateVectorFunction,OptimizationData[])
413         * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)}
414         * instead.
415         */
416        @Override
417        @Deprecated
418        public PointVectorValuePair optimize(int maxEval,
419                                             final DifferentiableMultivariateVectorFunction f,
420                                             final double[] target, final double[] weights,
421                                             final double[] startPoint) {
422            return optimizeInternal(maxEval,
423                                    FunctionUtils.toMultivariateDifferentiableVectorFunction(f),
424                                    new Target(target),
425                                    new Weight(weights),
426                                    new InitialGuess(startPoint));
427        }
428    
429        /**
430         * Optimize an objective function.
431         * Optimization is considered to be a weighted least-squares minimization.
432         * The cost function to be minimized is
433         * <code>&sum;weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code>
434         *
435         * @param f Objective function.
436         * @param target Target value for the objective functions at optimum.
437         * @param weights Weights for the least squares cost computation.
438         * @param startPoint Start point for optimization.
439         * @return the point/value pair giving the optimal value for objective
440         * function.
441         * @param maxEval Maximum number of function evaluations.
442         * @throws org.apache.commons.math3.exception.DimensionMismatchException
443         * if the start point dimension is wrong.
444         * @throws org.apache.commons.math3.exception.TooManyEvaluationsException
445         * if the maximal number of evaluations is exceeded.
446         * @throws org.apache.commons.math3.exception.NullArgumentException if
447         * any argument is {@code null}.
448         * @deprecated As of 3.1. Please use
449         * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,MultivariateVectorFunction,OptimizationData[])
450         * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)}
451         * instead.
452         */
453        @Deprecated
454        public PointVectorValuePair optimize(final int maxEval,
455                                             final MultivariateDifferentiableVectorFunction f,
456                                             final double[] target, final double[] weights,
457                                             final double[] startPoint) {
458            return optimizeInternal(maxEval, f,
459                                    new Target(target),
460                                    new Weight(weights),
461                                    new InitialGuess(startPoint));
462        }
463    
464        /**
465         * Optimize an objective function.
466         * Optimization is considered to be a weighted least-squares minimization.
467         * The cost function to be minimized is
468         * <code>&sum;weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code>
469         *
470         * @param maxEval Allowed number of evaluations of the objective function.
471         * @param f Objective function.
472         * @param optData Optimization data. The following data will be looked for:
473         * <ul>
474         *  <li>{@link Target}</li>
475         *  <li>{@link Weight}</li>
476         *  <li>{@link InitialGuess}</li>
477         * </ul>
478         * @return the point/value pair giving the optimal value of the objective
479         * function.
480         * @throws org.apache.commons.math3.exception.TooManyEvaluationsException if
481         * the maximal number of evaluations is exceeded.
482         * @throws DimensionMismatchException if the target, and weight arguments
483         * have inconsistent dimensions.
484         * @see BaseAbstractMultivariateVectorOptimizer#optimizeInternal(int,MultivariateVectorFunction,OptimizationData[])
485         * @since 3.1
486         * @deprecated As of 3.1. Override is necessary only until this class's generic
487         * argument is changed to {@code MultivariateDifferentiableVectorFunction}.
488         */
489        @Deprecated
490        protected PointVectorValuePair optimizeInternal(final int maxEval,
491                                                        final MultivariateDifferentiableVectorFunction f,
492                                                        OptimizationData... optData) {
493            // XXX Conversion will be removed when the generic argument of the
494            // base class becomes "MultivariateDifferentiableVectorFunction".
495            return super.optimizeInternal(maxEval, FunctionUtils.toDifferentiableMultivariateVectorFunction(f), optData);
496        }
497    
498        /** {@inheritDoc} */
499        @Override
500        protected void setUp() {
501            super.setUp();
502    
503            // Reset counter.
504            jacobianEvaluations = 0;
505    
506            // Square-root of the weight matrix.
507            weightMatrixSqrt = squareRoot(getWeight());
508    
509            // Store least squares problem characteristics.
510            // XXX The conversion won't be necessary when the generic argument of
511            // the base class becomes "MultivariateDifferentiableVectorFunction".
512            // XXX "jF" is not strictly necessary anymore but is currently more
513            // efficient than converting the value returned from "getObjectiveFunction()"
514            // every time it is used.
515            jF = FunctionUtils.toMultivariateDifferentiableVectorFunction((DifferentiableMultivariateVectorFunction) getObjectiveFunction());
516    
517            // Arrays shared with "private" and "protected" methods.
518            point = getStartPoint();
519            rows = getTarget().length;
520            cols = point.length;
521        }
522    
523        /**
524         * Computes the residuals.
525         * The residual is the difference between the observed (target)
526         * values and the model (objective function) value.
527         * There is one residual for each element of the vector-valued
528         * function.
529         *
530         * @param objectiveValue Value of the the objective function. This is
531         * the value returned from a call to
532         * {@link #computeObjectiveValue(double[]) computeObjectiveValue}
533         * (whose array argument contains the model parameters).
534         * @return the residuals.
535         * @throws DimensionMismatchException if {@code params} has a wrong
536         * length.
537         * @since 3.1
538         */
539        protected double[] computeResiduals(double[] objectiveValue) {
540            final double[] target = getTarget();
541            if (objectiveValue.length != target.length) {
542                throw new DimensionMismatchException(target.length,
543                                                     objectiveValue.length);
544            }
545    
546            final double[] residuals = new double[target.length];
547            for (int i = 0; i < target.length; i++) {
548                residuals[i] = target[i] - objectiveValue[i];
549            }
550    
551            return residuals;
552        }
553    
554        /**
555         * Computes the square-root of the weight matrix.
556         *
557         * @param m Symmetric, positive-definite (weight) matrix.
558         * @return the square-root of the weight matrix.
559         */
560        private RealMatrix squareRoot(RealMatrix m) {
561            final EigenDecomposition dec = new EigenDecomposition(m);
562            return dec.getSquareRoot();
563        }
564    }