001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math3.stat.regression;
018    
019    import java.io.Serializable;
020    import java.util.Arrays;
021    import org.apache.commons.math3.util.FastMath;
022    import org.apache.commons.math3.util.MathArrays;
023    import org.apache.commons.math3.exception.OutOfRangeException;
024    
025    /**
026     * Results of a Multiple Linear Regression model fit.
027     *
028     * @version $Id: RegressionResults.java 1392342 2012-10-01 14:08:52Z psteitz $
029     * @since 3.0
030     */
031    public class RegressionResults implements Serializable {
032    
033        /** INDEX of Sum of Squared Errors */
034        private static final int SSE_IDX = 0;
035        /** INDEX of Sum of Squares of Model */
036        private static final int SST_IDX = 1;
037        /** INDEX of R-Squared of regression */
038        private static final int RSQ_IDX = 2;
039        /** INDEX of Mean Squared Error */
040        private static final int MSE_IDX = 3;
041        /** INDEX of Adjusted R Squared */
042        private static final int ADJRSQ_IDX = 4;
043        /** UID */
044        private static final long serialVersionUID = 1l;
045        /** regression slope parameters */
046        private final double[] parameters;
047        /** variance covariance matrix of parameters */
048        private final double[][] varCovData;
049        /** boolean flag for variance covariance matrix in symm compressed storage */
050        private final boolean isSymmetricVCD;
051        /** rank of the solution */
052        @SuppressWarnings("unused")
053        private final int rank;
054        /** number of observations on which results are based */
055        private final long nobs;
056        /** boolean flag indicator of whether a constant was included*/
057        private final boolean containsConstant;
058        /** array storing global results, SSE, MSE, RSQ, adjRSQ */
059        private final double[] globalFitInfo;
060    
061        /**
062         *  Set the default constructor to private access
063         *  to prevent inadvertent instantiation
064         */
065        @SuppressWarnings("unused")
066        private RegressionResults() {
067            this.parameters = null;
068            this.varCovData = null;
069            this.rank = -1;
070            this.nobs = -1;
071            this.containsConstant = false;
072            this.isSymmetricVCD = false;
073            this.globalFitInfo = null;
074        }
075    
076        /**
077         * Constructor for Regression Results.
078         *
079         * @param parameters a double array with the regression slope estimates
080         * @param varcov the variance covariance matrix, stored either in a square matrix
081         * or as a compressed
082         * @param isSymmetricCompressed a flag which denotes that the variance covariance
083         * matrix is in symmetric compressed format
084         * @param nobs the number of observations of the regression estimation
085         * @param rank the number of independent variables in the regression
086         * @param sumy the sum of the independent variable
087         * @param sumysq the sum of the squared independent variable
088         * @param sse sum of squared errors
089         * @param containsConstant true model has constant,  false model does not have constant
090         * @param copyData if true a deep copy of all input data is made, if false only references
091         * are copied and the RegressionResults become mutable
092         */
093        public RegressionResults(
094                final double[] parameters, final double[][] varcov,
095                final boolean isSymmetricCompressed,
096                final long nobs, final int rank,
097                final double sumy, final double sumysq, final double sse,
098                final boolean containsConstant,
099                final boolean copyData) {
100            if (copyData) {
101                this.parameters = MathArrays.copyOf(parameters);
102                this.varCovData = new double[varcov.length][];
103                for (int i = 0; i < varcov.length; i++) {
104                    this.varCovData[i] = MathArrays.copyOf(varcov[i]);
105                }
106            } else {
107                this.parameters = parameters;
108                this.varCovData = varcov;
109            }
110            this.isSymmetricVCD = isSymmetricCompressed;
111            this.nobs = nobs;
112            this.rank = rank;
113            this.containsConstant = containsConstant;
114            this.globalFitInfo = new double[5];
115            Arrays.fill(this.globalFitInfo, Double.NaN);
116    
117            if (rank > 0) {
118                this.globalFitInfo[SST_IDX] = containsConstant ?
119                        (sumysq - sumy * sumy / nobs) : sumysq;
120            }
121    
122            this.globalFitInfo[SSE_IDX] = sse;
123            this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] /
124                    (nobs - rank);
125            this.globalFitInfo[RSQ_IDX] = 1.0 -
126                    this.globalFitInfo[SSE_IDX] /
127                    this.globalFitInfo[SST_IDX];
128    
129            if (!containsConstant) {
130                this.globalFitInfo[ADJRSQ_IDX] = 1.0-
131                        (1.0 - this.globalFitInfo[RSQ_IDX]) *
132                        ( (double) nobs / ( (double) (nobs - rank)));
133            } else {
134                this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) /
135                        (globalFitInfo[SST_IDX] * (nobs - rank));
136            }
137        }
138    
139        /**
140         * <p>Returns the parameter estimate for the regressor at the given index.</p>
141         *
142         * <p>A redundant regressor will have its redundancy flag set, as well as
143         *  a parameters estimated equal to {@code Double.NaN}</p>
144         *
145         * @param index Index.
146         * @return the parameters estimated for regressor at index.
147         * @throws OutOfRangeException if {@code index} is not in the interval
148         * {@code [0, number of parameters)}.
149         */
150        public double getParameterEstimate(int index) throws OutOfRangeException {
151            if (parameters == null) {
152                return Double.NaN;
153            }
154            if (index < 0 || index >= this.parameters.length) {
155                throw new OutOfRangeException(index, 0, this.parameters.length - 1);
156            }
157            return this.parameters[index];
158        }
159    
160        /**
161         * <p>Returns a copy of the regression parameters estimates.</p>
162         *
163         * <p>The parameter estimates are returned in the natural order of the data.</p>
164         *
165         * <p>A redundant regressor will have its redundancy flag set, as will
166         *  a parameter estimate equal to {@code Double.NaN}.</p>
167         *
168         * @return array of parameter estimates, null if no estimation occurred
169         */
170        public double[] getParameterEstimates() {
171            if (this.parameters == null) {
172                return null;
173            }
174            return MathArrays.copyOf(parameters);
175        }
176    
177        /**
178         * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
179         * error of the parameter estimate at index</a>,
180         * usually denoted s(b<sub>index</sub>).
181         *
182         * @param index Index.
183         * @return the standard errors associated with parameters estimated at index.
184         * @throws OutOfRangeException if {@code index} is not in the interval
185         * {@code [0, number of parameters)}.
186         */
187        public double getStdErrorOfEstimate(int index) throws OutOfRangeException {
188            if (parameters == null) {
189                return Double.NaN;
190            }
191            if (index < 0 || index >= this.parameters.length) {
192                throw new OutOfRangeException(index, 0, this.parameters.length - 1);
193            }
194            double var = this.getVcvElement(index, index);
195            if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
196                return FastMath.sqrt(var);
197            }
198            return Double.NaN;
199        }
200    
201        /**
202         * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
203         * error of the parameter estimates</a>,
204         * usually denoted s(b<sub>i</sub>).</p>
205         *
206         * <p>If there are problems with an ill conditioned design matrix then the regressor
207         * which is redundant will be assigned <code>Double.NaN</code>. </p>
208         *
209         * @return an array standard errors associated with parameters estimates,
210         *  null if no estimation occurred
211         */
212        public double[] getStdErrorOfEstimates() {
213            if (parameters == null) {
214                return null;
215            }
216            double[] se = new double[this.parameters.length];
217            for (int i = 0; i < this.parameters.length; i++) {
218                double var = this.getVcvElement(i, i);
219                if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
220                    se[i] = FastMath.sqrt(var);
221                    continue;
222                }
223                se[i] = Double.NaN;
224            }
225            return se;
226        }
227    
228        /**
229         * <p>Returns the covariance between regression parameters i and j.</p>
230         *
231         * <p>If there are problems with an ill conditioned design matrix then the covariance
232         * which involves redundant columns will be assigned {@code Double.NaN}. </p>
233         *
234         * @param i {@code i}th regression parameter.
235         * @param j {@code j}th regression parameter.
236         * @return the covariance of the parameter estimates.
237         * @throws OutOfRangeException if {@code i} or {@code j} is not in the
238         * interval {@code [0, number of parameters)}.
239         */
240        public double getCovarianceOfParameters(int i, int j) throws OutOfRangeException {
241            if (parameters == null) {
242                return Double.NaN;
243            }
244            if (i < 0 || i >= this.parameters.length) {
245                throw new OutOfRangeException(i, 0, this.parameters.length - 1);
246            }
247            if (j < 0 || j >= this.parameters.length) {
248                throw new OutOfRangeException(j, 0, this.parameters.length - 1);
249            }
250            return this.getVcvElement(i, j);
251        }
252    
253        /**
254         * <p>Returns the number of parameters estimated in the model.</p>
255         *
256         * <p>This is the maximum number of regressors, some techniques may drop
257         * redundant parameters</p>
258         *
259         * @return number of regressors, -1 if not estimated
260         */
261        public int getNumberOfParameters() {
262            if (this.parameters == null) {
263                return -1;
264            }
265            return this.parameters.length;
266        }
267    
268        /**
269         * Returns the number of observations added to the regression model.
270         *
271         * @return Number of observations, -1 if an error condition prevents estimation
272         */
273        public long getN() {
274            return this.nobs;
275        }
276    
277        /**
278         * <p>Returns the sum of squared deviations of the y values about their mean.</p>
279         *
280         * <p>This is defined as SSTO
281         * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
282         *
283         * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p>
284         *
285         * @return sum of squared deviations of y values
286         */
287        public double getTotalSumSquares() {
288            return this.globalFitInfo[SST_IDX];
289        }
290    
291        /**
292         * <p>Returns the sum of squared deviations of the predicted y values about
293         * their mean (which equals the mean of y).</p>
294         *
295         * <p>This is usually abbreviated SSR or SSM.  It is defined as SSM
296         * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
297         *
298         * <p><strong>Preconditions</strong>: <ul>
299         * <li>At least two observations (with at least two different x values)
300         * must have been added before invoking this method. If this method is
301         * invoked before a model can be estimated, <code>Double.NaN</code> is
302         * returned.
303         * </li></ul></p>
304         *
305         * @return sum of squared deviations of predicted y values
306         */
307        public double getRegressionSumSquares() {
308            return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX];
309        }
310    
311        /**
312         * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
313         * sum of squared errors</a> (SSE) associated with the regression
314         * model.</p>
315         *
316         * <p>The return value is constrained to be non-negative - i.e., if due to
317         * rounding errors the computational formula returns a negative result,
318         * 0 is returned.</p>
319         *
320         * <p><strong>Preconditions</strong>: <ul>
321         * <li>numberOfParameters data pairs
322         * must have been added before invoking this method. If this method is
323         * invoked before a model can be estimated, <code>Double,NaN</code> is
324         * returned.
325         * </li></ul></p>
326         *
327         * @return sum of squared errors associated with the regression model
328         */
329        public double getErrorSumSquares() {
330            return this.globalFitInfo[ SSE_IDX];
331        }
332    
333        /**
334         * <p>Returns the sum of squared errors divided by the degrees of freedom,
335         * usually abbreviated MSE.</p>
336         *
337         * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model,
338         * or if there is no variation in <code>x</code>, this returns
339         * <code>Double.NaN</code>.</p>
340         *
341         * @return sum of squared deviations of y values
342         */
343        public double getMeanSquareError() {
344            return this.globalFitInfo[ MSE_IDX];
345        }
346    
347        /**
348         * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm">
349         * coefficient of multiple determination</a>,
350         * usually denoted r-square.</p>
351         *
352         * <p><strong>Preconditions</strong>: <ul>
353         * <li>At least numberOfParameters observations (with at least numberOfParameters different x values)
354         * must have been added before invoking this method. If this method is
355         * invoked before a model can be estimated, {@code Double,NaN} is
356         * returned.
357         * </li></ul></p>
358         *
359         * @return r-square, a double in the interval [0, 1]
360         */
361        public double getRSquared() {
362            return this.globalFitInfo[ RSQ_IDX];
363        }
364    
365        /**
366         * <p>Returns the adjusted R-squared statistic, defined by the formula <pre>
367         * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
368         * </pre>
369         * where SSR is the sum of squared residuals},
370         * SSTO is the total sum of squares}, n is the number
371         * of observations and p is the number of parameters estimated (including the intercept).</p>
372         *
373         * <p>If the regression is estimated without an intercept term, what is returned is <pre>
374         * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code>
375         * </pre></p>
376         *
377         * @return adjusted R-Squared statistic
378         */
379        public double getAdjustedRSquared() {
380            return this.globalFitInfo[ ADJRSQ_IDX];
381        }
382    
383        /**
384         * Returns true if the regression model has been computed including an intercept.
385         * In this case, the coefficient of the intercept is the first element of the
386         * {@link #getParameterEstimates() parameter estimates}.
387         * @return true if the model has an intercept term
388         */
389        public boolean hasIntercept() {
390            return this.containsConstant;
391        }
392    
393        /**
394         * Gets the i-jth element of the variance-covariance matrix.
395         *
396         * @param i first variable index
397         * @param j second variable index
398         * @return the requested variance-covariance matrix entry
399         */
400        private double getVcvElement(int i, int j) {
401            if (this.isSymmetricVCD) {
402                if (this.varCovData.length > 1) {
403                    //could be stored in upper or lower triangular
404                    if (i == j) {
405                        return varCovData[i][i];
406                    } else if (i >= varCovData[j].length) {
407                        return varCovData[i][j];
408                    } else {
409                        return varCovData[j][i];
410                    }
411                } else {//could be in single array
412                    if (i > j) {
413                        return varCovData[0][(i + 1) * i / 2 + j];
414                    } else {
415                        return varCovData[0][(j + 1) * j / 2 + i];
416                    }
417                }
418            } else {
419                return this.varCovData[i][j];
420            }
421        }
422    }