001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math3.stat.regression;
018    
019    import java.util.Arrays;
020    import org.apache.commons.math3.exception.util.LocalizedFormats;
021    import org.apache.commons.math3.util.FastMath;
022    import org.apache.commons.math3.util.Precision;
023    import org.apache.commons.math3.util.MathArrays;
024    
025    /**
026     * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
027     *
028     * <p>The algorithm is described in: <pre>
029     * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
030     * Author(s): Alan J. Miller
031     * Source: Journal of the Royal Statistical Society.
032     * Series C (Applied Statistics), Vol. 41, No. 2
033     * (1992), pp. 458-478
034     * Published by: Blackwell Publishing for the Royal Statistical Society
035     * Stable URL: http://www.jstor.org/stable/2347583 </pre></p>
036     *
037     * <p>This method for multiple regression forms the solution to the OLS problem
038     * by updating the QR decomposition as described by Gentleman.</p>
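 *
 * <p>A minimal usage sketch (the data values are illustrative only):</p>
 * <pre>{@code
 * // two regressors plus an automatically added intercept
 * MillerUpdatingRegression regression = new MillerUpdatingRegression(2, true);
 * regression.addObservation(new double[]{1.0, 2.0}, 3.0);
 * regression.addObservation(new double[]{2.0, 3.0}, 5.0);
 * regression.addObservation(new double[]{3.0, 5.0}, 8.0);
 * regression.addObservation(new double[]{4.0, 7.0}, 11.0);
 * RegressionResults result = regression.regress();
 * double[] beta = result.getParameterEstimates(); // intercept estimate first
 * }</pre>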
039     *
040     * @version $Id: MillerUpdatingRegression.java 1392358 2012-10-01 14:41:55Z psteitz $
041     * @since 3.0
042     */
043    public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {
044    
045        /** number of variables in regression */
046        private final int nvars;
047        /** diagonals of cross products matrix */
048        private final double[] d;
    /** the elements of R'Y */
050        private final double[] rhs;
051        /** the off diagonal portion of the R matrix */
052        private final double[] r;
053        /** the tolerance for each of the variables */
054        private final double[] tol;
055        /** residual sum of squares for all nested regressions */
056        private final double[] rss;
057        /** order of the regressors */
058        private final int[] vorder;
059        /** scratch space for tolerance calc */
060        private final double[] work_tolset;
061        /** number of observations entered */
062        private long nobs = 0;
063        /** sum of squared errors of largest regression */
064        private double sserr = 0.0;
    /** has {@code ss()} been called (is rss current)? */
066        private boolean rss_set = false;
067        /** has the tolerance setting method been called */
068        private boolean tol_set = false;
069        /** flags for variables with linear dependency problems */
070        private final boolean[] lindep;
071        /** singular x values */
072        private final double[] x_sing;
073        /** workspace for singularity method */
074        private final double[] work_sing;
075        /** summation of Y variable */
076        private double sumy = 0.0;
077        /** summation of squared Y values */
078        private double sumsqy = 0.0;
079        /** boolean flag whether a regression constant is added */
080        private boolean hasIntercept;
081        /** zero tolerance */
082        private final double epsilon;
083        /**
     *  The default constructor is private
     *  to prevent inadvertent instantiation.
086         */
087        @SuppressWarnings("unused")
088        private MillerUpdatingRegression() {
089            this(-1, false, Double.NaN);
090        }
091    
092        /**
093         * This is the augmented constructor for the MillerUpdatingRegression class.
094         *
095         * @param numberOfVariables number of regressors to expect, not including constant
096         * @param includeConstant include a constant automatically
     * @param errorTolerance zero tolerance; how machine zero is determined
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
099         */
100        public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
101        throws ModelSpecificationException {
102            if (numberOfVariables < 1) {
103                throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
104            }
105            if (includeConstant) {
106                this.nvars = numberOfVariables + 1;
107            } else {
108                this.nvars = numberOfVariables;
109            }
110            this.hasIntercept = includeConstant;
111            this.nobs = 0;
112            this.d = new double[this.nvars];
113            this.rhs = new double[this.nvars];
114            this.r = new double[this.nvars * (this.nvars - 1) / 2];
115            this.tol = new double[this.nvars];
116            this.rss = new double[this.nvars];
117            this.vorder = new int[this.nvars];
118            this.x_sing = new double[this.nvars];
119            this.work_sing = new double[this.nvars];
120            this.work_tolset = new double[this.nvars];
121            this.lindep = new boolean[this.nvars];
122            for (int i = 0; i < this.nvars; i++) {
123                vorder[i] = i;
124            }
125            if (errorTolerance > 0) {
126                this.epsilon = errorTolerance;
127            } else {
128                this.epsilon = -errorTolerance;
129            }
130        }
131    
132        /**
133         * Primary constructor for the MillerUpdatingRegression.
134         *
135         * @param numberOfVariables maximum number of potential regressors
136         * @param includeConstant include a constant automatically
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
138         */
139        public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
140        throws ModelSpecificationException {
141            this(numberOfVariables, includeConstant, Precision.EPSILON);
142        }
143    
144        /**
145         * A getter method which determines whether a constant is included.
     * @return true if the regression has an intercept, false otherwise
147         */
148        public boolean hasIntercept() {
149            return this.hasIntercept;
150        }
151    
152        /**
153         * Gets the number of observations added to the regression model.
154         * @return number of observations
155         */
156        public long getN() {
157            return this.nobs;
158        }
159    
160        /**
161         * Adds an observation to the regression model.
162         * @param x the array with regressor values
     * @param y the value of the dependent variable given these regressors
     * @throws ModelSpecificationException if the length of {@code x} does not equal
165         * the number of independent variables in the model
166         */
167        public void addObservation(final double[] x, final double y)
168        throws ModelSpecificationException {
169    
170            if ((!this.hasIntercept && x.length != nvars) ||
171                   (this.hasIntercept && x.length + 1 != nvars)) {
172                throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
173                        x.length, nvars);
174            }
175            if (!this.hasIntercept) {
176                include(MathArrays.copyOf(x, x.length), 1.0, y);
177            } else {
178                final double[] tmp = new double[x.length + 1];
179                System.arraycopy(x, 0, tmp, 1, x.length);
180                tmp[0] = 1.0;
181                include(tmp, 1.0, y);
182            }
183            ++nobs;
184    
185        }
186    
187        /**
188         * Adds multiple observations to the model.
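     * <p>For example, given a {@code MillerUpdatingRegression} instance
     * {@code regression} (the data values are illustrative only):</p>
     * <pre>{@code
     * double[][] x = {{1.0, 2.0}, {2.0, 3.0}, {3.0, 5.0}, {4.0, 7.0}};
     * double[] y = {3.0, 5.0, 8.0, 11.0};
     * regression.addObservations(x, y);
     * }</pre>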
189         * @param x observations on the regressors
190         * @param y observations on the regressand
191         * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
192         * the length of {@code y} or does not contain sufficient data to estimate the model
193         */
194        public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
195            if ((x == null) || (y == null) || (x.length != y.length)) {
196                throw new ModelSpecificationException(
197                      LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
198                      (x == null) ? 0 : x.length,
199                      (y == null) ? 0 : y.length);
200            }
201            if (x.length == 0) {  // Must be no y data either
202                throw new ModelSpecificationException(
203                        LocalizedFormats.NO_DATA);
204            }
205            if (x[0].length + 1 > x.length) {
206                throw new ModelSpecificationException(
207                      LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
208                      x.length, x[0].length);
209            }
210            for (int i = 0; i < x.length; i++) {
211                addObservation(x[i], y[i]);
212            }
213        }
214    
215        /**
     * The include method is where the QR decomposition occurs. This method forms all
     * intermediate data which will be used for all derivative measures.
     * As noted in the Miller paper, the original implementation overwrites the x
     * vector. In this implementation, the include method is passed a copy of the
     * original data vector so that there is no contamination of the data. Additionally,
     * this method differs slightly from Gentleman's method in that it assumes
     * dense design matrices; the original Gentleman algorithm retains some advantage
     * on sparse matrices.
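     *
     * <p>A sketch of the packed storage this method updates: {@code r} holds the
     * strict upper triangle of the unit upper-triangular factor R, row by row,
     * so for {@code nvars = 4}:</p>
     * <pre>
     * R = | 1  r[0]  r[1]  r[2] |
     *     | 0   1    r[3]  r[4] |
     *     | 0   0     1    r[5] |
     *     | 0   0     0     1   |
     * </pre>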
224         *
225         * @param x observations on the regressors
     * @param wi weight of this observation (+1 or -1)
227         * @param yi observation on the regressand
228         */
229        private void include(final double[] x, final double wi, final double yi) {
230            int nextr = 0;
231            double w = wi;
232            double y = yi;
233            double xi;
234            double di;
235            double wxi;
236            double dpi;
237            double xk;
238            double _w;
239            this.rss_set = false;
240            sumy = smartAdd(yi, sumy);
241            sumsqy = smartAdd(sumsqy, yi * yi);
242            for (int i = 0; i < x.length; i++) {
243                if (w == 0.0) {
244                    return;
245                }
246                xi = x[i];
247    
248                if (xi == 0.0) {
249                    nextr += nvars - i - 1;
250                    continue;
251                }
252                di = d[i];
253                wxi = w * xi;
254                _w = w;
255                if (di != 0.0) {
256                    dpi = smartAdd(di, wxi * xi);
257                    final double tmp = wxi * xi / di;
258                    if (FastMath.abs(tmp) > Precision.EPSILON) {
259                        w = (di * w) / dpi;
260                    }
261                } else {
262                    dpi = wxi * xi;
263                    w = 0.0;
264                }
265                d[i] = dpi;
266                for (int k = i + 1; k < nvars; k++) {
267                    xk = x[k];
268                    x[k] = smartAdd(xk, -xi * r[nextr]);
269                    if (di != 0.0) {
270                        r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
271                    } else {
272                        r[nextr] = xk / xi;
273                    }
274                    ++nextr;
275                }
276                xk = y;
277                y = smartAdd(xk, -xi * rhs[i]);
278                if (di != 0.0) {
279                    rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
280                } else {
281                    rhs[i] = xk / xi;
282                }
283            }
284            sserr = smartAdd(sserr, w * y * y);
285        }
286    
287        /**
     * Adds two numbers, a and b, such that contamination due to the
     * numerical smallness of one addend does not corrupt the sum.
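     * For example, {@code smartAdd(1.0e20, 1.0e3)} returns {@code 1.0e20},
     * because {@code 1.0e3} falls below the threshold
     * {@code 1.0e20 * Precision.EPSILON}.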
     * @param a an addend
     * @param b an addend
     * @return the sum of a and b
293         */
294        private double smartAdd(double a, double b) {
295            final double _a = FastMath.abs(a);
296            final double _b = FastMath.abs(b);
297            if (_a > _b) {
298                final double eps = _a * Precision.EPSILON;
299                if (_b > eps) {
300                    return a + b;
301                }
302                return a;
303            } else {
304                final double eps = _b * Precision.EPSILON;
305                if (_a > eps) {
306                    return a + b;
307                }
308                return b;
309            }
310        }
311    
312        /**
     * As the name suggests, clear wipes the internals and reorders everything in the
314         * canonical order.
315         */
316        public void clear() {
317            Arrays.fill(this.d, 0.0);
318            Arrays.fill(this.rhs, 0.0);
319            Arrays.fill(this.r, 0.0);
320            Arrays.fill(this.tol, 0.0);
321            Arrays.fill(this.rss, 0.0);
322            Arrays.fill(this.work_tolset, 0.0);
323            Arrays.fill(this.work_sing, 0.0);
324            Arrays.fill(this.x_sing, 0.0);
325            Arrays.fill(this.lindep, false);
326            for (int i = 0; i < nvars; i++) {
327                this.vorder[i] = i;
328            }
329            this.nobs = 0;
330            this.sserr = 0.0;
331            this.sumy = 0.0;
332            this.sumsqy = 0.0;
333            this.rss_set = false;
334            this.tol_set = false;
335        }
336    
337        /**
338         * This sets up tolerances for singularity testing.
339         */
340        private void tolset() {
341            int pos;
342            double total;
343            final double eps = this.epsilon;
344            for (int i = 0; i < nvars; i++) {
            this.work_tolset[i] = FastMath.sqrt(d[i]);
346            }
347            tol[0] = eps * this.work_tolset[0];
348            for (int col = 1; col < nvars; col++) {
349                pos = col - 1;
350                total = work_tolset[col];
351                for (int row = 0; row < col; row++) {
                total += FastMath.abs(r[pos]) * work_tolset[row];
353                    pos += nvars - row - 2;
354                }
355                tol[col] = eps * total;
356            }
357            tol_set = true;
358        }
359    
360        /**
361         * The regcf method conducts the linear regression and extracts the
362         * parameter vector. Notice that the algorithm can do subset regression
363         * with no alteration.
364         *
365         * @param nreq how many of the regressors to include (either in canonical
366         * order, or in the current reordered state)
367         * @return an array with the estimated slope coefficients
368         * @throws ModelSpecificationException if {@code nreq} is less than 1
369         * or greater than the number of independent variables
370         */
371        private double[] regcf(int nreq) throws ModelSpecificationException {
372            int nextr;
373            if (nreq < 1) {
374                throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
375            }
376            if (nreq > this.nvars) {
377                throw new ModelSpecificationException(
378                        LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
379            }
380            if (!this.tol_set) {
381                tolset();
382            }
383            final double[] ret = new double[nreq];
384            boolean rankProblem = false;
385            for (int i = nreq - 1; i > -1; i--) {
            if (FastMath.sqrt(d[i]) < tol[i]) {
387                    ret[i] = 0.0;
388                    d[i] = 0.0;
389                    rankProblem = true;
390                } else {
391                    ret[i] = rhs[i];
392                    nextr = i * (nvars + nvars - i - 1) / 2;
393                    for (int j = i + 1; j < nreq; j++) {
394                        ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
395                        ++nextr;
396                    }
397                }
398            }
399            if (rankProblem) {
400                for (int i = 0; i < nreq; i++) {
401                    if (this.lindep[i]) {
402                        ret[i] = Double.NaN;
403                    }
404                }
405            }
406            return ret;
407        }
408    
409        /**
410         * The method which checks for singularities and then eliminates the offending
411         * columns.
412         */
413        private void singcheck() {
414            int pos;
415            for (int i = 0; i < nvars; i++) {
            work_sing[i] = FastMath.sqrt(d[i]);
417            }
418            for (int col = 0; col < nvars; col++) {
419                // Set elements within R to zero if they are less than tol(col) in
420                // absolute value after being scaled by the square root of their row
421                // multiplier
422                final double temp = tol[col];
423                pos = col - 1;
424                for (int row = 0; row < col - 1; row++) {
                if (FastMath.abs(r[pos]) * work_sing[row] < temp) {
426                        r[pos] = 0.0;
427                    }
428                    pos += nvars - row - 2;
429                }
430                // If diagonal element is near zero, set it to zero, set appropriate
431                // element of LINDEP, and use INCLUD to augment the projections in
432                // the lower rows of the orthogonalization.
433                lindep[col] = false;
434                if (work_sing[col] < temp) {
435                    lindep[col] = true;
436                    if (col < nvars - 1) {
437                        Arrays.fill(x_sing, 0.0);
438                        int _pi = col * (nvars + nvars - col - 1) / 2;
439                        for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
440                            x_sing[_xi] = r[_pi];
441                            r[_pi] = 0.0;
442                        }
443                        final double y = rhs[col];
444                        final double weight = d[col];
445                        d[col] = 0.0;
446                        rhs[col] = 0.0;
447                        this.include(x_sing, weight, y);
448                    } else {
449                        sserr += d[col] * rhs[col] * rhs[col];
450                    }
451                }
452            }
453        }
454    
455        /**
456         * Calculates the sum of squared errors for the full regression
457         * and all subsets in the following manner: <pre>
458         * rss[] ={
459         * ResidualSumOfSquares_allNvars,
460         * ResidualSumOfSquares_FirstNvars-1,
461         * ResidualSumOfSquares_FirstNvars-2,
462         * ..., ResidualSumOfSquares_FirstVariable} </pre>
463         */
464        private void ss() {
465            double total = sserr;
466            rss[nvars - 1] = sserr;
467            for (int i = nvars - 1; i > 0; i--) {
468                total += d[i] * rhs[i] * rhs[i];
469                rss[i - 1] = total;
470            }
471            rss_set = true;
472        }
473    
474        /**
475         * Calculates the cov matrix assuming only the first nreq variables are
476         * included in the calculation. The returned array contains a symmetric
477         * matrix stored in lower triangular form. The matrix will have
478         * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
479         * cov =
480         * {
481         *  cov_00,
482         *  cov_10, cov_11,
483         *  cov_20, cov_21, cov22,
484         *  ...
485         * } </pre>
486         *
487         * @param nreq how many of the regressors to include (either in canonical
488         * order, or in the current reordered state)
489         * @return an array with the variance covariance of the included
490         * regressors in lower triangular form
491         */
492        private double[] cov(int nreq) {
493            if (this.nobs <= nreq) {
494                return null;
495            }
496            double rnk = 0.0;
497            for (int i = 0; i < nreq; i++) {
498                if (!this.lindep[i]) {
499                    rnk += 1.0;
500                }
501            }
502            final double var = rss[nreq - 1] / (nobs - rnk);
503            final double[] rinv = new double[nreq * (nreq - 1) / 2];
504            inverse(rinv, nreq);
505            final double[] covmat = new double[nreq * (nreq + 1) / 2];
506            Arrays.fill(covmat, Double.NaN);
507            int pos2;
508            int pos1;
509            int start = 0;
510            double total = 0;
511            for (int row = 0; row < nreq; row++) {
512                pos2 = start;
513                if (!this.lindep[row]) {
514                    for (int col = row; col < nreq; col++) {
515                        if (!this.lindep[col]) {
516                            pos1 = start + col - row;
517                            if (row == col) {
518                                total = 1.0 / d[col];
519                            } else {
520                                total = rinv[pos1 - 1] / d[col];
521                            }
522                            for (int k = col + 1; k < nreq; k++) {
523                                if (!this.lindep[k]) {
524                                    total += rinv[pos1] * rinv[pos2] / d[k];
525                                }
526                                ++pos1;
527                                ++pos2;
528                            }
                        covmat[(col + 1) * col / 2 + row] = total * var;
530                        } else {
531                            pos2 += nreq - col - 1;
532                        }
533                    }
534                }
535                start += nreq - row - 1;
536            }
537            return covmat;
538        }
539    
540        /**
541         * This internal method calculates the inverse of the upper-triangular portion
542         * of the R matrix.
543         * @param rinv  the storage for the inverse of r
544         * @param nreq how many of the regressors to include (either in canonical
545         * order, or in the current reordered state)
546         */
547        private void inverse(double[] rinv, int nreq) {
548            int pos = nreq * (nreq - 1) / 2 - 1;
549            int pos1 = -1;
550            int pos2 = -1;
551            double total = 0.0;
552            Arrays.fill(rinv, Double.NaN);
553            for (int row = nreq - 1; row > 0; --row) {
554                if (!this.lindep[row]) {
555                    final int start = (row - 1) * (nvars + nvars - row) / 2;
556                    for (int col = nreq; col > row; --col) {
557                        pos1 = start;
558                        pos2 = pos;
559                        total = 0.0;
560                        for (int k = row; k < col - 1; k++) {
561                            pos2 += nreq - k - 1;
562                            if (!this.lindep[k]) {
563                                total += -r[pos1] * rinv[pos2];
564                            }
565                            ++pos1;
566                        }
567                        rinv[pos] = total - r[pos1];
568                        --pos;
569                    }
570                } else {
571                    pos -= nreq - row;
572                }
573            }
574        }
575    
576        /**
     * In the original algorithm only the partial correlations of the regressors
     * are returned to the user. In this implementation, we have <pre>
579         * corr =
580         * {
581         *   corrxx - lower triangular
582         *   corrxy - bottom row of the matrix
583         * }
584         * Replaces subroutines PCORR and COR of:
585         * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
586         *
587         * <p>Calculate partial correlations after the variables in rows
588         * 1, 2, ..., IN have been forced into the regression.
589         * If IN = 1, and the first row of R represents a constant in the
590         * model, then the usual simple correlations are returned.</p>
591         *
592         * <p>If IN = 0, the value returned in array CORMAT for the correlation
593         * of variables Xi & Xj is: <pre>
594         * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre></p>
595         *
596         * <p>On return, array CORMAT contains the upper triangle of the matrix of
597         * partial correlations stored by rows, excluding the 1's on the diagonal.
598         * e.g. if IN = 2, the consecutive elements returned are:
599         * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
600         * Array YCORR stores the partial correlations with the Y-variable
601         * starting with YCORR(IN+1) = partial correlation with the variable in
602         * position (IN+1). </p>
603         *
604         * @param in how many of the regressors to include (either in canonical
605         * order, or in the current reordered state)
606         * @return an array with the partial correlations of the remainder of
607         * regressors with each other and the regressand, in lower triangular form
608         */
    public double[] getPartialCorrelations(int in) {
        if (in < -1 || in >= nvars) {
            return null;
        }
        final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
        int pos;
        int pos1;
        int pos2;
        final int rms_off = -in;
        final int wrk_off = -(in + 1);
        final double[] rms = new double[nvars - in];
        final double[] work = new double[nvars - in - 1];
        double sumxx;
        double sumxy;
        double sumyy;
        final int offXX = (nvars - in) * (nvars - in - 1) / 2;
625            final int nvm = nvars - 1;
626            final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
627            if (d[in] > 0.0) {
            rms[in + rms_off] = 1.0 / FastMath.sqrt(d[in]);
629            }
630            for (int col = in + 1; col < nvars; col++) {
631                pos = base_pos + col - 1 - in;
632                sumxx = d[col];
633                for (int row = in; row < col; row++) {
634                    sumxx += d[row] * r[pos] * r[pos];
635                    pos += nvars - row - 2;
636                }
637                if (sumxx > 0.0) {
                rms[col + rms_off] = 1.0 / FastMath.sqrt(sumxx);
639                } else {
640                    rms[col + rms_off] = 0.0;
641                }
642            }
643            sumyy = sserr;
644            for (int row = in; row < nvars; row++) {
645                sumyy += d[row] * rhs[row] * rhs[row];
646            }
647            if (sumyy > 0.0) {
            sumyy = 1.0 / FastMath.sqrt(sumyy);
649            }
650            pos = 0;
651            for (int col1 = in; col1 < nvars; col1++) {
652                sumxy = 0.0;
653                Arrays.fill(work, 0.0);
654                pos1 = base_pos + col1 - in - 1;
655                for (int row = in; row < col1; row++) {
656                    pos2 = pos1 + 1;
657                    for (int col2 = col1 + 1; col2 < nvars; col2++) {
658                        work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
659                        pos2++;
660                    }
661                    sumxy += d[row] * r[pos1] * rhs[row];
662                    pos1 += nvars - row - 2;
663                }
664                pos2 = pos1 + 1;
665                for (int col2 = col1 + 1; col2 < nvars; col2++) {
666                    work[col2 + wrk_off] += d[col1] * r[pos2];
667                    ++pos2;
                output[(col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
669                            work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
670                    ++pos;
671                }
672                sumxy += d[col1] * rhs[col1];
673                output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
674            }
675    
676            return output;
677        }
678    
679        /**
680         * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
681         * Move variable from position FROM to position TO in an
682         * orthogonal reduction produced by AS75.1.
683         *
684         * @param from initial position
685         * @param to destination
686         */
687        private void vmove(int from, int to) {
688            double d1;
689            double d2;
690            double X;
691            double d1new;
692            double d2new;
693            double cbar;
694            double sbar;
695            double Y;
696            int first;
697            int inc;
698            int m1;
699            int m2;
700            int mp1;
701            int pos;
702            boolean bSkipTo40 = false;
703            if (from == to) {
704                return;
705            }
706            if (!this.rss_set) {
707                ss();
708            }
709            int count = 0;
710            if (from < to) {
711                first = from;
712                inc = 1;
713                count = to - from;
714            } else {
715                first = from - 1;
716                inc = -1;
717                count = from - to;
718            }
719    
720            int m = first;
721            int idx = 0;
722            while (idx < count) {
723                m1 = m * (nvars + nvars - m - 1) / 2;
724                m2 = m1 + nvars - m - 1;
725                mp1 = m + 1;
726    
727                d1 = d[m];
728                d2 = d[mp1];
729                // Special cases.
730                if (d1 > this.epsilon || d2 > this.epsilon) {
731                    X = r[m1];
                if (FastMath.abs(X) * FastMath.sqrt(d1) < tol[mp1]) {
733                        X = 0.0;
734                    }
                if (d1 < this.epsilon || FastMath.abs(X) < this.epsilon) {
736                        d[m] = d2;
737                        d[mp1] = d1;
738                        r[m1] = 0.0;
739                        for (int col = m + 2; col < nvars; col++) {
740                            ++m1;
741                            X = r[m1];
742                            r[m1] = r[m2];
743                            r[m2] = X;
744                            ++m2;
745                        }
746                        X = rhs[m];
747                        rhs[m] = rhs[mp1];
748                        rhs[mp1] = X;
749                        bSkipTo40 = true;
750                        //break;
751                    } else if (d2 < this.epsilon) {
752                        d[m] = d1 * X * X;
753                        r[m1] = 1.0 / X;
754                        for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
755                            r[_i] /= X;
756                        }
757                        rhs[m] = rhs[m] / X;
758                        bSkipTo40 = true;
759                        //break;
760                    }
761                    if (!bSkipTo40) {
762                        d1new = d2 + d1 * X * X;
763                        cbar = d2 / d1new;
764                        sbar = X * d1 / d1new;
765                        d2new = d1 * cbar;
766                        d[m] = d1new;
767                        d[mp1] = d2new;
768                        r[m1] = sbar;
769                        for (int col = m + 2; col < nvars; col++) {
770                            ++m1;
771                            Y = r[m1];
772                            r[m1] = cbar * r[m2] + sbar * Y;
773                            r[m2] = Y - X * r[m2];
774                            ++m2;
775                        }
776                        Y = rhs[m];
777                        rhs[m] = cbar * rhs[mp1] + sbar * Y;
778                        rhs[mp1] = Y - X * rhs[mp1];
779                    }
780                }
781                if (m > 0) {
782                    pos = m;
783                    for (int row = 0; row < m; row++) {
784                        X = r[pos];
785                        r[pos] = r[pos - 1];
786                        r[pos - 1] = X;
787                        pos += nvars - row - 2;
788                    }
789                }
790                // Adjust variable order (VORDER), the tolerances (TOL) and
791                // the vector of residual sums of squares (RSS).
792                m1 = vorder[m];
793                vorder[m] = vorder[mp1];
794                vorder[mp1] = m1;
795                X = tol[m];
796                tol[m] = tol[mp1];
797                tol[mp1] = X;
798                rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];
799    
800                m += inc;
801                ++idx;
802            }
803        }
804    
805        /**
806         * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2
807         *
808         * <p> Re-order the variables in an orthogonal reduction produced by
809         * AS75.1 so that the N variables in LIST start at position POS1,
810         * though will not necessarily be in the same order as in LIST.
811         * Any variables in VORDER before position POS1 are not moved.
812         * Auxiliary routine called: VMOVE. </p>
813         *
814         * <p>This internal method reorders the regressors.</p>
815         *
816         * @param list the regressors to move
817         * @param pos1 where the list will be placed
     * @return -1 on error, 0 on success
819         */
820        private int reorderRegressors(int[] list, int pos1) {
821            int next;
822            int i;
823            int l;
824            if (list.length < 1 || list.length > nvars + 1 - pos1) {
825                return -1;
826            }
827            next = pos1;
828            i = pos1;
829            while (i < nvars) {
830                l = vorder[i];
831                for (int j = 0; j < list.length; j++) {
832                    if (l == list[j] && i > next) {
833                        this.vmove(i, next);
834                        ++next;
835                        if (next >= list.length + pos1) {
836                            return 0;
837                        } else {
838                            break;
839                        }
840                    }
841                }
842                ++i;
843            }
844            return 0;
845        }
846    
847        /**
     * Gets the diagonal of the hat matrix, also known as the leverage matrix.
849         *
     * @param  row_data the regressor values for one observation
     * @return the diagonal element of the hat matrix for that observation
852         */
853        public double getDiagonalOfHatMatrix(double[] row_data) {
854            double[] wk = new double[this.nvars];
855            int pos;
856            double total;
857    
858            if (row_data.length > nvars) {
859                return Double.NaN;
860            }
861            double[] xrow;
862            if (this.hasIntercept) {
863                xrow = new double[row_data.length + 1];
864                xrow[0] = 1.0;
865                System.arraycopy(row_data, 0, xrow, 1, row_data.length);
866            } else {
867                xrow = row_data;
868            }
869            double hii = 0.0;
870            for (int col = 0; col < xrow.length; col++) {
            if (FastMath.sqrt(d[col]) < tol[col]) {
872                    wk[col] = 0.0;
873                } else {
874                    pos = col - 1;
875                    total = xrow[col];
876                    for (int row = 0; row < col; row++) {
877                        total = smartAdd(total, -wk[row] * r[pos]);
878                        pos += nvars - row - 2;
879                    }
880                    wk[col] = total;
881                    hii = smartAdd(hii, (total * total) / d[col]);
882                }
883            }
884            return hii;
885        }
886    
887        /**
     * Gets the order of the regressors, useful if some type of reordering
     * has occurred. Calling {@link #regress(int[])} will trigger
     * a reordering.
891         *
892         * @return int[] with the current order of the regressors
893         */
    public int[] getOrderOfRegressors() {
895            return MathArrays.copyOf(vorder);
896        }
897    
898        /**
899         * Conducts a regression on the data in the model, using all regressors.
900         *
901         * @return RegressionResults the structure holding all regression results
     * @throws ModelSpecificationException if the number of observations is
     * not greater than the number of variables
904         */
905        public RegressionResults regress() throws ModelSpecificationException {
906            return regress(this.nvars);
907        }
908    
909        /**
910         * Conducts a regression on the data in the model, using a subset of regressors.
911         *
     * @param numberOfRegressors how many of the regressors to include (either in canonical
913         * order, or in the current reordered state)
914         * @return RegressionResults the structure holding all regression results
     * @throws ModelSpecificationException if the number of observations is
     * not greater than the number of regressors requested, or if the number of
     * regressors requested is greater than the number of regressors in the model
918         */
919        public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
920            if (this.nobs <= numberOfRegressors) {
921               throw new ModelSpecificationException(
922                       LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
923                       this.nobs, numberOfRegressors);
924            }
        if (numberOfRegressors > this.nvars) {
926                throw new ModelSpecificationException(
927                        LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
928            }
929    
930            tolset();
931            singcheck();
932    
933            double[] beta = this.regcf(numberOfRegressors);
934    
935            ss();
936    
937            double[] cov = this.cov(numberOfRegressors);
938    
939            int rnk = 0;
940            for (int i = 0; i < this.lindep.length; i++) {
941                if (!this.lindep[i]) {
942                    ++rnk;
943                }
944            }
945    
946            boolean needsReorder = false;
947            for (int i = 0; i < numberOfRegressors; i++) {
948                if (this.vorder[i] != i) {
949                    needsReorder = true;
950                    break;
951                }
952            }
953            if (!needsReorder) {
954                return new RegressionResults(
955                        beta, new double[][]{cov}, true, this.nobs, rnk,
956                        this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
957            } else {
958                double[] betaNew = new double[beta.length];
959                double[] covNew = new double[cov.length];
960    
961                int[] newIndices = new int[beta.length];
962                for (int i = 0; i < nvars; i++) {
963                    for (int j = 0; j < numberOfRegressors; j++) {
964                        if (this.vorder[j] == i) {
                        betaNew[i] = beta[j];
966                            newIndices[i] = j;
967                        }
968                    }
969                }
970    
971                int idx1 = 0;
972                int idx2;
973                int _i;
974                int _j;
975                for (int i = 0; i < beta.length; i++) {
976                    _i = newIndices[i];
977                    for (int j = 0; j <= i; j++, idx1++) {
978                        _j = newIndices[j];
979                        if (_i > _j) {
980                            idx2 = _i * (_i + 1) / 2 + _j;
981                        } else {
982                            idx2 = _j * (_j + 1) / 2 + _i;
983                        }
984                        covNew[idx1] = cov[idx2];
985                    }
986                }
987                return new RegressionResults(
988                        betaNew, new double[][]{covNew}, true, this.nobs, rnk,
989                        this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
990            }
991        }
992    
993        /**
     * Conducts a regression on the data in the model, using the regressors in the
     * given array. Calling this method will change the internal order of the
     * regressors, and care is required in interpreting the hat matrix.
997         *
998         * @param  variablesToInclude array of variables to include in regression
999         * @return RegressionResults the structure holding all regression results
     * @throws ModelSpecificationException if the number of observations is
     * not greater than the number of variables, if the number of regressors
     * requested is greater than the number of regressors in the model, or if a
     * regressor index in the regressor array does not exist
1004         */
1005        public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
1006            if (variablesToInclude.length > this.nvars) {
1007                throw new ModelSpecificationException(
1008                        LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
1009            }
1010            if (this.nobs <= this.nvars) {
1011                throw new ModelSpecificationException(
1012                        LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
1013                        this.nobs, this.nvars);
1014            }
1015            Arrays.sort(variablesToInclude);
1016            int iExclude = 0;
1017            for (int i = 0; i < variablesToInclude.length; i++) {
            if (variablesToInclude[i] >= this.nvars) {
                throw new ModelSpecificationException(
                        LocalizedFormats.INDEX_LARGER_THAN_MAX, variablesToInclude[i], this.nvars);
1021                }
1022                if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
1023                    variablesToInclude[i] = -1;
1024                    ++iExclude;
1025                }
1026            }
1027            int[] series;
1028            if (iExclude > 0) {
1029                int j = 0;
1030                series = new int[variablesToInclude.length - iExclude];
1031                for (int i = 0; i < variablesToInclude.length; i++) {
1032                    if (variablesToInclude[i] > -1) {
1033                        series[j] = variablesToInclude[i];
1034                        ++j;
1035                    }
1036                }
1037            } else {
1038                series = variablesToInclude;
1039            }
1040    
1041            reorderRegressors(series, 0);
1042            tolset();
1043            singcheck();
1044    
1045            double[] beta = this.regcf(series.length);
1046    
1047            ss();
1048    
1049            double[] cov = this.cov(series.length);
1050    
1051            int rnk = 0;
1052            for (int i = 0; i < this.lindep.length; i++) {
1053                if (!this.lindep[i]) {
1054                    ++rnk;
1055                }
1056            }
1057    
1058            boolean needsReorder = false;
        for (int i = 0; i < series.length; i++) {
1060                if (this.vorder[i] != series[i]) {
1061                    needsReorder = true;
1062                    break;
1063                }
1064            }
1065            if (!needsReorder) {
1066                return new RegressionResults(
1067                        beta, new double[][]{cov}, true, this.nobs, rnk,
1068                        this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1069            } else {
1070                double[] betaNew = new double[beta.length];
1071                int[] newIndices = new int[beta.length];
1072                for (int i = 0; i < series.length; i++) {
1073                    for (int j = 0; j < this.vorder.length; j++) {
1074                        if (this.vorder[j] == series[i]) {
                        betaNew[i] = beta[j];
1076                            newIndices[i] = j;
1077                        }
1078                    }
1079                }
1080                double[] covNew = new double[cov.length];
1081                int idx1 = 0;
1082                int idx2;
1083                int _i;
1084                int _j;
1085                for (int i = 0; i < beta.length; i++) {
1086                    _i = newIndices[i];
1087                    for (int j = 0; j <= i; j++, idx1++) {
1088                        _j = newIndices[j];
1089                        if (_i > _j) {
1090                            idx2 = _i * (_i + 1) / 2 + _j;
1091                        } else {
1092                            idx2 = _j * (_j + 1) / 2 + _i;
1093                        }
1094                        covNew[idx1] = cov[idx2];
1095                    }
1096                }
1097                return new RegressionResults(
1098                        betaNew, new double[][]{covNew}, true, this.nobs, rnk,
1099                        this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1100            }
1101        }
1102    }