View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.descriptive.moment;
18  
19  import java.io.Serializable;
20  
21  import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
22  
23  /**
24   * Computes the variance of the available values.  By default, the unbiased
25   * "sample variance" definitional formula is used: 
26   * <p>
27   * variance = sum((x_i - mean)^2) / (n - 1) </p>
28   * <p>
29   * where mean is the {@link Mean} and <code>n</code> is the number
30   * of sample observations.</p>
31   * <p>
32   * The definitional formula does not have good numerical properties, so
33   * this implementation does not compute the statistic using the definitional
34   * formula. <ul>
35   * <li> The <code>getResult</code> method computes the variance using 
36   * updating formulas based on West's algorithm, as described in
37   * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and
38   * J. G. Lewis 1979, <i>Communications of the ACM</i>,
39   * vol. 22 no. 9, pp. 526-531.</a></li>
40   * <li> The <code>evaluate</code> methods leverage the fact that they have the
41   * full array of values in memory to execute a two-pass algorithm. 
42   * Specifically, these methods use the "corrected two-pass algorithm" from
43   * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>,
44   * American Statistician, August 1983.</li></ul>
45   * Note that adding values using <code>increment</code> or 
46   * <code>incrementAll</code> and then executing <code>getResult</code> will
47   * sometimes give a different, less accurate, result than executing 
48   * <code>evaluate</code> with the full array of values. The former approach
49   * should only be used when the full array of values is not available.</p>
50   * <p>
51   * The "population variance"  ( sum((x_i - mean)^2) / n ) can also
52   * be computed using this statistic.  The <code>isBiasCorrected</code>
53   * property determines whether the "population" or "sample" value is
54   * returned by the <code>evaluate</code> and <code>getResult</code> methods.
55   * To compute population variances, set this property to <code>false.</code>
56   * </p>
57   * <p>
58   * <strong>Note that this implementation is not synchronized.</strong> If 
59   * multiple threads access an instance of this class concurrently, and at least
60   * one of the threads invokes the <code>increment()</code> or 
61   * <code>clear()</code> method, it must be synchronized externally.</p>
62   * 
63   * @version $Revision: 619822 $ $Date: 2008-02-08 03:08:59 -0700 (Fri, 08 Feb 2008) $
64   */
65  public class Variance extends AbstractStorelessUnivariateStatistic implements Serializable {
66  
67      /** Serializable version identifier */
68      private static final long serialVersionUID = -9111962718267217978L;  
69        
70      /** SecondMoment is used in incremental calculation of Variance*/
71      protected SecondMoment moment = null;
72  
73      /**
74       * Boolean test to determine if this Variance should also increment
75       * the second moment, this evaluates to false when this Variance is
76       * constructed with an external SecondMoment as a parameter.
77       */
78      protected boolean incMoment = true;
79      
80      /**
81       * Determines whether or not bias correction is applied when computing the
82       * value of the statisic.  True means that bias is corrected.  See 
83       * {@link Variance} for details on the formula.
84       */
85      private boolean isBiasCorrected = true;
86  
87      /**
88       * Constructs a Variance with default (true) <code>isBiasCorrected</code>
89       * property.
90       */
91      public Variance() {
92          moment = new SecondMoment();
93      }
94  
95      /**
96       * Constructs a Variance based on an external second moment.
97       * 
98       * @param m2 the SecondMoment (Third or Fourth moments work
99       * here as well.)
100      */
101     public Variance(final SecondMoment m2) {
102         incMoment = false;
103         this.moment = m2;
104     }
105     
106     /**
107      * Constructs a Variance with the specified <code>isBiasCorrected</code>
108      * property
109      * 
110      * @param isBiasCorrected  setting for bias correction - true means
111      * bias will be corrected and is equivalent to using the argumentless
112      * constructor
113      */
114     public Variance(boolean isBiasCorrected) {
115         moment = new SecondMoment();
116         this.isBiasCorrected = isBiasCorrected;
117     }
118     
119     /**
120      * Constructs a Variance with the specified <code>isBiasCorrected</code>
121      * property and the supplied external second moment.
122      * 
123      * @param isBiasCorrected  setting for bias correction - true means
124      * bias will be corrected
125      * @param m2 the SecondMoment (Third or Fourth moments work
126      * here as well.)
127      */
128     public Variance(boolean isBiasCorrected, SecondMoment m2) {
129         incMoment = false;
130         this.moment = m2;
131         this.isBiasCorrected = isBiasCorrected;      
132     }
133    
134     /**
135      * {@inheritDoc}  
136      * <p>If all values are available, it is more accurate to use 
137      * {@link #evaluate(double[])} rather than adding values one at a time
138      * using this method and then executing {@link #getResult}, since
139      * <code>evaluate</code> leverages the fact that is has the full 
140      * list of values together to execute a two-pass algorithm.  
141      * See {@link Variance}.</p>
142      */
143     public void increment(final double d) {
144         if (incMoment) {
145             moment.increment(d);
146         }
147     }
148 
149     /**
150      * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#getResult()
151      */
152     public double getResult() {
153             if (moment.n == 0) {
154                 return Double.NaN;
155             } else if (moment.n == 1) {
156                 return 0d;
157             } else {
158                 if (isBiasCorrected) {
159                     return moment.m2 / ((double) moment.n - 1d);
160                 } else {
161                     return moment.m2 / ((double) moment.n);
162                 }
163             }
164     }
165 
166     /**
167      * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#getN()
168      */
169     public long getN() {
170         return moment.getN();
171     }
172     
173     /**
174      * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#clear()
175      */
176     public void clear() {
177         if (incMoment) {
178             moment.clear();
179         }
180     }
181     
182     /**
183      * Returns the variance of the entries in the input array, or 
184      * <code>Double.NaN</code> if the array is empty.
185      * <p>
186      * See {@link Variance} for details on the computing algorithm.</p>
187      * <p>
188      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
189      * <p>
190      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
191      * <p>
192      * Does not change the internal state of the statistic.</p>
193      * 
194      * @param values the input array
195      * @return the variance of the values or Double.NaN if length = 0
196      * @throws IllegalArgumentException if the array is null
197      */
198     public double evaluate(final double[] values) {
199         if (values == null) {
200             throw new IllegalArgumentException("input values array is null");
201         }
202         return evaluate(values, 0, values.length);
203     }
204 
205     /**
206      * Returns the variance of the entries in the specified portion of
207      * the input array, or <code>Double.NaN</code> if the designated subarray
208      * is empty.
209      * <p>
210      * See {@link Variance} for details on the computing algorithm.</p>
211      * <p>
212      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
213      * <p>
214      * Does not change the internal state of the statistic.</p>
215      * <p>
216      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
217      * 
218      * @param values the input array
219      * @param begin index of the first array element to include
220      * @param length the number of elements to include
221      * @return the variance of the values or Double.NaN if length = 0
222      * @throws IllegalArgumentException if the array is null or the array index
223      *  parameters are not valid
224      */
225     public double evaluate(final double[] values, final int begin, final int length) {
226 
227         double var = Double.NaN;
228 
229         if (test(values, begin, length)) {
230             clear();
231             if (length == 1) {
232                 var = 0.0;
233             } else if (length > 1) {
234                 Mean mean = new Mean();
235                 double m = mean.evaluate(values, begin, length);
236                 var = evaluate(values, m, begin, length);
237             }
238         }
239         return var;
240     }
241     
242     /**
243      * Returns the variance of the entries in the specified portion of
244      * the input array, using the precomputed mean value.  Returns 
245      * <code>Double.NaN</code> if the designated subarray is empty.
246      * <p>
247      * See {@link Variance} for details on the computing algorithm.</p>
248      * <p>
249      * The formula used assumes that the supplied mean value is the arithmetic
250      * mean of the sample data, not a known population parameter.  This method
251      * is supplied only to save computation when the mean has already been
252      * computed.</p>
253      * <p>
254      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
255      * <p>
256      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
257      * <p>
258      * Does not change the internal state of the statistic.</p>
259      * 
260      * @param values the input array
261      * @param mean the precomputed mean value
262      * @param begin index of the first array element to include
263      * @param length the number of elements to include
264      * @return the variance of the values or Double.NaN if length = 0
265      * @throws IllegalArgumentException if the array is null or the array index
266      *  parameters are not valid
267      */
268     public double evaluate(final double[] values, final double mean, 
269             final int begin, final int length) {
270         
271         double var = Double.NaN;
272 
273         if (test(values, begin, length)) {
274             if (length == 1) {
275                 var = 0.0;
276             } else if (length > 1) {
277                 double accum = 0.0;
278                 double dev = 0.0;
279                 double accum2 = 0.0;
280                 for (int i = begin; i < begin + length; i++) {
281                     dev = values[i] - mean;
282                     accum += dev * dev;
283                     accum2 += dev;
284                 }
285                 double len = (double) length;            
286                 if (isBiasCorrected) {
287                     var = (accum - (accum2 * accum2 / len)) / (len - 1.0);
288                 } else {
289                     var = (accum - (accum2 * accum2 / len)) / len;
290                 }
291             }
292         }
293         return var;
294     }
295     
296     /**
297      * Returns the variance of the entries in the input array, using the
298      * precomputed mean value.  Returns <code>Double.NaN</code> if the array
299      * is empty.
300      * <p>
301      * See {@link Variance} for details on the computing algorithm.</p>
302      * <p>
303      * If <code>isBiasCorrected</code> is <code>true</code> the formula used
304      * assumes that the supplied mean value is the arithmetic mean of the
305      * sample data, not a known population parameter.  If the mean is a known
306      * population parameter, or if the "population" version of the variance is
307      * desired, set <code>isBiasCorrected</code> to <code>false</code> before
308      * invoking this method.</p>
309      * <p>
310      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
311      * <p>
312      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
313      * <p>
314      * Does not change the internal state of the statistic.</p>
315      * 
316      * @param values the input array
317      * @param mean the precomputed mean value
318      * @return the variance of the values or Double.NaN if the array is empty
319      * @throws IllegalArgumentException if the array is null
320      */
321     public double evaluate(final double[] values, final double mean) {
322         return evaluate(values, mean, 0, values.length);
323     }
324 
325     /**
326      * @return Returns the isBiasCorrected.
327      */
328     public boolean isBiasCorrected() {
329         return isBiasCorrected;
330     }
331 
332     /**
333      * @param isBiasCorrected The isBiasCorrected to set.
334      */
335     public void setBiasCorrected(boolean isBiasCorrected) {
336         this.isBiasCorrected = isBiasCorrected;
337     }
338 
339 }