1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math.stat.descriptive.moment;
18
19 import java.io.Serializable;
20
21 import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
22
23 /**
24 * Computes the variance of the available values. By default, the unbiased
25 * "sample variance" definitional formula is used:
26 * <p>
27 * variance = sum((x_i - mean)^2) / (n - 1) </p>
28 * <p>
29 * where mean is the {@link Mean} and <code>n</code> is the number
30 * of sample observations.</p>
31 * <p>
32 * The definitional formula does not have good numerical properties, so
33 * this implementation does not compute the statistic using the definitional
34 * formula. <ul>
35 * <li> The <code>getResult</code> method computes the variance using
36 * updating formulas based on West's algorithm, as described in
37 * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and
38 * J. G. Lewis 1979, <i>Communications of the ACM</i>,
39 * vol. 22 no. 9, pp. 526-531.</a></li>
40 * <li> The <code>evaluate</code> methods leverage the fact that they have the
41 * full array of values in memory to execute a two-pass algorithm.
42 * Specifically, these methods use the "corrected two-pass algorithm" from
43 * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>,
44 * American Statistician, August 1983.</li></ul>
45 * Note that adding values using <code>increment</code> or
46 * <code>incrementAll</code> and then executing <code>getResult</code> will
47 * sometimes give a different, less accurate, result than executing
48 * <code>evaluate</code> with the full array of values. The former approach
49 * should only be used when the full array of values is not available.</p>
50 * <p>
51 * The "population variance" ( sum((x_i - mean)^2) / n ) can also
52 * be computed using this statistic. The <code>isBiasCorrected</code>
53 * property determines whether the "population" or "sample" value is
54 * returned by the <code>evaluate</code> and <code>getResult</code> methods.
55 * To compute population variances, set this property to <code>false.</code>
56 * </p>
57 * <p>
58 * <strong>Note that this implementation is not synchronized.</strong> If
59 * multiple threads access an instance of this class concurrently, and at least
60 * one of the threads invokes the <code>increment()</code> or
61 * <code>clear()</code> method, it must be synchronized externally.</p>
62 *
63 * @version $Revision: 619822 $ $Date: 2008-02-08 03:08:59 -0700 (Fri, 08 Feb 2008) $
64 */
65 public class Variance extends AbstractStorelessUnivariateStatistic implements Serializable {
66
67 /** Serializable version identifier */
68 private static final long serialVersionUID = -9111962718267217978L;
69
70 /** SecondMoment is used in incremental calculation of Variance*/
71 protected SecondMoment moment = null;
72
73 /**
74 * Boolean test to determine if this Variance should also increment
75 * the second moment, this evaluates to false when this Variance is
76 * constructed with an external SecondMoment as a parameter.
77 */
78 protected boolean incMoment = true;
79
80 /**
81 * Determines whether or not bias correction is applied when computing the
82 * value of the statisic. True means that bias is corrected. See
83 * {@link Variance} for details on the formula.
84 */
85 private boolean isBiasCorrected = true;
86
87 /**
88 * Constructs a Variance with default (true) <code>isBiasCorrected</code>
89 * property.
90 */
91 public Variance() {
92 moment = new SecondMoment();
93 }
94
95 /**
96 * Constructs a Variance based on an external second moment.
97 *
98 * @param m2 the SecondMoment (Third or Fourth moments work
99 * here as well.)
100 */
101 public Variance(final SecondMoment m2) {
102 incMoment = false;
103 this.moment = m2;
104 }
105
106 /**
107 * Constructs a Variance with the specified <code>isBiasCorrected</code>
108 * property
109 *
110 * @param isBiasCorrected setting for bias correction - true means
111 * bias will be corrected and is equivalent to using the argumentless
112 * constructor
113 */
114 public Variance(boolean isBiasCorrected) {
115 moment = new SecondMoment();
116 this.isBiasCorrected = isBiasCorrected;
117 }
118
119 /**
120 * Constructs a Variance with the specified <code>isBiasCorrected</code>
121 * property and the supplied external second moment.
122 *
123 * @param isBiasCorrected setting for bias correction - true means
124 * bias will be corrected
125 * @param m2 the SecondMoment (Third or Fourth moments work
126 * here as well.)
127 */
128 public Variance(boolean isBiasCorrected, SecondMoment m2) {
129 incMoment = false;
130 this.moment = m2;
131 this.isBiasCorrected = isBiasCorrected;
132 }
133
134 /**
135 * {@inheritDoc}
136 * <p>If all values are available, it is more accurate to use
137 * {@link #evaluate(double[])} rather than adding values one at a time
138 * using this method and then executing {@link #getResult}, since
139 * <code>evaluate</code> leverages the fact that is has the full
140 * list of values together to execute a two-pass algorithm.
141 * See {@link Variance}.</p>
142 */
143 public void increment(final double d) {
144 if (incMoment) {
145 moment.increment(d);
146 }
147 }
148
149 /**
150 * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#getResult()
151 */
152 public double getResult() {
153 if (moment.n == 0) {
154 return Double.NaN;
155 } else if (moment.n == 1) {
156 return 0d;
157 } else {
158 if (isBiasCorrected) {
159 return moment.m2 / ((double) moment.n - 1d);
160 } else {
161 return moment.m2 / ((double) moment.n);
162 }
163 }
164 }
165
166 /**
167 * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#getN()
168 */
169 public long getN() {
170 return moment.getN();
171 }
172
173 /**
174 * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#clear()
175 */
176 public void clear() {
177 if (incMoment) {
178 moment.clear();
179 }
180 }
181
182 /**
183 * Returns the variance of the entries in the input array, or
184 * <code>Double.NaN</code> if the array is empty.
185 * <p>
186 * See {@link Variance} for details on the computing algorithm.</p>
187 * <p>
188 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
189 * <p>
190 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
191 * <p>
192 * Does not change the internal state of the statistic.</p>
193 *
194 * @param values the input array
195 * @return the variance of the values or Double.NaN if length = 0
196 * @throws IllegalArgumentException if the array is null
197 */
198 public double evaluate(final double[] values) {
199 if (values == null) {
200 throw new IllegalArgumentException("input values array is null");
201 }
202 return evaluate(values, 0, values.length);
203 }
204
205 /**
206 * Returns the variance of the entries in the specified portion of
207 * the input array, or <code>Double.NaN</code> if the designated subarray
208 * is empty.
209 * <p>
210 * See {@link Variance} for details on the computing algorithm.</p>
211 * <p>
212 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
213 * <p>
214 * Does not change the internal state of the statistic.</p>
215 * <p>
216 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
217 *
218 * @param values the input array
219 * @param begin index of the first array element to include
220 * @param length the number of elements to include
221 * @return the variance of the values or Double.NaN if length = 0
222 * @throws IllegalArgumentException if the array is null or the array index
223 * parameters are not valid
224 */
225 public double evaluate(final double[] values, final int begin, final int length) {
226
227 double var = Double.NaN;
228
229 if (test(values, begin, length)) {
230 clear();
231 if (length == 1) {
232 var = 0.0;
233 } else if (length > 1) {
234 Mean mean = new Mean();
235 double m = mean.evaluate(values, begin, length);
236 var = evaluate(values, m, begin, length);
237 }
238 }
239 return var;
240 }
241
242 /**
243 * Returns the variance of the entries in the specified portion of
244 * the input array, using the precomputed mean value. Returns
245 * <code>Double.NaN</code> if the designated subarray is empty.
246 * <p>
247 * See {@link Variance} for details on the computing algorithm.</p>
248 * <p>
249 * The formula used assumes that the supplied mean value is the arithmetic
250 * mean of the sample data, not a known population parameter. This method
251 * is supplied only to save computation when the mean has already been
252 * computed.</p>
253 * <p>
254 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
255 * <p>
256 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
257 * <p>
258 * Does not change the internal state of the statistic.</p>
259 *
260 * @param values the input array
261 * @param mean the precomputed mean value
262 * @param begin index of the first array element to include
263 * @param length the number of elements to include
264 * @return the variance of the values or Double.NaN if length = 0
265 * @throws IllegalArgumentException if the array is null or the array index
266 * parameters are not valid
267 */
268 public double evaluate(final double[] values, final double mean,
269 final int begin, final int length) {
270
271 double var = Double.NaN;
272
273 if (test(values, begin, length)) {
274 if (length == 1) {
275 var = 0.0;
276 } else if (length > 1) {
277 double accum = 0.0;
278 double dev = 0.0;
279 double accum2 = 0.0;
280 for (int i = begin; i < begin + length; i++) {
281 dev = values[i] - mean;
282 accum += dev * dev;
283 accum2 += dev;
284 }
285 double len = (double) length;
286 if (isBiasCorrected) {
287 var = (accum - (accum2 * accum2 / len)) / (len - 1.0);
288 } else {
289 var = (accum - (accum2 * accum2 / len)) / len;
290 }
291 }
292 }
293 return var;
294 }
295
296 /**
297 * Returns the variance of the entries in the input array, using the
298 * precomputed mean value. Returns <code>Double.NaN</code> if the array
299 * is empty.
300 * <p>
301 * See {@link Variance} for details on the computing algorithm.</p>
302 * <p>
303 * If <code>isBiasCorrected</code> is <code>true</code> the formula used
304 * assumes that the supplied mean value is the arithmetic mean of the
305 * sample data, not a known population parameter. If the mean is a known
306 * population parameter, or if the "population" version of the variance is
307 * desired, set <code>isBiasCorrected</code> to <code>false</code> before
308 * invoking this method.</p>
309 * <p>
310 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
311 * <p>
312 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
313 * <p>
314 * Does not change the internal state of the statistic.</p>
315 *
316 * @param values the input array
317 * @param mean the precomputed mean value
318 * @return the variance of the values or Double.NaN if the array is empty
319 * @throws IllegalArgumentException if the array is null
320 */
321 public double evaluate(final double[] values, final double mean) {
322 return evaluate(values, mean, 0, values.length);
323 }
324
325 /**
326 * @return Returns the isBiasCorrected.
327 */
328 public boolean isBiasCorrected() {
329 return isBiasCorrected;
330 }
331
332 /**
333 * @param isBiasCorrected The isBiasCorrected to set.
334 */
335 public void setBiasCorrected(boolean isBiasCorrected) {
336 this.isBiasCorrected = isBiasCorrected;
337 }
338
339 }