View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.descriptive;
18  
19  import java.io.Serializable;
20  import java.lang.reflect.InvocationTargetException;
21  import java.util.Arrays;
22  
23  import org.apache.commons.discovery.tools.DiscoverClass;
24  import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
25  import org.apache.commons.math.stat.descriptive.moment.Kurtosis;
26  import org.apache.commons.math.stat.descriptive.moment.Mean;
27  import org.apache.commons.math.stat.descriptive.moment.Skewness;
28  import org.apache.commons.math.stat.descriptive.moment.Variance;
29  import org.apache.commons.math.stat.descriptive.rank.Max;
30  import org.apache.commons.math.stat.descriptive.rank.Min;
31  import org.apache.commons.math.stat.descriptive.rank.Percentile;
32  import org.apache.commons.math.stat.descriptive.summary.Sum;
33  import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
34  import org.apache.commons.math.util.ResizableDoubleArray;
35  
36  
37  /**
38   * Maintains a dataset of values of a single variable and computes descriptive
39   * statistics based on stored data. The {@link #getWindowSize() windowSize}
40   * property sets a limit on the number of values that can be stored in the 
41   * dataset.  The default value, INFINITE_WINDOW, puts no limit on the size of
42   * the dataset.  This value should be used with caution, as the backing store
43   * will grow without bound in this case.  For very large datasets, 
44   * {@link SummaryStatistics}, which does not store the dataset, should be used
45   * instead of this class. If <code>windowSize</code> is not INFINITE_WINDOW and
46   * more values are added than can be stored in the dataset, new values are
47   * added in a "rolling" manner, with new values replacing the "oldest" values 
48   * in the dataset.
49   * 
50   * <p>Note: this class is not threadsafe.  Use 
51   * {@link SynchronizedDescriptiveStatistics} if concurrent access from multiple
52   * threads is required.</p>
53   *
54   * @version $Revision: 620318 $ $Date: 2008-02-10 13:17:24 -0700 (Sun, 10 Feb 2008) $
55   */
56  public class DescriptiveStatistics implements StatisticalSummary, Serializable {
57      
58      /** Serialization UID */
59      private static final long serialVersionUID = -2734185686570407433L;
60      
61      /** hold the window size **/
62      protected int windowSize = INFINITE_WINDOW;
63      
64      /** 
65       *  Stored data values
66       */
67      protected ResizableDoubleArray eDA = new ResizableDoubleArray();
68    
69      /** Mean statistic implementation - can be reset by setter. */
70      private UnivariateStatistic meanImpl = new Mean();
71      
72      /** Geometric mean statistic implementation - can be reset by setter. */
73      private UnivariateStatistic geometricMeanImpl = new GeometricMean();
74      
75      /** Kurtosis statistic implementation - can be reset by setter. */
76      private UnivariateStatistic kurtosisImpl = new Kurtosis();
77      
78      /** Maximum statistic implementation - can be reset by setter. */
79      private UnivariateStatistic maxImpl = new Max();
80      
81      /** Minimum statistic implementation - can be reset by setter. */
82      private UnivariateStatistic minImpl = new Min();
83      
84      /** Percentile statistic implementation - can be reset by setter. */
85      private UnivariateStatistic percentileImpl = new Percentile();
86      
87      /** Skewness statistic implementation - can be reset by setter. */
88      private UnivariateStatistic skewnessImpl = new Skewness();
89      
90      /** Variance statistic implementation - can be reset by setter. */
91      private UnivariateStatistic varianceImpl = new Variance();
92      
93      /** Sum of squares statistic implementation - can be reset by setter. */
94      private UnivariateStatistic sumsqImpl = new SumOfSquares();
95      
96      /** Sum statistic implementation - can be reset by setter. */
97      private UnivariateStatistic sumImpl = new Sum();
98      
99      /**
100      * Construct a DescriptiveStatistics instance with an infinite window
101      */
102     public DescriptiveStatistics() {
103     }
104     
105     /**
106      * Construct a DescriptiveStatistics instance with the specified window
107      * 
108      * @param window the window size.
109      */
110     public DescriptiveStatistics(int window) {
111         super();
112         setWindowSize(window);
113     }
114     
115     /**
116      * Create an instance of a <code>DescriptiveStatistics</code>
117      * @param cls the type of <code>DescriptiveStatistics</code> object to
118      *        create. 
119      * @return a new instance. 
120      * @throws InstantiationException is thrown if the object can not be
121      *            created.
122      * @throws IllegalAccessException is thrown if the type's default
123      *            constructor is not accessible.
124      * @deprecated to be removed in commons-math 2.0
125      */
126     public static DescriptiveStatistics newInstance(Class cls) throws InstantiationException, IllegalAccessException {
127         return (DescriptiveStatistics)cls.newInstance();
128     }
129     
130     /**
131      * Create an instance of a <code>DescriptiveStatistics</code>
132      * @return a new DescriptiveStatistics instance. 
133      * @deprecated to be removed in commons-math 2.0
134      */
135     public static DescriptiveStatistics newInstance() {
136         DescriptiveStatistics factory = null;
137         try {
138             DiscoverClass dc = new DiscoverClass();
139             factory = (DescriptiveStatistics) dc.newInstance(
140                 DescriptiveStatistics.class,
141                 "org.apache.commons.math.stat.descriptive.DescriptiveStatisticsImpl");
142         } catch(Throwable t) {
143             return new DescriptiveStatisticsImpl();
144         }
145         return factory;
146     }
147     
148     /**
149      * Represents an infinite window size.  When the {@link #getWindowSize()}
150      * returns this value, there is no limit to the number of data values
151      * that can be stored in the dataset.
152      */
153     public static final int INFINITE_WINDOW = -1;
154 
155     /**
156      * Adds the value to the dataset. If the dataset is at the maximum size
157      * (i.e., the number of stored elements equals the currently configured
158      * windowSize), the first (oldest) element in the dataset is discarded
159      * to make room for the new value.
160      * 
161      * @param v the value to be added 
162      */
163     public void addValue(double v) {
164         if (windowSize != INFINITE_WINDOW) {
165             if (getN() == windowSize) {
166                 eDA.addElementRolling(v);
167             } else if (getN() < windowSize) {
168                 eDA.addElement(v);
169             }
170         } else {
171             eDA.addElement(v);
172         }
173     }
174 
175     /** 
176      * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">
177      * arithmetic mean </a> of the available values 
178      * @return The mean or Double.NaN if no values have been added.
179      */
180     public double getMean() {
181         return apply(meanImpl);
182     }
183 
184     /** 
185      * Returns the <a href="http://www.xycoon.com/geometric_mean.htm">
186      * geometric mean </a> of the available values
187      * @return The geometricMean, Double.NaN if no values have been added, 
188      * or if the productof the available values is less than or equal to 0.
189      */
190     public double getGeometricMean() {
191         return apply(geometricMeanImpl);
192     }
193 
194     /** 
195      * Returns the variance of the available values.
196      * @return The variance, Double.NaN if no values have been added 
197      * or 0.0 for a single value set.  
198      */
199     public double getVariance() {
200         return apply(varianceImpl);
201     }
202 
203     /** 
204      * Returns the standard deviation of the available values.
205      * @return The standard deviation, Double.NaN if no values have been added 
206      * or 0.0 for a single value set. 
207      */
208     public double getStandardDeviation() {
209         double stdDev = Double.NaN;
210         if (getN() > 0) {
211             if (getN() > 1) {
212                 stdDev = Math.sqrt(getVariance());
213             } else {
214                 stdDev = 0.0;
215             }
216         }
217         return (stdDev);
218     }
219 
220     /**
221      * Returns the skewness of the available values. Skewness is a 
222      * measure of the asymmetry of a given distribution.
223      * @return The skewness, Double.NaN if no values have been added 
224      * or 0.0 for a value set &lt;=2. 
225      */
226     public double getSkewness() {
227         return apply(skewnessImpl);
228     }
229 
230     /**
231      * Returns the Kurtosis of the available values. Kurtosis is a 
232      * measure of the "peakedness" of a distribution
233      * @return The kurtosis, Double.NaN if no values have been added, or 0.0 
234      * for a value set &lt;=3. 
235      */
236     public double getKurtosis() {
237         return apply(kurtosisImpl);
238     }
239 
240     /** 
241      * Returns the maximum of the available values
242      * @return The max or Double.NaN if no values have been added.
243      */
244     public double getMax() {
245         return apply(maxImpl);
246     }
247 
248     /** 
249     * Returns the minimum of the available values
250     * @return The min or Double.NaN if no values have been added.
251     */
252     public double getMin() {
253         return apply(minImpl);
254     }
255 
256     /** 
257      * Returns the number of available values
258      * @return The number of available values
259      */
260     public long getN() {
261         return eDA.getNumElements();
262     }
263 
264     /**
265      * Returns the sum of the values that have been added to Univariate.
266      * @return The sum or Double.NaN if no values have been added
267      */
268     public double getSum() {
269         return apply(sumImpl);
270     }
271 
272     /**
273      * Returns the sum of the squares of the available values.
274      * @return The sum of the squares or Double.NaN if no 
275      * values have been added.
276      */
277     public double getSumsq() {
278         return apply(sumsqImpl);
279     }
280 
281     /** 
282      * Resets all statistics and storage
283      */
284     public void clear() {
285         eDA.clear();
286     }
287 
288 
289     /**
290      * Returns the maximum number of values that can be stored in the
291      * dataset, or INFINITE_WINDOW (-1) if there is no limit.
292      * 
293      * @return The current window size or -1 if its Infinite.
294      */
295     public int getWindowSize() {
296         return windowSize;
297     }
298 
299     /**
300      * WindowSize controls the number of values which contribute 
301      * to the reported statistics.  For example, if 
302      * windowSize is set to 3 and the values {1,2,3,4,5} 
303      * have been added <strong> in that order</strong> 
304      * then the <i>available values</i> are {3,4,5} and all
305      * reported statistics will be based on these values
306      * @param windowSize sets the size of the window.
307      */
308     public void setWindowSize(int windowSize) {
309         if (windowSize < 1) {
310             if (windowSize != INFINITE_WINDOW) {
311                 throw new IllegalArgumentException("window size must be positive.");
312             }
313         }
314         
315         this.windowSize = windowSize;
316 
317         // We need to check to see if we need to discard elements
318         // from the front of the array.  If the windowSize is less than 
319         // the current number of elements.
320         if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) {
321             eDA.discardFrontElements(eDA.getNumElements() - windowSize);
322         }
323     }
324     
325     /**
326      * Returns the current set of values in an array of double primitives.  
327      * The order of addition is preserved.  The returned array is a fresh
328      * copy of the underlying data -- i.e., it is not a reference to the
329      * stored data.
330      * 
331      * @return returns the current set of numbers in the order in which they 
332      *         were added to this set
333      */
334     public double[] getValues() {
335         double[] copiedArray = new double[eDA.getNumElements()];
336         System.arraycopy(eDA.getElements(), 0, copiedArray,
337             0, eDA.getNumElements());
338         return copiedArray;
339     }
340 
341     /**
342      * Returns the current set of values in an array of double primitives,  
343      * sorted in ascending order.  The returned array is a fresh
344      * copy of the underlying data -- i.e., it is not a reference to the
345      * stored data.
346      * @return returns the current set of 
347      * numbers sorted in ascending order        
348      */
349     public double[] getSortedValues() {
350         double[] sort = getValues();
351         Arrays.sort(sort);
352         return sort;
353     }
354 
355     /**
356      * Returns the element at the specified index
357      * @param index The Index of the element
358      * @return return the element at the specified index
359      */
360     public double getElement(int index) {
361         return eDA.getElement(index);
362     }
363 
364     /**
365      * Returns an estimate for the pth percentile of the stored values. 
366      * <p>
367      * The implementation provided here follows the first estimation procedure presented
368      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
369      * </p><p>
370      * <strong>Preconditions</strong>:<ul>
371      * <li><code>0 &lt; p &lt; 100</code> (otherwise an 
372      * <code>IllegalArgumentException</code> is thrown)</li>
373      * <li>at least one value must be stored (returns <code>Double.NaN
374      *     </code> otherwise)</li>
375      * </ul></p>
376      * 
377      * @param p the requested percentile (scaled from 0 - 100)
378      * @return An estimate for the pth percentile of the stored data 
379      * @throws IllegalStateException if percentile implementation has been
380      *  overridden and the supplied implementation does not support setQuantile
381      * values
382      */
383     public double getPercentile(double p) {
384         if (percentileImpl instanceof Percentile) {
385             ((Percentile) percentileImpl).setQuantile(p);
386         } else {
387             try {
388                 percentileImpl.getClass().getMethod("setQuantile", 
389                         new Class[] {Double.TYPE}).invoke(percentileImpl,
390                                 new Object[] {new Double(p)});
391             } catch (NoSuchMethodException e1) { // Setter guard should prevent
392                 throw new IllegalArgumentException(
393                    "Percentile implementation does not support setQuantile");
394             } catch (IllegalAccessException e2) {
395                 throw new IllegalArgumentException(
396                     "IllegalAccessException setting quantile"); 
397             } catch (InvocationTargetException e3) {
398                 throw new IllegalArgumentException(
399                     "Error setting quantile" + e3.toString()); 
400             }
401         }
402         return apply(percentileImpl);
403     }
404     
405     /**
406      * Generates a text report displaying univariate statistics from values
407      * that have been added.  Each statistic is displayed on a separate
408      * line.
409      * 
410      * @return String with line feeds displaying statistics
411      */
412     public String toString() {
413         StringBuffer outBuffer = new StringBuffer();
414         outBuffer.append("DescriptiveStatistics:\n");
415         outBuffer.append("n: " + getN() + "\n");
416         outBuffer.append("min: " + getMin() + "\n");
417         outBuffer.append("max: " + getMax() + "\n");
418         outBuffer.append("mean: " + getMean() + "\n");
419         outBuffer.append("std dev: " + getStandardDeviation() + "\n");
420         outBuffer.append("median: " + getPercentile(50) + "\n");
421         outBuffer.append("skewness: " + getSkewness() + "\n");
422         outBuffer.append("kurtosis: " + getKurtosis() + "\n");
423         return outBuffer.toString();
424     }
425     
426     /**
427      * Apply the given statistic to the data associated with this set of statistics.
428      * @param stat the statistic to apply
429      * @return the computed value of the statistic.
430      */
431     public double apply(UnivariateStatistic stat) {
432         return stat.evaluate(eDA.getValues(), eDA.start(), eDA.getNumElements());
433     }
434 
435     // Implementation getters and setter
436     
437     /**
438      * Returns the currently configured mean implementation.
439      * 
440      * @return the UnivariateStatistic implementing the mean
441      * @since 1.2
442      */
443     public synchronized UnivariateStatistic getMeanImpl() {
444         return meanImpl;
445     }
446 
447     /**
448      * <p>Sets the implementation for the mean.</p>
449      * 
450      * @param meanImpl the UnivariateStatistic instance to use
451      * for computing the mean
452      * @since 1.2
453      */
454     public synchronized void setMeanImpl(UnivariateStatistic meanImpl) {
455         this.meanImpl = meanImpl;
456     }
457 
458     /**
459      * Returns the currently configured geometric mean implementation.
460      * 
461      * @return the UnivariateStatistic implementing the geometric mean
462      * @since 1.2
463      */
464     public synchronized UnivariateStatistic getGeometricMeanImpl() {
465         return geometricMeanImpl;
466     }
467 
468     /**
469      * <p>Sets the implementation for the gemoetric mean.</p>
470      * 
471      * @param geometricMeanImpl the UnivariateStatistic instance to use
472      * for computing the geometric mean
473      * @since 1.2
474      */
475     public synchronized void setGeometricMeanImpl(
476             UnivariateStatistic geometricMeanImpl) {
477         this.geometricMeanImpl = geometricMeanImpl;
478     }
479 
480     /**
481      * Returns the currently configured kurtosis implementation.
482      * 
483      * @return the UnivariateStatistic implementing the kurtosis
484      * @since 1.2
485      */
486     public synchronized UnivariateStatistic getKurtosisImpl() {
487         return kurtosisImpl;
488     }
489 
490     /**
491      * <p>Sets the implementation for the kurtosis.</p>
492      * 
493      * @param kurtosisImpl the UnivariateStatistic instance to use
494      * for computing the kurtosis
495      * @since 1.2
496      */
497     public synchronized void setKurtosisImpl(UnivariateStatistic kurtosisImpl) {
498         this.kurtosisImpl = kurtosisImpl;
499     }
500 
501     /**
502      * Returns the currently configured maximum implementation.
503      * 
504      * @return the UnivariateStatistic implementing the maximum
505      * @since 1.2
506      */
507     public synchronized UnivariateStatistic getMaxImpl() {
508         return maxImpl;
509     }
510 
511     /**
512      * <p>Sets the implementation for the maximum.</p>
513      * 
514      * @param maxImpl the UnivariateStatistic instance to use
515      * for computing the maximum
516      * @since 1.2
517      */
518     public synchronized void setMaxImpl(UnivariateStatistic maxImpl) {
519         this.maxImpl = maxImpl;
520     }
521 
522     /**
523      * Returns the currently configured minimum implementation.
524      * 
525      * @return the UnivariateStatistic implementing the minimum
526      * @since 1.2
527      */
528     public synchronized UnivariateStatistic getMinImpl() {
529         return minImpl;
530     }
531 
532     /**
533      * <p>Sets the implementation for the minimum.</p>
534      * 
535      * @param minImpl the UnivariateStatistic instance to use
536      * for computing the minimum
537      * @since 1.2
538      */
539     public synchronized void setMinImpl(UnivariateStatistic minImpl) {
540         this.minImpl = minImpl;
541     }
542 
543     /**
544      * Returns the currently configured percentile implementation.
545      * 
546      * @return the UnivariateStatistic implementing the percentile
547      * @since 1.2
548      */
549     public synchronized UnivariateStatistic getPercentileImpl() {
550         return percentileImpl;
551     }
552 
553     /**
554      * Sets the implementation to be used by {@link #getPercentile(double)}.
555      * The supplied <code>UnivariateStatistic</code> must provide a
556      * <code>setQuantile(double)</code> method; otherwise 
557      * <code>IllegalArgumentException</code> is thrown.
558      * 
559      * @param percentileImpl the percentileImpl to set
560      * @throws IllegalArgumentException if the supplied implementation does not
561      *  provide a <code>setQuantile</code> method
562      * @since 1.2
563      */
564     public synchronized void setPercentileImpl(
565             UnivariateStatistic percentileImpl) {
566         try {
567             percentileImpl.getClass().getMethod("setQuantile", 
568                     new Class[] {Double.TYPE}).invoke(percentileImpl,
569                             new Object[] {new Double(50.0d)});
570         } catch (NoSuchMethodException e1) { 
571             throw new IllegalArgumentException(
572                     "Percentile implementation does not support setQuantile");
573         } catch (IllegalAccessException e2) {
574             throw new IllegalArgumentException(
575                 "IllegalAccessException setting quantile"); 
576         } catch (InvocationTargetException e3) {
577             throw new IllegalArgumentException(
578                 "Error setting quantile" + e3.toString()); 
579         }
580         this.percentileImpl = percentileImpl;
581     }
582 
583     /**
584      * Returns the currently configured skewness implementation.
585      * 
586      * @return the UnivariateStatistic implementing the skewness
587      * @since 1.2
588      */
589     public synchronized UnivariateStatistic getSkewnessImpl() {
590         return skewnessImpl;
591     }
592 
593     /**
594      * <p>Sets the implementation for the skewness.</p>
595      * 
596      * @param skewnessImpl the UnivariateStatistic instance to use
597      * for computing the skewness
598      * @since 1.2
599      */
600     public synchronized void setSkewnessImpl(
601             UnivariateStatistic skewnessImpl) {
602         this.skewnessImpl = skewnessImpl;
603     }
604 
605     /**
606      * Returns the currently configured variance implementation.
607      * 
608      * @return the UnivariateStatistic implementing the variance
609      * @since 1.2
610      */
611     public synchronized UnivariateStatistic getVarianceImpl() {
612         return varianceImpl;
613     }
614 
615     /**
616      * <p>Sets the implementation for the variance.</p>
617      * 
618      * @param varianceImpl the UnivariateStatistic instance to use
619      * for computing the variance
620      * @since 1.2
621      */
622     public synchronized void setVarianceImpl(
623             UnivariateStatistic varianceImpl) {
624         this.varianceImpl = varianceImpl;
625     }
626 
627     /**
628      * Returns the currently configured sum of squares implementation.
629      * 
630      * @return the UnivariateStatistic implementing the sum of squares
631      * @since 1.2
632      */
633     public synchronized UnivariateStatistic getSumsqImpl() {
634         return sumsqImpl;
635     }
636 
637     /**
638      * <p>Sets the implementation for the sum of squares.</p>
639      * 
640      * @param sumsqImpl the UnivariateStatistic instance to use
641      * for computing the sum of squares
642      * @since 1.2
643      */
644     public synchronized void setSumsqImpl(UnivariateStatistic sumsqImpl) {
645         this.sumsqImpl = sumsqImpl;
646     }
647 
648     /**
649      * Returns the currently configured sum implementation.
650      * 
651      * @return the UnivariateStatistic implementing the sum
652      * @since 1.2
653      */
654     public synchronized UnivariateStatistic getSumImpl() {
655         return sumImpl;
656     }
657 
658     /**
659      * <p>Sets the implementation for the sum.</p>
660      * 
661      * @param sumImpl the UnivariateStatistic instance to use
662      * for computing the sum
663      * @since 1.2
664      */
665     public synchronized void setSumImpl(UnivariateStatistic sumImpl) {
666         this.sumImpl = sumImpl;
667     }   
668 }