View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.descriptive;
18  
19  import java.io.Serializable;
20  
21  import org.apache.commons.discovery.tools.DiscoverClass;
22  import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
23  import org.apache.commons.math.stat.descriptive.moment.Mean;
24  import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
25  import org.apache.commons.math.stat.descriptive.moment.Variance;
26  import org.apache.commons.math.stat.descriptive.rank.Max;
27  import org.apache.commons.math.stat.descriptive.rank.Min;
28  import org.apache.commons.math.stat.descriptive.summary.Sum;
29  import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
30  import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
31  import org.apache.commons.math.util.MathUtils;
32  
33  /**
34   * <p>Computes summary statistics for a stream of data values added using the 
35   * {@link #addValue(double) addValue} method. The data values are not stored in
36   * memory, so this class can be used to compute statistics for very large
37   * data streams.</p>
38   * 
39   * <p>The {@link StorelessUnivariateStatistic} instances used to maintain
40   * summary state and compute statistics are configurable via setters.
41   * For example, the default implementation for the variance can be overridden by
42   * calling {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual
43   * parameters to these methods must implement the 
44   * {@link StorelessUnivariateStatistic} interface and configuration must be
45   * completed before <code>addValue</code> is called. No configuration is
46   * necessary to use the default, commons-math provided implementations.</p>
47   * 
48   * <p>Note: This class is not thread-safe. Use 
49   * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
50   * threads is required.</p>
51   *
52   * @version $Revision: 620312 $ $Date: 2008-02-10 12:28:59 -0700 (Sun, 10 Feb 2008) $
53   */
54  public class SummaryStatistics implements StatisticalSummary, Serializable {
55  
56      /** Serialization UID */
57      private static final long serialVersionUID = -3346512372447011854L;
58       
59      /**
60       * Create an instance of a <code>SummaryStatistics</code>
61       * 
62       * @param cls the type of <code>SummaryStatistics</code> object to
63       *        create. 
64       * @return a new instance. 
65       * @deprecated to be removed in commons-math 2.0
66       * @throws InstantiationException is thrown if the object can not be
67       *            created.
68       * @throws IllegalAccessException is thrown if the type's default
69       *            constructor is not accessible.
70       */
71      public static SummaryStatistics newInstance(Class cls) throws 
72          InstantiationException, IllegalAccessException {
73          return (SummaryStatistics)cls.newInstance();
74      }
75      
76      /**
77       * Create an instance of a <code>SummaryStatistics</code>
78       * 
79       * @return a new SummaryStatistics instance.
80       * @deprecated to be removed in commons-math 2.0 
81       */
82      public static SummaryStatistics newInstance() {
83          SummaryStatistics instance = null;
84          try {
85              DiscoverClass dc = new DiscoverClass();
86              instance = (SummaryStatistics) dc.newInstance(
87                  SummaryStatistics.class,
88                  "org.apache.commons.math.stat.descriptive.SummaryStatisticsImpl");
89          } catch(Throwable t) {
90              return new SummaryStatisticsImpl();
91          }
92          return instance;
93      }
94      
95      /**
96       * Construct a SummaryStatistics instance
97       */
98      public SummaryStatistics() {
99      }
100     
101     /** count of values that have been added */
102     protected long n = 0;
103     
104     /** SecondMoment is used to compute the mean and variance */
105     protected SecondMoment secondMoment = new SecondMoment();
106     
107     /** sum of values that have been added */
108     protected Sum sum = new Sum();
109 
110     /** sum of the square of each value that has been added */
111     protected SumOfSquares sumsq = new SumOfSquares();
112 
113     /** min of values that have been added */
114     protected Min min = new Min();
115 
116     /** max of values that have been added */
117     protected Max max = new Max();
118 
119     /** sumLog of values that have been added */
120     protected SumOfLogs sumLog = new SumOfLogs();
121 
122     /** geoMean of values that have been added */
123     protected GeometricMean geoMean = new GeometricMean(sumLog);
124 
125     /** mean of values that have been added */
126     protected Mean mean = new Mean();
127 
128     /** variance of values that have been added */
129     protected Variance variance = new Variance();
130     
131     /** Sum statistic implementation - can be reset by setter. */
132     private StorelessUnivariateStatistic sumImpl = sum;
133     
134     /** Sum of squares statistic implementation - can be reset by setter. */
135     private StorelessUnivariateStatistic sumsqImpl = sumsq;
136     
137     /** Minimum statistic implementation - can be reset by setter. */
138     private StorelessUnivariateStatistic minImpl = min;
139     
140     /** Maximum statistic implementation - can be reset by setter. */
141     private StorelessUnivariateStatistic maxImpl = max;
142     
143     /** Sum of log statistic implementation - can be reset by setter. */
144     private StorelessUnivariateStatistic sumLogImpl = sumLog;
145     
146     /** Geometric mean statistic implementation - can be reset by setter. */
147     private StorelessUnivariateStatistic geoMeanImpl = geoMean;
148     
149     /** Mean statistic implementation - can be reset by setter. */
150     private StorelessUnivariateStatistic meanImpl = mean;
151     
152     /** Variance statistic implementation - can be reset by setter. */
153     private StorelessUnivariateStatistic varianceImpl = variance;
154 
155     /**
156      * Return a {@link StatisticalSummaryValues} instance reporting current
157      * statistics.
158      * 
159      * @return Current values of statistics 
160      */
161     public StatisticalSummary getSummary() {
162         return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
163                 getMax(), getMin(), getSum());
164     }
165     
166     /**
167      * Add a value to the data
168      * 
169      * @param value  the value to add
170      */
171     public void addValue(double value) {
172         sumImpl.increment(value);
173         sumsqImpl.increment(value);
174         minImpl.increment(value);
175         maxImpl.increment(value);
176         sumLogImpl.increment(value);
177         secondMoment.increment(value);
178         // If mean, variance or geomean have been overridden,
179         // need to increment these
180         if (!(meanImpl instanceof Mean)) {
181                 meanImpl.increment(value);
182         }
183         if (!(varianceImpl instanceof Variance)) {
184             varianceImpl.increment(value);
185         }
186         if (!(geoMeanImpl instanceof GeometricMean)) {
187             geoMeanImpl.increment(value);
188         }
189         n++;
190     }
191 
192     /** 
193      * Returns the number of available values
194      * @return The number of available values
195      */
196     public long getN() {
197         return n;
198     }
199 
200     /**
201      * Returns the sum of the values that have been added
202      * @return The sum or <code>Double.NaN</code> if no values have been added
203      */
204     public double getSum() {
205         return sumImpl.getResult();
206     }
207 
208     /**
209      * Returns the sum of the squares of the values that have been added.
210      * <p>
211      *  Double.NaN is returned if no values have been added.</p>
212      * 
213      * @return The sum of squares
214      */
215     public double getSumsq() {
216         return sumsqImpl.getResult();
217     }
218 
219     /**
220      * Returns the mean of the values that have been added.
221      * <p>
222      *  Double.NaN is returned if no values have been added.</p>
223      * 
224      * @return the mean
225      */
226     public double getMean() {
227       if (mean == meanImpl) {
228           return new Mean(secondMoment).getResult();
229       } else {
230           return meanImpl.getResult();
231       }
232     }
233 
234     /**
235      * Returns the standard deviation of the values that have been added.
236      * <p>
237      *  Double.NaN is returned if no values have been added.</p>
238      * 
239      * @return the standard deviation
240      */
241     public double getStandardDeviation() {
242         double stdDev = Double.NaN;
243         if (getN() > 0) {
244             if (getN() > 1) {
245                 stdDev = Math.sqrt(getVariance());
246             } else {
247                 stdDev = 0.0;
248             }
249         }
250         return (stdDev);
251     }
252 
253     /**
254      * Returns the variance of the values that have been added.
255      * <p>
256      *  Double.NaN is returned if no values have been added.</p>
257      *
258      * @return the variance 
259      */
260     public double getVariance() {
261         if (varianceImpl == variance) {
262             return new Variance(secondMoment).getResult();
263         } else {
264             return varianceImpl.getResult();
265         }
266     }
267 
268     /**
269      * Returns the maximum of the values that have been added.
270      * <p>
271      *  Double.NaN is returned if no values have been added.</p>
272      *
273      * @return the maximum  
274      */
275     public double getMax() {
276         return maxImpl.getResult();
277     }
278 
279     /**
280      * Returns the minimum of the values that have been added.
281      * <p>
282      *  Double.NaN is returned if no values have been added.</p>
283      *
284      * @return the minimum  
285      */
286     public double getMin() {
287         return minImpl.getResult();
288     }
289 
290     /**
291      * Returns the geometric mean of the values that have been added.
292      * <p>
293      *  Double.NaN is returned if no values have been added.</p>
294      *
295      * @return the geometric mean  
296      */
297     public double getGeometricMean() {
298         return geoMeanImpl.getResult();
299     }
300     
301     /**
302      * Returns the sum of the logs of the values that have been added.
303      * <p>
304      *  Double.NaN is returned if no values have been added.</p>
305      *
306      * @return the sum of logs
307      * @since 1.2
308      */
309     public double getSumOfLogs() {
310         return sumLogImpl.getResult();
311     }
312     
313     /**
314      * Generates a text report displaying
315      * summary statistics from values that
316      * have been added.
317      * @return String with line feeds displaying statistics
318      * @since 1.2
319      */
320     public String toString() {
321         StringBuffer outBuffer = new StringBuffer();
322         outBuffer.append("SummaryStatistics:\n");
323         outBuffer.append("n: " + getN() + "\n");
324         outBuffer.append("min: " + getMin() + "\n");
325         outBuffer.append("max: " + getMax() + "\n");
326         outBuffer.append("mean: " + getMean() + "\n");
327         outBuffer.append("geometric mean: " + getGeometricMean() + "\n");
328         outBuffer.append("variance: " + getVariance() + "\n");
329         outBuffer.append("sum of squares: " + getSumsq() + "\n");
330         outBuffer.append("standard deviation: " + getStandardDeviation() + "\n");
331         outBuffer.append("sum of logs: " + getSumOfLogs() + "\n");
332         return outBuffer.toString();
333     }
334 
335     /** 
336      * Resets all statistics and storage
337      */
338     public void clear() {
339         this.n = 0;
340         minImpl.clear();
341         maxImpl.clear();
342         sumImpl.clear();
343         sumLogImpl.clear();
344         sumsqImpl.clear();
345         geoMeanImpl.clear();
346         secondMoment.clear();
347         if (meanImpl != mean) {
348             meanImpl.clear();
349         }
350         if (varianceImpl != variance) {
351             varianceImpl.clear();
352         }
353     }
354     
355     /**
356      * Returns true iff <code>object</code> is a <code>SummaryStatistics</code>
357      * instance and all statistics have the same values as this.
358      * @param object the object to test equality against.
359      * @return true if object equals this
360      */
361     public boolean equals(Object object) {
362         if (object == this ) {
363             return true;
364         }
365         if (object instanceof SummaryStatistics == false) {
366             return false;
367         }
368         SummaryStatistics stat = (SummaryStatistics) object;
369         return (MathUtils.equals(stat.getGeometricMean(), 
370                 this.getGeometricMean()) &&
371                 MathUtils.equals(stat.getMax(), this.getMax()) && 
372                 MathUtils.equals(stat.getMean(),this.getMean()) &&
373                 MathUtils.equals(stat.getMin(),this.getMin()) &&
374                 MathUtils.equals(stat.getN(), this.getN()) &&
375                 MathUtils.equals(stat.getSum(), this.getSum()) &&
376                 MathUtils.equals(stat.getSumsq(),this.getSumsq()) &&
377                 MathUtils.equals(stat.getVariance(),this.getVariance()));
378     }
379     
380     /**
381      * Returns hash code based on values of statistics
382      * 
383      * @return hash code
384      */
385     public int hashCode() {
386         int result = 31 + MathUtils.hash(getGeometricMean());
387         result = result * 31 + MathUtils.hash(getGeometricMean());
388         result = result * 31 + MathUtils.hash(getMax());
389         result = result * 31 + MathUtils.hash(getMean());
390         result = result * 31 + MathUtils.hash(getMin());
391         result = result * 31 + MathUtils.hash(getN());
392         result = result * 31 + MathUtils.hash(getSum());
393         result = result * 31 + MathUtils.hash(getSumsq());
394         result = result * 31 + MathUtils.hash(getVariance());
395         return result;
396     }
397 
398     // Getters and setters for statistics implementations
399     /**
400      * Returns the currently configured Sum implementation
401      * 
402      * @return the StorelessUnivariateStatistic implementing the sum
403      * @since 1.2
404      */
405     public StorelessUnivariateStatistic getSumImpl() {
406         return sumImpl;
407     }
408 
409     /**
410      * <p>Sets the implementation for the Sum.</p>
411      * <p>This method must be activated before any data has been added - i.e.,
412      * before {@link #addValue(double) addValue} has been used to add data; 
413      * otherwise an IllegalStateException will be thrown.</p>
414      * 
415      * @param sumImpl the StorelessUnivariateStatistic instance to use
416      * for computing the Sum
417      * @throws IllegalStateException if data has already been added
418      *  (i.e if n > 0)
419      * @since 1.2
420      */
421     public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
422         checkEmpty();
423         this.sumImpl = sumImpl;
424     }
425 
426     /**
427      * Returns the currently configured sum of squares implementation
428      * 
429      * @return the StorelessUnivariateStatistic implementing the sum of squares
430      * @since 1.2
431      */
432     public StorelessUnivariateStatistic getSumsqImpl() {
433         return sumsqImpl;
434     }
435 
436     /**
437      * <p>Sets the implementation for the sum of squares.</p>
438      * <p>This method must be activated before any data has been added - i.e.,
439      * before {@link #addValue(double) addValue} has been used to add data; 
440      * otherwise an IllegalStateException will be thrown.</p>
441      * 
442      * @param sumsqImpl the StorelessUnivariateStatistic instance to use
443      * for computing the sum of squares
444      * @throws IllegalStateException if data has already been added
445      *  (i.e if n > 0)
446      * @since 1.2
447      */
448     public void setSumsqImpl(
449             StorelessUnivariateStatistic sumsqImpl) {
450         checkEmpty();
451         this.sumsqImpl = sumsqImpl;
452     }
453 
454     /**
455      * Returns the currently configured minimum implementation
456      * 
457      * @return the StorelessUnivariateStatistic implementing the minimum
458      * @since 1.2
459      */
460     public StorelessUnivariateStatistic getMinImpl() {
461         return minImpl;
462     }
463 
464     /**
465      * <p>Sets the implementation for the minimum.</p>
466      * <p>This method must be activated before any data has been added - i.e.,
467      * before {@link #addValue(double) addValue} has been used to add data; 
468      * otherwise an IllegalStateException will be thrown.</p>
469      * 
470      * @param minImpl the StorelessUnivariateStatistic instance to use
471      * for computing the minimum
472      * @throws IllegalStateException if data has already been added
473      *  (i.e if n > 0)
474      * @since 1.2
475      */
476     public void setMinImpl(StorelessUnivariateStatistic minImpl) {
477         checkEmpty();
478         this.minImpl = minImpl;
479     }
480 
481     /**
482      * Returns the currently configured maximum implementation
483      * 
484      * @return the StorelessUnivariateStatistic implementing the maximum
485      * @since 1.2
486      */
487     public StorelessUnivariateStatistic getMaxImpl() {
488         return maxImpl;
489     }
490 
491     /**
492      * <p>Sets the implementation for the maximum.</p>
493      * <p>This method must be activated before any data has been added - i.e.,
494      * before {@link #addValue(double) addValue} has been used to add data; 
495      * otherwise an IllegalStateException will be thrown.</p>
496      * 
497      * @param maxImpl the StorelessUnivariateStatistic instance to use
498      * for computing the maximum
499      * @throws IllegalStateException if data has already been added
500      *  (i.e if n > 0)
501      * @since 1.2
502      */
503     public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
504         checkEmpty();
505         this.maxImpl = maxImpl;
506     }
507 
508     /**
509      * Returns the currently configured sum of logs implementation
510      * 
511      * @return the StorelessUnivariateStatistic implementing the log sum
512      * @since 1.2
513      */
514     public StorelessUnivariateStatistic getSumLogImpl() {
515         return sumLogImpl;
516     }
517 
518     /**
519      * <p>Sets the implementation for the sum of logs.</p>
520      * <p>This method must be activated before any data has been added - i.e.,
521      * before {@link #addValue(double) addValue} has been used to add data; 
522      * otherwise an IllegalStateException will be thrown.</p>
523      * 
524      * @param sumLogImpl the StorelessUnivariateStatistic instance to use
525      * for computing the log sum
526      * @throws IllegalStateException if data has already been added 
527      *  (i.e if n > 0)
528      * @since 1.2
529      */
530     public void setSumLogImpl(
531             StorelessUnivariateStatistic sumLogImpl) {
532         checkEmpty();
533         this.sumLogImpl = sumLogImpl;
534         geoMean.setSumLogImpl(sumLogImpl);
535     }
536 
537     /**
538      * Returns the currently configured geometric mean implementation
539      * 
540      * @return the StorelessUnivariateStatistic implementing the geometric mean
541      * @since 1.2
542      */
543     public StorelessUnivariateStatistic getGeoMeanImpl() {
544         return geoMeanImpl;
545     }
546 
547     /**
548      * <p>Sets the implementation for the geometric mean.</p>
549      * <p>This method must be activated before any data has been added - i.e.,
550      * before {@link #addValue(double) addValue} has been used to add data; 
551      * otherwise an IllegalStateException will be thrown.</p>
552      * 
553      * @param geoMeanImpl the StorelessUnivariateStatistic instance to use
554      * for computing the geometric mean
555      * @throws IllegalStateException if data has already been added
556      *  (i.e if n > 0)
557      * @since 1.2
558      */
559     public void setGeoMeanImpl(
560             StorelessUnivariateStatistic geoMeanImpl) {
561         checkEmpty();
562         this.geoMeanImpl = geoMeanImpl;
563     }
564 
565     /**
566      * Returns the currently configured mean implementation
567      * 
568      * @return the StorelessUnivariateStatistic implementing the mean
569      * @since 1.2
570      */
571     public StorelessUnivariateStatistic getMeanImpl() {
572         return meanImpl;
573     }
574 
575     /**
576      * <p>Sets the implementation for the mean.</p>
577      * <p>This method must be activated before any data has been added - i.e.,
578      * before {@link #addValue(double) addValue} has been used to add data; 
579      * otherwise an IllegalStateException will be thrown.</p>
580      * 
581      * @param meanImpl the StorelessUnivariateStatistic instance to use
582      * for computing the mean
583      * @throws IllegalStateException if data has already been added
584      *  (i.e if n > 0)
585      * @since 1.2
586      */
587     public void setMeanImpl(
588             StorelessUnivariateStatistic meanImpl) {
589         checkEmpty();
590         this.meanImpl = meanImpl;
591     }
592 
593     /**
594      * Returns the currently configured variance implementation
595      * 
596      * @return the StorelessUnivariateStatistic implementing the variance
597      * @since 1.2
598      */
599     public StorelessUnivariateStatistic getVarianceImpl() {
600         return varianceImpl;
601     }
602 
603     /**
604      * <p>Sets the implementation for the variance.</p>
605      * <p>This method must be activated before any data has been added - i.e.,
606      * before {@link #addValue(double) addValue} has been used to add data; 
607      * otherwise an IllegalStateException will be thrown.</p>
608      * 
609      * @param varianceImpl the StorelessUnivariateStatistic instance to use
610      * for computing the variance
611      * @throws IllegalStateException if data has already been added
612      *  (i.e if n > 0)
613      * @since 1.2
614      */
615     public void setVarianceImpl(
616             StorelessUnivariateStatistic varianceImpl) {
617         checkEmpty();
618         this.varianceImpl = varianceImpl;
619     }
620     
621     /**
622      * Throws IllegalStateException if n > 0.
623      */
624     private void checkEmpty() {
625         if (n > 0) {
626             throw new IllegalStateException(
627                 "Implementations must be configured before values are added.");
628         }
629     }
630 
631 }