View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat;
18  
19  import org.apache.commons.math.stat.descriptive.UnivariateStatistic;
20  import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
21  import org.apache.commons.math.stat.descriptive.moment.Mean;
22  import org.apache.commons.math.stat.descriptive.moment.Variance;
23  import org.apache.commons.math.stat.descriptive.rank.Max;
24  import org.apache.commons.math.stat.descriptive.rank.Min;
25  import org.apache.commons.math.stat.descriptive.rank.Percentile;
26  import org.apache.commons.math.stat.descriptive.summary.Product;
27  import org.apache.commons.math.stat.descriptive.summary.Sum;
28  import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
29  import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
30  
31  /**
32   * StatUtils provides static methods for computing statistics based on data
33   * stored in double[] arrays. 
34   * 
35   * @version $Revision: 617953 $ $Date: 2008-02-02 22:54:00 -0700 (Sat, 02 Feb 2008) $
36   */
37  public final class StatUtils {
38  
39      /** sum */
40      private static UnivariateStatistic sum = new Sum();
41  
42      /** sumSq */
43      private static UnivariateStatistic sumSq = new SumOfSquares();
44  
45      /** prod */
46      private static UnivariateStatistic prod = new Product();
47  
48      /** sumLog */
49      private static UnivariateStatistic sumLog = new SumOfLogs();
50  
51      /** min */
52      private static UnivariateStatistic min = new Min();
53  
54      /** max */
55      private static UnivariateStatistic max = new Max();
56  
57      /** mean */
58      private static UnivariateStatistic mean = new Mean();
59  
60      /** variance */
61      private static Variance variance = new Variance();
62  
63      /** percentile */
64      private static Percentile percentile = new Percentile();
65      
66      /** geometric mean */
67      private static GeometricMean geometricMean = new GeometricMean();
68  
69      /**
70       * Private Constructor
71       */
72      private StatUtils() {
73      }
74  
75      /**
76       * Returns the sum of the values in the input array, or
77       * <code>Double.NaN</code> if the array is empty.
78       * <p>
79       * Throws <code>IllegalArgumentException</code> if the input array
80       * is null.</p>
81       * 
82       * @param values  array of values to sum
83       * @return the sum of the values or <code>Double.NaN</code> if the array
84       * is empty
85       * @throws IllegalArgumentException if the array is null
86       */
87      public static double sum(final double[] values) {
88          return sum.evaluate(values);
89      }
90  
91      /**
92       * Returns the sum of the entries in the specified portion of
93       * the input array, or <code>Double.NaN</code> if the designated subarray
94       * is empty.
95       * <p>
96       * Throws <code>IllegalArgumentException</code> if the array is null.</p>
97       * 
98       * @param values the input array
99       * @param begin index of the first array element to include
100      * @param length the number of elements to include
101      * @return the sum of the values or Double.NaN if length = 0
102      * @throws IllegalArgumentException if the array is null or the array index
103      *  parameters are not valid
104      */
105     public static double sum(final double[] values, final int begin, 
106             final int length) {
107         return sum.evaluate(values, begin, length);
108     }
109 
110     /**
111      * Returns the sum of the squares of the entries in the input array, or 
112      * <code>Double.NaN</code> if the array is empty.
113      * <p>
114      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
115      * 
116      * @param values  input array
117      * @return the sum of the squared values or <code>Double.NaN</code> if the
118      * array is empty
119      * @throws IllegalArgumentException if the array is null
120      */
121     public static double sumSq(final double[] values) {
122         return sumSq.evaluate(values);
123     }
124 
125     /**
126      * Returns the sum of the squares of the entries in the specified portion of
127      * the input array, or <code>Double.NaN</code> if the designated subarray
128      * is empty.
129      * <p>
130      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
131      * 
132      * @param values the input array
133      * @param begin index of the first array element to include
134      * @param length the number of elements to include
135      * @return the sum of the squares of the values or Double.NaN if length = 0
136      * @throws IllegalArgumentException if the array is null or the array index
137      * parameters are not valid
138      */
139     public static double sumSq(final double[] values, final int begin,
140             final int length) {
141         return sumSq.evaluate(values, begin, length);
142     }
143 
144     /**
145      * Returns the product of the entries in the input array, or 
146      * <code>Double.NaN</code> if the array is empty.
147      * <p>
148      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
149      * 
150      * @param values the input array
151      * @return the product of the values or Double.NaN if the array is empty
152      * @throws IllegalArgumentException if the array is null
153      */
154     public static double product(final double[] values) {
155         return prod.evaluate(values);
156     }
157 
158     /**
159      * Returns the product of the entries in the specified portion of
160      * the input array, or <code>Double.NaN</code> if the designated subarray
161      * is empty.
162      * <p>
163      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
164      * 
165      * @param values the input array
166      * @param begin index of the first array element to include
167      * @param length the number of elements to include
168      * @return the product of the values or Double.NaN if length = 0
169      * @throws IllegalArgumentException if the array is null or the array index
170      * parameters are not valid
171      */
172     public static double product(final double[] values, final int begin,
173             final int length) {
174         return prod.evaluate(values, begin, length);
175     }
176 
177     /**
178      * Returns the sum of the natural logs of the entries in the input array, or 
179      * <code>Double.NaN</code> if the array is empty.
180      * <p>
181      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
182      * <p>
183      * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
184      * </p>
185      * 
186      * @param values the input array
187      * @return the sum of the natural logs of the values or Double.NaN if 
188      * the array is empty
189      * @throws IllegalArgumentException if the array is null
190      */
191     public static double sumLog(final double[] values) {
192         return sumLog.evaluate(values);
193     }
194 
195     /**
196      * Returns the sum of the natural logs of the entries in the specified portion of
197      * the input array, or <code>Double.NaN</code> if the designated subarray
198      * is empty.
199      * <p>
200      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
201      * <p>
202      * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
203      * </p>
204      * 
205      * @param values the input array
206      * @param begin index of the first array element to include
207      * @param length the number of elements to include
208      * @return the sum of the natural logs of the values or Double.NaN if 
209      * length = 0
210      * @throws IllegalArgumentException if the array is null or the array index
211      * parameters are not valid
212      */
213     public static double sumLog(final double[] values, final int begin,
214             final int length) {
215         return sumLog.evaluate(values, begin, length);
216     }
217 
218     /**
219      * Returns the arithmetic mean of the entries in the input array, or 
220      * <code>Double.NaN</code> if the array is empty.
221      * <p>
222      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
223      * <p>
224      * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
225      * details on the computing algorithm.</p>
226      * 
227      * @param values the input array
228      * @return the mean of the values or Double.NaN if the array is empty
229      * @throws IllegalArgumentException if the array is null
230      */
231     public static double mean(final double[] values) {
232         return mean.evaluate(values);
233     }
234 
235     /**
236      * Returns the arithmetic mean of the entries in the specified portion of
237      * the input array, or <code>Double.NaN</code> if the designated subarray
238      * is empty.
239      * <p>
240      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
241      * <p>
242      * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
243      * details on the computing algorithm.</p>
244      * 
245      * @param values the input array
246      * @param begin index of the first array element to include
247      * @param length the number of elements to include
248      * @return the mean of the values or Double.NaN if length = 0
249      * @throws IllegalArgumentException if the array is null or the array index
250      * parameters are not valid
251      */
252     public static double mean(final double[] values, final int begin,
253             final int length) {
254         return mean.evaluate(values, begin, length);
255     }
256     
257     /**
258      * Returns the geometric mean of the entries in the input array, or 
259      * <code>Double.NaN</code> if the array is empty.
260      * <p>
261      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
262      * <p>
263      * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
264      * for details on the computing algorithm.</p>
265      * 
266      * @param values the input array
267      * @return the geometric mean of the values or Double.NaN if the array is empty
268      * @throws IllegalArgumentException if the array is null
269      */
270     public static double geometricMean(final double[] values) {
271         return geometricMean.evaluate(values);
272     }
273 
274     /**
275      * Returns the geometric mean of the entries in the specified portion of
276      * the input array, or <code>Double.NaN</code> if the designated subarray
277      * is empty.
278      * <p>
279      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
280      * <p>
281      * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
282      * for details on the computing algorithm.</p>
283      * 
284      * @param values the input array
285      * @param begin index of the first array element to include
286      * @param length the number of elements to include
287      * @return the geometric mean of the values or Double.NaN if length = 0
288      * @throws IllegalArgumentException if the array is null or the array index
289      * parameters are not valid
290      */
291     public static double geometricMean(final double[] values, final int begin,
292             final int length) {
293         return geometricMean.evaluate(values, begin, length);
294     }
295     
296 
297     /**
298      * Returns the variance of the entries in the input array, or 
299      * <code>Double.NaN</code> if the array is empty.
300      * <p>
301      * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
302      * details on the computing algorithm.</p>
303      * <p>
304      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
305      * <p>
306      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
307      * 
308      * @param values the input array
309      * @return the variance of the values or Double.NaN if the array is empty
310      * @throws IllegalArgumentException if the array is null
311      */
312     public static double variance(final double[] values) {
313         return variance.evaluate(values);
314     }
315 
316     /**
317      * Returns the variance of the entries in the specified portion of
318      * the input array, or <code>Double.NaN</code> if the designated subarray
319      * is empty.
320      * <p>
321      * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
322      * details on the computing algorithm.</p>
323      * <p>
324      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
325      * <p>
326      * Throws <code>IllegalArgumentException</code> if the array is null or the
327      * array index parameters are not valid.</p>
328      * 
329      * @param values the input array
330      * @param begin index of the first array element to include
331      * @param length the number of elements to include
332      * @return the variance of the values or Double.NaN if length = 0
333      * @throws IllegalArgumentException if the array is null or the array index
334      *  parameters are not valid
335      */
336     public static double variance(final double[] values, final int begin,
337             final int length) {
338         return variance.evaluate(values, begin, length);
339     }
340     
341     /**
342      * Returns the variance of the entries in the specified portion of
343      * the input array, using the precomputed mean value.  Returns 
344      * <code>Double.NaN</code> if the designated subarray is empty.
345      * <p>
346      * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
347      * details on the computing algorithm.</p>
348      * <p>
349      * The formula used assumes that the supplied mean value is the arithmetic
350      * mean of the sample data, not a known population parameter.  This method
351      * is supplied only to save computation when the mean has already been
352      * computed.</p>
353      * <p>
354      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
355      * <p>
356      * Throws <code>IllegalArgumentException</code> if the array is null or the
357      * array index parameters are not valid.</p>
358      * 
359      * @param values the input array
360      * @param mean the precomputed mean value
361      * @param begin index of the first array element to include
362      * @param length the number of elements to include
363      * @return the variance of the values or Double.NaN if length = 0
364      * @throws IllegalArgumentException if the array is null or the array index
365      *  parameters are not valid
366      */
367     public static double variance(final double[] values, final double mean, 
368             final int begin, final int length) {
369         return variance.evaluate(values, mean, begin, length);    
370     }
371     
372     /**
373      * Returns the variance of the entries in the input array, using the
374      * precomputed mean value.  Returns <code>Double.NaN</code> if the array
375      * is empty.  
376      * <p>
377      * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
378      * details on the computing algorithm.</p>  
379      * <p>
380      * The formula used assumes that the supplied mean value is the arithmetic
381      * mean of the sample data, not a known population parameter.  This method
382      * is supplied only to save computation when the mean has already been
383      * computed.</p>
384      * <p>
385      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
386      * <p>
387      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
388      * 
389      * @param values the input array
390      * @param mean the precomputed mean value
391      * @return the variance of the values or Double.NaN if the array is empty
392      * @throws IllegalArgumentException if the array is null
393      */
394     public static double variance(final double[] values, final double mean) {
395         return variance.evaluate(values, mean);    
396     }
397 
398     /**
399      * Returns the maximum of the entries in the input array, or 
400      * <code>Double.NaN</code> if the array is empty.
401      * <p>
402      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
403      * <p>
404      * <ul>
405      * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 
406      * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
407      * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, 
408      * the result is <code>Double.POSITIVE_INFINITY.</code></li>
409      * </ul></p>
410      * 
411      * @param values the input array
412      * @return the maximum of the values or Double.NaN if the array is empty
413      * @throws IllegalArgumentException if the array is null
414      */
415     public static double max(final double[] values) {
416         return max.evaluate(values);
417     }
418 
419     /**
420      * Returns the maximum of the entries in the specified portion of
421      * the input array, or <code>Double.NaN</code> if the designated subarray
422      * is empty.
423      * <p>
424      * Throws <code>IllegalArgumentException</code> if the array is null or
425      * the array index parameters are not valid.</p>
426      * <p>
427      * <ul>
428      * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 
429      * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
430      * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, 
431      * the result is <code>Double.POSITIVE_INFINITY.</code></li>
432      * </ul></p>
433      * 
434      * @param values the input array
435      * @param begin index of the first array element to include
436      * @param length the number of elements to include
437      * @return the maximum of the values or Double.NaN if length = 0
438      * @throws IllegalArgumentException if the array is null or the array index
439      * parameters are not valid
440      */
441     public static double max(final double[] values, final int begin,
442             final int length) {
443         return max.evaluate(values, begin, length);
444     }
445 
446      /**
447      * Returns the minimum of the entries in the input array, or 
448      * <code>Double.NaN</code> if the array is empty.
449      * <p>
450      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
451      * <p>
452      * <ul>
453      * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 
454      * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
455      * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, 
456      * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
457      * </ul> </p>
458      * 
459      * @param values the input array
460      * @return the minimum of the values or Double.NaN if the array is empty
461      * @throws IllegalArgumentException if the array is null
462      */
463     public static double min(final double[] values) {
464         return min.evaluate(values);
465     }
466 
467      /**
468      * Returns the minimum of the entries in the specified portion of
469      * the input array, or <code>Double.NaN</code> if the designated subarray
470      * is empty.
471      * <p>
472      * Throws <code>IllegalArgumentException</code> if the array is null or
473      * the array index parameters are not valid.</p>
474      * <p>
475      * <ul>
476      * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 
477      * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
478      * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, 
479      * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
480      * </ul></p>
481      * 
482      * @param values the input array
483      * @param begin index of the first array element to include
484      * @param length the number of elements to include
485      * @return the minimum of the values or Double.NaN if length = 0
486      * @throws IllegalArgumentException if the array is null or the array index
487      * parameters are not valid
488      */
489     public static double min(final double[] values, final int begin,
490             final int length) {
491         return min.evaluate(values, begin, length);
492     }
493     
494     /**
495      * Returns an estimate of the <code>p</code>th percentile of the values
496      * in the <code>values</code> array.
497      * <p>
498      * <ul>
499      * <li>Returns <code>Double.NaN</code> if <code>values</code> has length 
500      * <code>0</code></li></p>
501      * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
502      *  if <code>values</code> has length <code>1</code></li>
503      * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
504      * is null  or p is not a valid quantile value (p must be greater than 0
505      * and less than or equal to 100)</li>
506      * </ul></p>
507      * <p>
508      * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
509      * a description of the percentile estimation algorithm used.</p>
510      * 
511      * @param values input array of values
512      * @param p the percentile value to compute
513      * @return the percentile value or Double.NaN if the array is empty
514      * @throws IllegalArgumentException if <code>values</code> is null 
515      * or p is invalid
516      */
517     public static double percentile(final double[] values, final double p) {
518             return percentile.evaluate(values,p);
519     }
520 
521      /**
522      * Returns an estimate of the <code>p</code>th percentile of the values
523      * in the <code>values</code> array, starting with the element in (0-based)
524      * position <code>begin</code> in the array and including <code>length</code>
525      * values.
526      * <p>
527      * <ul>
528      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
529      * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
530      *  if <code>length = 1 </code></li>
531      * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
532      *  is null , <code>begin</code> or <code>length</code> is invalid, or 
533      * <code>p</code> is not a valid quantile value (p must be greater than 0
534      * and less than or equal to 100)</li>
535      * </ul></p>
536      * <p>
537       * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
538       * a description of the percentile estimation algorithm used.</p>
539      * 
540      * @param values array of input values
541      * @param p  the percentile to compute
542      * @param begin  the first (0-based) element to include in the computation
543      * @param length  the number of array elements to include
544      * @return  the percentile value
545      * @throws IllegalArgumentException if the parameters are not valid or the
546      * input array is null
547      */
548     public static double percentile(final double[] values, final int begin, 
549             final int length, final double p) {
550         return percentile.evaluate(values, begin, length, p);
551     }   
552     
553     /**
554      * Returns the sum of the (signed) differences between corresponding elements of the
555      * input arrays -- i.e., sum(sample1[i] - sample2[i]).
556      * 
557      * @param sample1  the first array
558      * @param sample2  the second array
559      * @return sum of paired differences
560      * @throws IllegalArgumentException if the arrays do not have the same
561      * (positive) length
562      */
563     public static double sumDifference(final double[] sample1, final double[] sample2)
564         throws IllegalArgumentException {
565         int n = sample1.length;
566         if (n  != sample2.length || n < 1) {
567             throw new IllegalArgumentException 
568                 ("Input arrays must have the same (positive) length.");
569         }
570         double result = 0;
571         for (int i = 0; i < n; i++) {
572             result += sample1[i] - sample2[i];
573         }
574         return result;
575     }
576     
577     /**
578      * Returns the mean of the (signed) differences between corresponding elements of the
579      * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length.
580      * 
581      * @param sample1  the first array
582      * @param sample2  the second array
583      * @return mean of paired differences
584      * @throws IllegalArgumentException if the arrays do not have the same
585      * (positive) length
586      */
587     public static double meanDifference(final double[] sample1, final double[] sample2)
588     throws IllegalArgumentException {
589         return sumDifference(sample1, sample2) / (double) sample1.length;
590     }
591     
592     /**
593      * Returns the variance of the (signed) differences between corresponding elements of the
594      * input arrays -- i.e., var(sample1[i] - sample2[i]).
595      * 
596      * @param sample1  the first array
597      * @param sample2  the second array
598      * @param meanDifference   the mean difference between corresponding entries 
599      * @see #meanDifference(double[],double[])
600      * @return variance of paired differences
601      * @throws IllegalArgumentException if the arrays do not have the same
602      * length or their common length is less than 2.
603      */
604     public static double varianceDifference(final double[] sample1, final double[] sample2, 
605             double meanDifference)  throws IllegalArgumentException {
606         double sum1 = 0d;
607         double sum2 = 0d;
608         double diff = 0d;
609         int n = sample1.length;
610         if (n < 2 || n != sample2.length) {
611             throw new IllegalArgumentException("Input array lengths must be equal and at least 2.");
612         }
613         for (int i = 0; i < n; i++) {
614             diff = sample1[i] - sample2[i];
615             sum1 += (diff - meanDifference) *(diff - meanDifference);
616             sum2 += diff - meanDifference;
617         }
618         return (sum1 - (sum2 * sum2 / (double) n)) / (double) (n - 1);
619     }      
620     
621 }