View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.inference;
18  
19  import org.apache.commons.math.MathException;
20  import org.apache.commons.math.stat.descriptive.summary.Sum;
21  import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
22  
23  import org.apache.commons.math.distribution.FDistribution;
24  import org.apache.commons.math.distribution.FDistributionImpl;
25  
26  import java.util.Collection;
27  import java.util.Iterator;
28  
29  
30  /**
31   * Implements one-way ANOVA statistics defined in the {@link OneWayAnovaImpl}
32   * interface.
33   * 
34   * <p>Uses the 
35   * {@link org.apache.commons.math.distribution.FDistribution
36   *  commons-math F Distribution implementation} to estimate exact p-values.</p>
37   *
38   * <p>This implementation is based on a description at 
39   * http://faculty.vassar.edu/lowry/ch13pt1.html</p>
40   * <pre>
41   * Abbreviations: bg = between groups,
42   *                wg = within groups,
43   *                ss = sum squared deviations
44   * </pre>
45   *
46   * @since 1.2
47   * @version $Revision$ $Date$
48   */
49  public class OneWayAnovaImpl implements OneWayAnova  {
50  
51      /**
52       * Default constructor.
53       */
54      public OneWayAnovaImpl() {
55      }
56      
57      /**
58       * {@inheritDoc}<p>
59       * This implementation computes the F statistic using the definitional 
60       * formula<pre>
61       *   F = msbg/mswg</pre>
62       * where<pre>
63       *  msbg = between group mean square
64       *  mswg = within group mean square</pre>
65       * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
66       * here</a></p>
67       */
68      public double anovaFValue(Collection categoryData)
69          throws IllegalArgumentException, MathException {
70          AnovaStats a = anovaStats(categoryData);
71          return a.F;
72      }
73  
74      /**
75       * {@inheritDoc}<p>
76       * This implementation uses the
77       * {@link org.apache.commons.math.distribution.FDistribution
78       * commons-math F Distribution implementation} to estimate the exact
79       * p-value, using the formula<pre>
80       *   p = 1 - cumulativeProbability(F)</pre>
81       * where <code>F</code> is the F value and <code>cumulativeProbability</code>
82       * is the commons-math implementation of the F distribution.</p>
83       */
84      public double anovaPValue(Collection categoryData)
85          throws IllegalArgumentException, MathException {
86          AnovaStats a = anovaStats(categoryData);
87          FDistribution fdist = new FDistributionImpl(a.dfbg, a.dfwg);
88          return 1.0 - fdist.cumulativeProbability(a.F);
89      }
90  
91      /**
92       * {@inheritDoc}<p>
93       * This implementation uses the
94       * {@link org.apache.commons.math.distribution.FDistribution
95       * commons-math F Distribution implementation} to estimate the exact
96       * p-value, using the formula<pre>
97       *   p = 1 - cumulativeProbability(F)</pre>
98       * where <code>F</code> is the F value and <code>cumulativeProbability</code>
99       * is the commons-math implementation of the F distribution.</p>
100      * <p>True is returned iff the estimated p-value is less than alpha.</p>
101      */
102     public boolean anovaTest(Collection categoryData, double alpha)
103         throws IllegalArgumentException, MathException {
104         if ((alpha <= 0) || (alpha > 0.5)) {
105             throw new IllegalArgumentException("bad significance level: " + alpha);
106         }
107         return (anovaPValue(categoryData) < alpha);
108     }
109 
110 
111     /**
112      * This method actually does the calculations (except P-value).
113      * 
114      * @param categoryData <code>Collection</code> of <code>double[]</code>
115      * arrays each containing data for one category
116      * @return computed AnovaStats
117      * @throws IllegalArgumentException if categoryData does not meet
118      * preconditions specified in the interface definition
119      * @throws MathException if an error occurs computing the Anova stats
120      */
121     private AnovaStats anovaStats(Collection categoryData)
122         throws IllegalArgumentException, MathException {
123 
124         // check if we have enough categories
125         if (categoryData.size() < 2) {
126             throw new IllegalArgumentException(
127                     "ANOVA: two or more categories required");
128         }
129         
130         // check if each category has enough data and all is double[]
131         for (Iterator iterator = categoryData.iterator(); iterator.hasNext();) {
132             double[] array;
133             try {
134                 array = (double[])iterator.next();
135             } catch (ClassCastException ex) {
136                 throw new IllegalArgumentException(
137                         "ANOVA: categoryData contains non-double[] elements.");
138             }
139             if (array.length <= 1) {
140                 throw new IllegalArgumentException(
141                         "ANOVA: one element of categoryData has fewer than 2 values.");
142             }
143         }
144 
145         int dfwg = 0;
146         double sswg = 0;
147         Sum totsum = new Sum();
148         SumOfSquares totsumsq = new SumOfSquares();
149         int totnum = 0;
150         
151         for (Iterator iterator = categoryData.iterator(); iterator.hasNext();) {
152             double[] data = (double[])iterator.next();
153 
154             Sum sum = new Sum();
155             SumOfSquares sumsq = new SumOfSquares();
156             int num = 0;
157 
158             for (int i = 0; i < data.length; i++) {
159                 double val = data[i];
160 
161                 // within category
162                 num++;
163                 sum.increment(val);
164                 sumsq.increment(val);
165 
166                 // for all categories
167                 totnum++;
168                 totsum.increment(val);
169                 totsumsq.increment(val);
170             }
171             dfwg += num - 1;
172             double ss = sumsq.getResult() - sum.getResult() * sum.getResult() / num;
173             sswg += ss;
174         }
175         double sst = totsumsq.getResult() - totsum.getResult() * 
176             totsum.getResult()/totnum;
177         double ssbg = sst - sswg;
178         int dfbg = categoryData.size() - 1;
179         double msbg = ssbg/dfbg;
180         double mswg = sswg/dfwg;
181         double F = msbg/mswg;
182 
183         return new AnovaStats(dfbg, dfwg, F);
184     }
185 
186     /** 
187         Convenience class to pass dfbg,dfwg,F values around within AnovaImpl.
188         No get/set methods provided.
189     */
190     private static class AnovaStats {
191         private int dfbg;
192         private int dfwg;
193         private double F;
194 
195         /**
196          * Constructor
197          * @param dfbg degrees of freedom in numerator (between groups)
198          * @param dfwg degrees of freedom in denominator (within groups)
199          * @param F statistic
200          */
201         AnovaStats(int dfbg, int dfwg, double F) {
202             this.dfbg = dfbg;
203             this.dfwg = dfwg;
204             this.F = F;
205         }
206     }
207 
208 }