View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math.distribution;
19  
20  import java.io.Serializable;
21  
22  import org.apache.commons.math.util.MathUtils;
23  
24  /**
25   * The default implementation of {@link HypergeometricDistribution}.
26   *
27   * @version $Revision: 480440 $ $Date: 2006-11-29 00:14:12 -0700 (Wed, 29 Nov 2006) $
28   */
29  public class HypergeometricDistributionImpl extends AbstractIntegerDistribution
30      implements HypergeometricDistribution, Serializable 
31  {
32  
33      /** Serializable version identifier */
34      private static final long serialVersionUID = -436928820673516179L;
35  
36      /** The number of successes in the population. */
37      private int numberOfSuccesses;
38      
39      /** The population size. */
40      private int populationSize;
41      
42      /** The sample size. */
43      private int sampleSize;
44      
45      /**
46       * Construct a new hypergeometric distribution with the given the population
47       * size, the number of successes in the population, and the sample size.
48       * @param populationSize the population size.
49       * @param numberOfSuccesses number of successes in the population.
50       * @param sampleSize the sample size.
51       */
52      public HypergeometricDistributionImpl(int populationSize,
53          int numberOfSuccesses, int sampleSize) {
54          super();
55          if (numberOfSuccesses > populationSize) {
56              throw new IllegalArgumentException(
57                  "number of successes must be less than or equal to " +
58                  "population size");
59          }
60          if (sampleSize > populationSize) {
61              throw new IllegalArgumentException(
62              "sample size must be less than or equal to population size");
63          }
64          setPopulationSize(populationSize);
65          setSampleSize(sampleSize);
66          setNumberOfSuccesses(numberOfSuccesses);
67      }
68  
69      /**
70       * For this disbution, X, this method returns P(X ≤ x).
71       * @param x the value at which the PDF is evaluated.
72       * @return PDF for this distribution. 
73       */
74      public double cumulativeProbability(int x) {
75          double ret;
76          
77          int n = getPopulationSize();
78          int m = getNumberOfSuccesses();
79          int k = getSampleSize();
80  
81          int[] domain = getDomain(n, m, k);
82          if (x < domain[0]) {
83              ret = 0.0;
84          } else if(x >= domain[1]) {
85              ret = 1.0;
86          } else {
87              ret = innerCumulativeProbability(domain[0], x, 1, n, m, k);
88          }
89          
90          return ret;
91      }
92  
93      /**
94       * Return the domain for the given hypergeometric distribution parameters.
95       * @param n the population size.
96       * @param m number of successes in the population.
97       * @param k the sample size.
98       * @return a two element array containing the lower and upper bounds of the
99       *         hypergeometric distribution.  
100      */
101     private int[] getDomain(int n, int m, int k){
102         return new int[]{
103             getLowerDomain(n, m, k),
104             getUpperDomain(m, k)
105         };
106     }
107     
108     /**
109      * Access the domain value lower bound, based on <code>p</code>, used to
110      * bracket a PDF root.
111      * 
112      * @param p the desired probability for the critical value
113      * @return domain value lower bound, i.e.
114      *         P(X &lt; <i>lower bound</i>) &lt; <code>p</code> 
115      */
116     protected int getDomainLowerBound(double p) {
117         return getLowerDomain(getPopulationSize(), getNumberOfSuccesses(),
118             getSampleSize());
119     }
120     
121     /**
122      * Access the domain value upper bound, based on <code>p</code>, used to
123      * bracket a PDF root.
124      * 
125      * @param p the desired probability for the critical value
126      * @return domain value upper bound, i.e.
127      *         P(X &lt; <i>upper bound</i>) &gt; <code>p</code> 
128      */
129     protected int getDomainUpperBound(double p) {
130         return getUpperDomain(getSampleSize(), getNumberOfSuccesses());
131     }
132 
133     /**
134      * Return the lowest domain value for the given hypergeometric distribution
135      * parameters.
136      * @param n the population size.
137      * @param m number of successes in the population.
138      * @param k the sample size.
139      * @return the lowest domain value of the hypergeometric distribution.  
140      */
141     private int getLowerDomain(int n, int m, int k) {
142         return Math.max(0, m - (n - k));
143     }
144 
145     /**
146      * Access the number of successes.
147      * @return the number of successes.
148      */
149     public int getNumberOfSuccesses() {
150         return numberOfSuccesses;
151     }
152 
153     /**
154      * Access the population size.
155      * @return the population size.
156      */
157     public int getPopulationSize() {
158         return populationSize;
159     }
160 
161     /**
162      * Access the sample size.
163      * @return the sample size.
164      */
165     public int getSampleSize() {
166         return sampleSize;
167     }
168 
169     /**
170      * Return the highest domain value for the given hypergeometric distribution
171      * parameters.
172      * @param m number of successes in the population.
173      * @param k the sample size.
174      * @return the highest domain value of the hypergeometric distribution.  
175      */
176     private int getUpperDomain(int m, int k){
177         return Math.min(k, m);
178     }
179 
180     /**
181      * For this disbution, X, this method returns P(X = x).
182      * 
183      * @param x the value at which the PMF is evaluated.
184      * @return PMF for this distribution. 
185      */
186     public double probability(int x) {
187         double ret;
188         
189         int n = getPopulationSize();
190         int m = getNumberOfSuccesses();
191         int k = getSampleSize();
192 
193         int[] domain = getDomain(n, m, k);
194         if(x < domain[0] || x > domain[1]){
195             ret = 0.0;
196         } else {
197             ret = probability(n, m, k, x);
198         }
199         
200         return ret;
201     }
202     
203     /**
204      * For the disbution, X, defined by the given hypergeometric distribution
205      * parameters, this method returns P(X = x).
206      * 
207      * @param n the population size.
208      * @param m number of successes in the population.
209      * @param k the sample size.
210      * @param x the value at which the PMF is evaluated.
211      * @return PMF for the distribution. 
212      */
213     private double probability(int n, int m, int k, int x) {
214         return Math.exp(MathUtils.binomialCoefficientLog(m, x) +
215             MathUtils.binomialCoefficientLog(n - m, k - x) -
216             MathUtils.binomialCoefficientLog(n, k));
217     }
218 
219     /**
220      * Modify the number of successes.
221      * @param num the new number of successes.
222      * @throws IllegalArgumentException if <code>num</code> is negative.
223      */
224     public void setNumberOfSuccesses(int num) {
225         if(num < 0){
226             throw new IllegalArgumentException(
227                 "number of successes must be non-negative.");
228         }
229         numberOfSuccesses = num;
230     }
231 
232     /**
233      * Modify the population size.
234      * @param size the new population size.
235      * @throws IllegalArgumentException if <code>size</code> is not positive.
236      */
237     public void setPopulationSize(int size) {
238         if(size <= 0){
239             throw new IllegalArgumentException(
240                 "population size must be positive.");
241         }
242         populationSize = size;
243     }
244     
245     /**
246      * Modify the sample size.
247      * @param size the new sample size.
248      * @throws IllegalArgumentException if <code>size</code> is negative.
249      */
250     public void setSampleSize(int size) {
251         if (size < 0) {
252             throw new IllegalArgumentException(
253                 "sample size must be non-negative.");
254         }    
255         sampleSize = size;
256     }
257 
258     /**
259      * For this disbution, X, this method returns P(X &ge; x).
260      * @param x the value at which the CDF is evaluated.
261      * @return upper tail CDF for this distribution.
262      * @since 1.1
263      */
264     public double upperCumulativeProbability(int x) {
265         double ret;
266         
267         int n = getPopulationSize();
268         int m = getNumberOfSuccesses();
269         int k = getSampleSize();
270 
271         int[] domain = getDomain(n, m, k);
272         if (x < domain[0]) {
273             ret = 1.0;
274         } else if(x > domain[1]) {
275             ret = 0.0;
276         } else {
277             ret = innerCumulativeProbability(domain[1], x, -1, n, m, k);
278         }
279         
280         return ret;
281     }
282     
283     /**
284      * For this disbution, X, this method returns P(x0 &le; X &le; x1).  This
285      * probability is computed by summing the point probabilities for the values
286      * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx. 
287      * @param x0 the inclusive, lower bound
288      * @param x1 the inclusive, upper bound
289      * @param dx the direction of summation. 1 indicates summing from x0 to x1.
290      *           0 indicates summing from x1 to x0.
291      * @param n the population size.
292      * @param m number of successes in the population.
293      * @param k the sample size.
294      * @return P(x0 &le; X &le; x1). 
295      */
296     private double innerCumulativeProbability(
297         int x0, int x1, int dx, int n, int m, int k)
298     {
299         double ret = probability(n, m, k, x0);
300         while (x0 != x1) {
301             x0 += dx;
302             ret += probability(n, m, k, x0);
303         }
304         return ret;
305     }
306 }