1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.math.distribution; 19 20 import java.io.Serializable; 21 22 import org.apache.commons.math.util.MathUtils; 23 24 /** 25 * The default implementation of {@link HypergeometricDistribution}. 26 * 27 * @version $Revision: 480440 $ $Date: 2006-11-29 00:14:12 -0700 (Wed, 29 Nov 2006) $ 28 */ 29 public class HypergeometricDistributionImpl extends AbstractIntegerDistribution 30 implements HypergeometricDistribution, Serializable 31 { 32 33 /** Serializable version identifier */ 34 private static final long serialVersionUID = -436928820673516179L; 35 36 /** The number of successes in the population. */ 37 private int numberOfSuccesses; 38 39 /** The population size. */ 40 private int populationSize; 41 42 /** The sample size. */ 43 private int sampleSize; 44 45 /** 46 * Construct a new hypergeometric distribution with the given the population 47 * size, the number of successes in the population, and the sample size. 48 * @param populationSize the population size. 49 * @param numberOfSuccesses number of successes in the population. 50 * @param sampleSize the sample size. 51 */ 52 public HypergeometricDistributionImpl(int populationSize, 53 int numberOfSuccesses, int sampleSize) { 54 super(); 55 if (numberOfSuccesses > populationSize) { 56 throw new IllegalArgumentException( 57 "number of successes must be less than or equal to " + 58 "population size"); 59 } 60 if (sampleSize > populationSize) { 61 throw new IllegalArgumentException( 62 "sample size must be less than or equal to population size"); 63 } 64 setPopulationSize(populationSize); 65 setSampleSize(sampleSize); 66 setNumberOfSuccesses(numberOfSuccesses); 67 } 68 69 /** 70 * For this disbution, X, this method returns P(X ≤ x). 71 * @param x the value at which the PDF is evaluated. 72 * @return PDF for this distribution. 73 */ 74 public double cumulativeProbability(int x) { 75 double ret; 76 77 int n = getPopulationSize(); 78 int m = getNumberOfSuccesses(); 79 int k = getSampleSize(); 80 81 int[] domain = getDomain(n, m, k); 82 if (x < domain[0]) { 83 ret = 0.0; 84 } else if(x >= domain[1]) { 85 ret = 1.0; 86 } else { 87 ret = innerCumulativeProbability(domain[0], x, 1, n, m, k); 88 } 89 90 return ret; 91 } 92 93 /** 94 * Return the domain for the given hypergeometric distribution parameters. 95 * @param n the population size. 96 * @param m number of successes in the population. 97 * @param k the sample size. 98 * @return a two element array containing the lower and upper bounds of the 99 * hypergeometric distribution. 100 */ 101 private int[] getDomain(int n, int m, int k){ 102 return new int[]{ 103 getLowerDomain(n, m, k), 104 getUpperDomain(m, k) 105 }; 106 } 107 108 /** 109 * Access the domain value lower bound, based on <code>p</code>, used to 110 * bracket a PDF root. 111 * 112 * @param p the desired probability for the critical value 113 * @return domain value lower bound, i.e. 114 * P(X < <i>lower bound</i>) < <code>p</code> 115 */ 116 protected int getDomainLowerBound(double p) { 117 return getLowerDomain(getPopulationSize(), getNumberOfSuccesses(), 118 getSampleSize()); 119 } 120 121 /** 122 * Access the domain value upper bound, based on <code>p</code>, used to 123 * bracket a PDF root. 124 * 125 * @param p the desired probability for the critical value 126 * @return domain value upper bound, i.e. 127 * P(X < <i>upper bound</i>) > <code>p</code> 128 */ 129 protected int getDomainUpperBound(double p) { 130 return getUpperDomain(getSampleSize(), getNumberOfSuccesses()); 131 } 132 133 /** 134 * Return the lowest domain value for the given hypergeometric distribution 135 * parameters. 136 * @param n the population size. 137 * @param m number of successes in the population. 138 * @param k the sample size. 139 * @return the lowest domain value of the hypergeometric distribution. 140 */ 141 private int getLowerDomain(int n, int m, int k) { 142 return Math.max(0, m - (n - k)); 143 } 144 145 /** 146 * Access the number of successes. 147 * @return the number of successes. 148 */ 149 public int getNumberOfSuccesses() { 150 return numberOfSuccesses; 151 } 152 153 /** 154 * Access the population size. 155 * @return the population size. 156 */ 157 public int getPopulationSize() { 158 return populationSize; 159 } 160 161 /** 162 * Access the sample size. 163 * @return the sample size. 164 */ 165 public int getSampleSize() { 166 return sampleSize; 167 } 168 169 /** 170 * Return the highest domain value for the given hypergeometric distribution 171 * parameters. 172 * @param m number of successes in the population. 173 * @param k the sample size. 174 * @return the highest domain value of the hypergeometric distribution. 175 */ 176 private int getUpperDomain(int m, int k){ 177 return Math.min(k, m); 178 } 179 180 /** 181 * For this disbution, X, this method returns P(X = x). 182 * 183 * @param x the value at which the PMF is evaluated. 184 * @return PMF for this distribution. 185 */ 186 public double probability(int x) { 187 double ret; 188 189 int n = getPopulationSize(); 190 int m = getNumberOfSuccesses(); 191 int k = getSampleSize(); 192 193 int[] domain = getDomain(n, m, k); 194 if(x < domain[0] || x > domain[1]){ 195 ret = 0.0; 196 } else { 197 ret = probability(n, m, k, x); 198 } 199 200 return ret; 201 } 202 203 /** 204 * For the disbution, X, defined by the given hypergeometric distribution 205 * parameters, this method returns P(X = x). 206 * 207 * @param n the population size. 208 * @param m number of successes in the population. 209 * @param k the sample size. 210 * @param x the value at which the PMF is evaluated. 211 * @return PMF for the distribution. 212 */ 213 private double probability(int n, int m, int k, int x) { 214 return Math.exp(MathUtils.binomialCoefficientLog(m, x) + 215 MathUtils.binomialCoefficientLog(n - m, k - x) - 216 MathUtils.binomialCoefficientLog(n, k)); 217 } 218 219 /** 220 * Modify the number of successes. 221 * @param num the new number of successes. 222 * @throws IllegalArgumentException if <code>num</code> is negative. 223 */ 224 public void setNumberOfSuccesses(int num) { 225 if(num < 0){ 226 throw new IllegalArgumentException( 227 "number of successes must be non-negative."); 228 } 229 numberOfSuccesses = num; 230 } 231 232 /** 233 * Modify the population size. 234 * @param size the new population size. 235 * @throws IllegalArgumentException if <code>size</code> is not positive. 236 */ 237 public void setPopulationSize(int size) { 238 if(size <= 0){ 239 throw new IllegalArgumentException( 240 "population size must be positive."); 241 } 242 populationSize = size; 243 } 244 245 /** 246 * Modify the sample size. 247 * @param size the new sample size. 248 * @throws IllegalArgumentException if <code>size</code> is negative. 249 */ 250 public void setSampleSize(int size) { 251 if (size < 0) { 252 throw new IllegalArgumentException( 253 "sample size must be non-negative."); 254 } 255 sampleSize = size; 256 } 257 258 /** 259 * For this disbution, X, this method returns P(X ≥ x). 260 * @param x the value at which the CDF is evaluated. 261 * @return upper tail CDF for this distribution. 262 * @since 1.1 263 */ 264 public double upperCumulativeProbability(int x) { 265 double ret; 266 267 int n = getPopulationSize(); 268 int m = getNumberOfSuccesses(); 269 int k = getSampleSize(); 270 271 int[] domain = getDomain(n, m, k); 272 if (x < domain[0]) { 273 ret = 1.0; 274 } else if(x > domain[1]) { 275 ret = 0.0; 276 } else { 277 ret = innerCumulativeProbability(domain[1], x, -1, n, m, k); 278 } 279 280 return ret; 281 } 282 283 /** 284 * For this disbution, X, this method returns P(x0 ≤ X ≤ x1). This 285 * probability is computed by summing the point probabilities for the values 286 * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx. 287 * @param x0 the inclusive, lower bound 288 * @param x1 the inclusive, upper bound 289 * @param dx the direction of summation. 1 indicates summing from x0 to x1. 290 * 0 indicates summing from x1 to x0. 291 * @param n the population size. 292 * @param m number of successes in the population. 293 * @param k the sample size. 294 * @return P(x0 ≤ X ≤ x1). 295 */ 296 private double innerCumulativeProbability( 297 int x0, int x1, int dx, int n, int m, int k) 298 { 299 double ret = probability(n, m, k, x0); 300 while (x0 != x1) { 301 x0 += dx; 302 ret += probability(n, m, k, x0); 303 } 304 return ret; 305 } 306 }