1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.rng.examples.jmh.sampling.distribution; 19 20 import org.apache.commons.rng.UniformRandomProvider; 21 import org.apache.commons.rng.sampling.distribution.DiscreteSampler; 22 import org.apache.commons.rng.sampling.distribution.KempSmallMeanPoissonSampler; 23 import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler; 24 import org.apache.commons.rng.sampling.distribution.SmallMeanPoissonSampler; 25 import org.apache.commons.rng.simple.RandomSource; 26 27 import org.openjdk.jmh.annotations.Benchmark; 28 import org.openjdk.jmh.annotations.BenchmarkMode; 29 import org.openjdk.jmh.annotations.Fork; 30 import org.openjdk.jmh.annotations.Measurement; 31 import org.openjdk.jmh.annotations.Mode; 32 import org.openjdk.jmh.annotations.OutputTimeUnit; 33 import org.openjdk.jmh.annotations.Param; 34 import org.openjdk.jmh.annotations.Scope; 35 import org.openjdk.jmh.annotations.Setup; 36 import org.openjdk.jmh.annotations.State; 37 import org.openjdk.jmh.annotations.Warmup; 38 39 import java.util.concurrent.TimeUnit; 40 41 /** 42 * Executes benchmark to compare the speed of generation of Poisson distributed random numbers. 43 */ 44 @BenchmarkMode(Mode.AverageTime) 45 @OutputTimeUnit(TimeUnit.NANOSECONDS) 46 @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 47 @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 48 @State(Scope.Benchmark) 49 @Fork(value = 1, jvmArgs = {"-server", "-Xms128M", "-Xmx128M"}) 50 public class PoissonSamplersPerformance { 51 /** 52 * The value for the baseline generation of an {@code int} value. 53 * 54 * <p>This must NOT be final!</p> 55 */ 56 private int value; 57 58 /** 59 * The mean for the call to {@link Math#exp(double)}. 60 */ 61 @State(Scope.Benchmark) 62 public static class Means { 63 /** 64 * The Poisson mean. This is set at a level where the small mean sampler is to be used 65 * in preference to the large mean sampler. 66 */ 67 @Param({"0.25", 68 "0.5", 69 "1", 70 "2", 71 "4", 72 "8", 73 "16", 74 "32", 75 }) 76 private double mean; 77 78 /** 79 * Gets the mean. 80 * 81 * @return the mean 82 */ 83 public double getMean() { 84 return mean; 85 } 86 } 87 88 /** 89 * The {@link DiscreteSampler} samplers to use for testing. Creates the sampler for each 90 * {@link RandomSource} in the default 91 * {@link org.apache.commons.rng.examples.jmh.RandomSources RandomSources}. 92 */ 93 @State(Scope.Benchmark) 94 public static class Sources { 95 /** 96 * RNG providers. 97 * 98 * <p>Use different speeds.</p> 99 * 100 * @see <a href="https://commons.apache.org/proper/commons-rng/userguide/rng.html"> 101 * Commons RNG user guide</a> 102 */ 103 @Param({ 104 "WELL_44497_B", 105 //"ISAAC", 106 "XO_RO_SHI_RO_128_PLUS", 107 }) 108 private String randomSourceName; 109 110 /** 111 * The sampler type. 112 */ 113 @Param({"SmallMeanPoissonSampler", 114 "KempSmallMeanPoissonSampler", 115 "BoundedKempSmallMeanPoissonSampler", 116 "KempSmallMeanPoissonSamplerP50", 117 "KempSmallMeanPoissonSamplerBinarySearch", 118 "KempSmallMeanPoissonSamplerGuideTable", 119 "LargeMeanPoissonSampler", 120 "TinyMeanPoissonSampler", 121 }) 122 private String samplerType; 123 124 /** 125 * The Poisson mean. This is set at a level where the small mean sampler is to be used 126 * in preference to the large mean sampler. 127 */ 128 @Param({"0.25", 129 "0.5", 130 "1", 131 "2", 132 "4", 133 "8", 134 "16", 135 "32", 136 "64", 137 }) 138 private double mean; 139 140 /** RNG. */ 141 private UniformRandomProvider generator; 142 143 /** The factory. */ 144 private DiscreteSamplerFactory factory; 145 146 /** The sampler. */ 147 private DiscreteSampler sampler; 148 149 /** 150 * A factory for creating DiscreteSampler objects. 151 */ 152 interface DiscreteSamplerFactory { 153 /** 154 * Creates the sampler. 155 * 156 * @return the sampler 157 */ 158 DiscreteSampler create(); 159 } 160 161 /** 162 * @return The RNG. 163 */ 164 public UniformRandomProvider getGenerator() { 165 return generator; 166 } 167 168 /** 169 * Gets the sampler. 170 * 171 * @return The sampler. 172 */ 173 public DiscreteSampler getSampler() { 174 return sampler; 175 } 176 177 /** Instantiates sampler. */ 178 @Setup 179 public void setup() { 180 final RandomSource randomSource = RandomSource.valueOf(randomSourceName); 181 generator = RandomSource.create(randomSource); 182 183 // This would benefit from Java 8 Supplier<DiscreteSampler> lambda function 184 if ("SmallMeanPoissonSampler".equals(samplerType)) { 185 factory = new DiscreteSamplerFactory() { 186 @Override 187 public DiscreteSampler create() { 188 return SmallMeanPoissonSampler.of(generator, mean); 189 } 190 }; 191 } else if ("KempSmallMeanPoissonSampler".equals(samplerType)) { 192 factory = new DiscreteSamplerFactory() { 193 @Override 194 public DiscreteSampler create() { 195 return KempSmallMeanPoissonSampler.of(generator, mean); 196 } 197 }; 198 } else if ("BoundedKempSmallMeanPoissonSampler".equals(samplerType)) { 199 factory = new DiscreteSamplerFactory() { 200 @Override 201 public DiscreteSampler create() { 202 return new BoundedKempSmallMeanPoissonSampler(generator, mean); 203 } 204 }; 205 } else if ("KempSmallMeanPoissonSamplerP50".equals(samplerType)) { 206 factory = new DiscreteSamplerFactory() { 207 @Override 208 public DiscreteSampler create() { 209 return new KempSmallMeanPoissonSamplerP50(generator, mean); 210 } 211 }; 212 } else if ("KempSmallMeanPoissonSamplerBinarySearch".equals(samplerType)) { 213 factory = new DiscreteSamplerFactory() { 214 @Override 215 public DiscreteSampler create() { 216 return new KempSmallMeanPoissonSamplerBinarySearch(generator, mean); 217 } 218 }; 219 } else if ("KempSmallMeanPoissonSamplerGuideTable".equals(samplerType)) { 220 factory = new DiscreteSamplerFactory() { 221 @Override 222 public DiscreteSampler create() { 223 return new KempSmallMeanPoissonSamplerGuideTable(generator, mean); 224 } 225 }; 226 } else if ("LargeMeanPoissonSampler".equals(samplerType)) { 227 factory = new DiscreteSamplerFactory() { 228 @Override 229 public DiscreteSampler create() { 230 // Note this is not valid when mean < 1 231 return LargeMeanPoissonSampler.of(generator, mean); 232 } 233 }; 234 } else if ("TinyMeanPoissonSampler".equals(samplerType)) { 235 factory = new DiscreteSamplerFactory() { 236 @Override 237 public DiscreteSampler create() { 238 // Note this is only valid when mean < -Math.exp(0x1p-32) == 22.18 239 return new TinyMeanPoissonSampler(generator, mean); 240 } 241 }; 242 } 243 sampler = factory.create(); 244 } 245 246 /** 247 * Creates a new instance of the sampler. 248 * 249 * @return The sampler. 250 */ 251 public DiscreteSampler createSampler() { 252 return factory.create(); 253 } 254 } 255 256 /** 257 * Kemp sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson 258 * distribution</a>. 259 * 260 * <ul> 261 * <li> 262 * For small means, a Poisson process is simulated using uniform deviates, as 263 * described in Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed 264 * Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253. 265 * </li> 266 * </ul> 267 * 268 * <p>Note: This is similar to {@link KempSmallMeanPoissonSampler} but the sample is 269 * bounded by 1000 * mean.</p> 270 * 271 * @see <a href="https://www.jstor.org/stable/2346348">Kemp, A.W. (1981) JRSS Vol. 30, pp. 249-253</a> 272 */ 273 static class BoundedKempSmallMeanPoissonSampler 274 implements DiscreteSampler { 275 /** Underlying source of randomness. */ 276 private final UniformRandomProvider rng; 277 /** 278 * Pre-compute {@code Math.exp(-mean)}. 279 * Note: This is the probability of the Poisson sample {@code p(x=0)}. 280 */ 281 private final double p0; 282 /** Pre-compute {@code 1000 * mean} as the upper limit of the sample. */ 283 private final int limit; 284 /** 285 * The mean of the Poisson sample. 286 */ 287 private final double mean; 288 289 /** 290 * @param rng Generator of uniformly distributed random numbers. 291 * @param mean Mean. 292 * @throws IllegalArgumentException if {@code mean <= 0} or {@code mean > 700}. 293 */ 294 BoundedKempSmallMeanPoissonSampler(UniformRandomProvider rng, 295 double mean) { 296 if (mean <= 0) { 297 throw new IllegalArgumentException(); 298 } 299 300 p0 = Math.exp(-mean); 301 if (p0 == 0) { 302 throw new IllegalArgumentException(); 303 } 304 // The returned sample is bounded by 1000 * mean 305 limit = (int) Math.ceil(1000 * mean); 306 this.rng = rng; 307 this.mean = mean; 308 } 309 310 /** {@inheritDoc} */ 311 @Override 312 public int sample() { 313 // Note on the algorithm: 314 // - X is the unknown sample deviate (the output of the algorithm) 315 // - x is the current value from the distribution 316 // - p is the probability of the current value x, p(X=x) 317 // - u is effectively the cumulative probability that the sample X 318 // is equal or above the current value x, p(X>=x) 319 // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x 320 double u = rng.nextDouble(); 321 int x = 0; 322 double p = p0; 323 while (u > p) { 324 u -= p; 325 // Compute the next probability using a recurrence relation. 326 // p(x+1) = p(x) * mean / (x+1) 327 p *= mean / ++x; 328 // The algorithm listed in Kemp (1981) does not check that the rolling probability 329 // is positive. This check is added to ensure a simple bounds in the event that 330 // p == 0 331 if (x == limit) { 332 return x; 333 } 334 } 335 return x; 336 } 337 } 338 339 /** 340 * Kemp sampler for the Poisson distribution. 341 * 342 * <p>Note: This is a modification of the original algorithm by Kemp. It implements a hedge 343 * on the cumulative probability set at 50%. This saves computation in half of the samples.</p> 344 */ 345 static class KempSmallMeanPoissonSamplerP50 346 implements DiscreteSampler { 347 /** The value of p=0.5. */ 348 private static final double ONE_HALF = 0.5; 349 /** 350 * The threshold that defines the cumulative probability for the long tail of the 351 * Poisson distribution. Above this threshold the recurrence relation that computes the 352 * next probability must check that the p-value is not zero. 353 */ 354 private static final double LONG_TAIL_THRESHOLD = 0.999; 355 356 /** Underlying source of randomness. */ 357 private final UniformRandomProvider rng; 358 /** 359 * Pre-compute {@code Math.exp(-mean)}. 360 * Note: This is the probability of the Poisson sample {@code p(x=0)}. 361 */ 362 private final double p0; 363 /** 364 * The mean of the Poisson sample. 365 */ 366 private final double mean; 367 /** 368 * Pre-compute the cumulative probability for all samples up to and including x. 369 * This is F(x) = sum of p(X<=x). 370 * 371 * <p>The value is computed at approximately 50% allowing the algorithm to choose to start 372 * at value (x+1) approximately half of the time. 373 */ 374 private final double fx; 375 /** 376 * Store the value (x+1) corresponding to the next value after the cumulative probability is 377 * above 50%. 378 */ 379 private final int x1; 380 /** 381 * Store the probability value p(x+1), allowing the algorithm to start from the point x+1. 382 */ 383 private final double px1; 384 385 /** 386 * Create a new instance. 387 * 388 * <p>This is valid for means as large as approximately {@code 744}.</p> 389 * 390 * @param rng Generator of uniformly distributed random numbers. 391 * @param mean Mean. 392 * @throws IllegalArgumentException if {@code mean <= 0} or {@code Math.exp(-mean) == 0}. 393 */ 394 KempSmallMeanPoissonSamplerP50(UniformRandomProvider rng, 395 double mean) { 396 if (mean <= 0) { 397 throw new IllegalArgumentException(); 398 } 399 400 this.rng = rng; 401 p0 = Math.exp(-mean); 402 this.mean = mean; 403 404 // Pre-compute a hedge value for the cumulative probability at approximately 50%. 405 // This is only done when p0 is less than the long tail threshold. 406 // The result is that the rolling probability computation should never hit the 407 // long tail where p reaches zero. 408 if (p0 <= LONG_TAIL_THRESHOLD) { 409 // Check the edge case for no probability 410 if (p0 == 0) { 411 throw new IllegalArgumentException(); 412 } 413 414 double p = p0; 415 int x = 0; 416 // Sum is cumulative probability F(x) = sum p(X<=x) 417 double sum = p; 418 while (sum < ONE_HALF) { 419 // Compute the next probability using a recurrence relation. 420 // p(x+1) = p(x) * mean / (x+1) 421 p *= mean / ++x; 422 sum += p; 423 } 424 fx = sum; 425 x1 = x + 1; 426 px1 = p * mean / x1; 427 } else { 428 // Always start at zero. 429 // Note: If NaN is input as the mean this path is executed and the sample is always zero. 430 fx = 0; 431 x1 = 0; 432 px1 = p0; 433 } 434 } 435 436 /** {@inheritDoc} */ 437 @Override 438 public int sample() { 439 // Note on the algorithm: 440 // - X is the unknown sample deviate (the output of the algorithm) 441 // - x is the current value from the distribution 442 // - p is the probability of the current value x, p(X=x) 443 // - u is effectively the cumulative probability that the sample X 444 // is equal or above the current value x, p(X>=x) 445 // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x 446 final double u = rng.nextDouble(); 447 448 if (u <= fx) { 449 // Sample from the lower half of the distribution starting at zero 450 return sampleBeforeLongTail(u, 0, p0); 451 } 452 453 // Sample from the upper half of the distribution starting at cumulative probability fx. 454 // This is reached when u > fx and sample X > x. 455 456 // If below the long tail threshold then omit the check on the asymptote of p -> zero 457 if (u <= LONG_TAIL_THRESHOLD) { 458 return sampleBeforeLongTail(u - fx, x1, px1); 459 } 460 461 return sampleWithinLongTail(u - fx, x1, px1); 462 } 463 464 /** 465 * Compute the sample assuming it is <strong>not</strong> in the long tail of the distribution. 466 * 467 * <p>This avoids a check on the next probability value assuming that the cumulative probability 468 * is at a level where the long tail of the Poisson distribution will not be reached. 469 * 470 * @param u the remaining cumulative probability (p(X>x)) 471 * @param x the current sample value X 472 * @param p the current probability of the sample (p(X=x)) 473 * @return the sample X 474 */ 475 private int sampleBeforeLongTail(double u, int x, double p) { 476 while (u > p) { 477 // Update the remaining cumulative probability 478 u -= p; 479 // Compute the next probability using a recurrence relation. 480 // p(x+1) = p(x) * mean / (x+1) 481 p *= mean / ++x; 482 // The algorithm listed in Kemp (1981) does not check that the rolling probability 483 // is positive (non-zero). This is omitted here on the assumption that the cumulative 484 // probability will not be in the long tail where the probability asymptotes to zero. 485 } 486 return x; 487 } 488 489 /** 490 * Compute the sample assuming it is in the long tail of the distribution. 491 * 492 * <p>This requires a check on the next probability value which is expected to asymptote to zero. 493 * 494 * @param u the remaining cumulative probability 495 * @param x the current sample value X 496 * @param p the current probability of the sample (p(X=x)) 497 * @return the sample X 498 */ 499 private int sampleWithinLongTail(double u, int x, double p) { 500 while (u > p) { 501 // Update the remaining cumulative probability 502 u -= p; 503 // Compute the next probability using a recurrence relation. 504 // p(x+1) = p(x) * mean / (x+1) 505 p *= mean / ++x; 506 // The algorithm listed in Kemp (1981) does not check that the rolling probability 507 // is positive. This check is added to ensure no errors when the limit of the summation 508 // 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic when 509 // in the long tail of the distribution. 510 if (p == 0) { 511 return x; 512 } 513 } 514 return x; 515 } 516 } 517 518 /** 519 * Kemp sampler for the Poisson distribution. 520 * 521 * <p>Note: This is a modification of the original algorithm by Kemp. It stores the 522 * cumulative probability table for repeat use. The table is searched using a binary 523 * search algorithm.</p> 524 */ 525 static class KempSmallMeanPoissonSamplerBinarySearch 526 implements DiscreteSampler { 527 /** 528 * Store the cumulative probability table size for integer means so that 99.99% 529 * of the Poisson distribution is covered. This is done until the table size is 530 * 2 * mean. 531 * 532 * <p>At higher mean the expected range is mean + 4 * sqrt(mean). To avoid the sqrt 533 * the conservative limit of 2 * mean is used. 534 */ 535 private static final int[] TABLE_SIZE = { 536 /* mean 1 to 10. */ 537 8, 10, 12, 14, 16, 18, 20, 22, 24, 25, 538 /* mean 11 to 20. */ 539 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 540 }; 541 542 /** Underlying source of randomness. */ 543 private final UniformRandomProvider rng; 544 /** 545 * The mean of the Poisson sample. 546 */ 547 private final double mean; 548 /** 549 * Store the cumulative probability for all samples up to and including x. 550 * This is F(x) = sum of p(X<=x). 551 * 552 * <p>This table is initialised to store cumulative probabilities for x up to 2 * mean 553 * or 99.99% (whichever is larger). 554 */ 555 private final double[] fx; 556 /** 557 * Store the value x corresponding to the last stored cumulative probability. 558 */ 559 private int lastX; 560 /** 561 * Store the probability value p(x) corresponding to last stored cumulative probability, 562 * allowing the algorithm to start from the point x. 563 */ 564 private double px; 565 566 /** 567 * Create a new instance. 568 * 569 * <p>This is valid for means as large as approximately {@code 744}.</p> 570 * 571 * @param rng Generator of uniformly distributed random numbers. 572 * @param mean Mean. 573 * @throws IllegalArgumentException if {@code mean <= 0} or {@code Math.exp(-mean) == 0}. 574 */ 575 KempSmallMeanPoissonSamplerBinarySearch(UniformRandomProvider rng, 576 double mean) { 577 if (mean <= 0) { 578 throw new IllegalArgumentException(); 579 } 580 581 px = Math.exp(-mean); 582 if (px > 0) { 583 this.rng = rng; 584 this.mean = mean; 585 586 // Initialise the cumulative probability table. 587 // The supported mean where p(x=0) > 0 sets a limit of around 744 so this will always be 588 // possible. 589 final int upperMean = (int) Math.ceil(mean); 590 fx = new double[(upperMean < TABLE_SIZE.length) ? TABLE_SIZE[upperMean] : upperMean * 2]; 591 fx[0] = px; 592 } else { 593 // This will catch NaN mean values 594 throw new IllegalArgumentException(); 595 } 596 } 597 598 /** {@inheritDoc} */ 599 @Override 600 public int sample() { 601 // Note on the algorithm: 602 // - X is the unknown sample deviate (the output of the algorithm) 603 // - x is the current value from the distribution 604 // - p is the probability of the current value x, p(X=x) 605 // - u is effectively the cumulative probability that the sample X 606 // is equal or above the current value x, p(X>=x) 607 // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x 608 final double u = rng.nextDouble(); 609 610 if (u <= fx[lastX]) { 611 // Binary search within known cumulative probability table. 612 // Find x so that u > f[x-1] and u <= f[x]. 613 // This is a looser search than Arrays.binarySearch: 614 // - The output is x = upper. 615 // - The pre-condition check ensures u <= f[upper] at the start. 616 // - The table stores probabilities where f[0] is non-zero. 617 // - u should be >= 0 (or the random generator is broken). 618 // - It avoids comparisons using Double.doubleToLongBits. 619 // - It avoids the low likelihood of equality between two doubles so uses 620 // only 1 compare per loop. 621 // - It uses a weighted middle anticipating that the cumulative distribution 622 // is skewed as the expected use case is a small mean. 623 int lower = 0; 624 int upper = lastX; 625 while (lower < upper) { 626 // Weighted middle is 1/4 of the range between lower and upper 627 final int mid = (3 * lower + upper) >>> 2; 628 final double midVal = fx[mid]; 629 if (u > midVal) { 630 // Change lower such that 631 // u > f[lower - 1] 632 lower = mid + 1; 633 } else { 634 // Change upper such that 635 // u <= f[upper] 636 upper = mid; 637 } 638 } 639 return upper; 640 } 641 642 // The sample is above x 643 int x1 = lastX + 1; 644 645 // Fill the cumulative probability table if possible 646 while (x1 < fx.length) { 647 // Compute the next probability using a recurrence relation. 648 // p(x+1) = p(x) * mean / (x+1) 649 px = nextProbability(px, x1); 650 // Compute the next cumulative probability f(x+1) and update 651 final double sum = fx[lastX] + px; 652 fx[++lastX] = sum; 653 // Check if this is the correct sample 654 if (u <= sum) { 655 return lastX; 656 } 657 x1 = lastX + 1; 658 } 659 660 // The sample is above the range of the cumulative probability table. 661 // Compute using the Kemp method. 662 // This requires the remaining cumulative probability and the probability for x+1. 663 return sampleWithinLongTail(u - fx[lastX], x1, nextProbability(px, x1)); 664 } 665 666 /** 667 * Compute the next probability using a recurrence relation. 668 * 669 * <pre> 670 * p(x + 1) = p(x) * mean / (x + 1) 671 * </pre> 672 * 673 * @param p the probability of x 674 * @param x1 the value of x+1 675 * @return the probability of x+1 676 */ 677 private double nextProbability(double p, int x1) { 678 return p * mean / x1; 679 } 680 681 /** 682 * Compute the sample assuming it is in the long tail of the distribution. 683 * 684 * <p>This requires a check on the next probability value which is expected to asymptote to zero. 685 * 686 * @param u the remaining cumulative probability 687 * @param x the current sample value X 688 * @param p the current probability of the sample (p(X=x)) 689 * @return the sample X 690 */ 691 private int sampleWithinLongTail(double u, int x, double p) { 692 while (u > p) { 693 // Update the remaining cumulative probability 694 u -= p; 695 p = nextProbability(p, ++x); 696 // The algorithm listed in Kemp (1981) does not check that the rolling probability 697 // is positive. This check is added to ensure no errors when the limit of the summation 698 // 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic when 699 // in the long tail of the distribution. 700 if (p == 0) { 701 return x; 702 } 703 } 704 return x; 705 } 706 } 707 /** 708 * Sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson 709 * distribution</a>. 710 * 711 * <p>Note: This is a modification of the original algorithm by Kemp. It stores the 712 * cumulative probability table for repeat use. The table is computed dynamically and 713 * searched using a guide table.</p> 714 */ 715 static class KempSmallMeanPoissonSamplerGuideTable implements DiscreteSampler { 716 /** 717 * Store the cumulative probability table size for integer means so that 99.99% of the 718 * Poisson distribution is covered. This is done until the table size is 2 * mean. 719 * 720 * <p>At higher mean the expected range is mean + 4 * sqrt(mean). To avoid the sqrt 721 * the conservative limit of 2 * mean is used. 722 */ 723 private static final int[] TABLE_SIZE = { 724 /* mean 1 to 10. */ 725 8, 10, 12, 14, 16, 18, 20, 22, 24, 25, 726 /* mean 11 to 20. */ 727 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, }; 728 729 /** Underlying source of randomness. */ 730 private final UniformRandomProvider rng; 731 /** 732 * The mean of the Poisson sample. 733 */ 734 private final double mean; 735 /** 736 * Store the cumulative probability for all samples up to and including x. This is 737 * F(x) = sum of p(X<=x). 738 * 739 * <p>This table is initialised to store cumulative probabilities for x up to 2 * mean 740 * or 99.99% (whichever is larger). 741 */ 742 private final double[] cumulativeProbability; 743 /** 744 * Store the value x corresponding to the last stored cumulative probability. 745 */ 746 private int tabulatedX; 747 /** 748 * Store the probability value p(x), allowing the algorithm to start from the point x. 749 */ 750 private double probabilityX; 751 /** 752 * The inverse cumulative probability guide table. This is a map between the cumulative 753 * probability (f(x)) and the value x. It is used to set the initial point for search 754 * of the cumulative probability table. 755 * 756 * <p>The index into the table is obtained using {@code f(x) * guideTable.length}. The value 757 * stored at the index is value {@code x+1} such that it is the exclusive upper bound 758 * on the sample value for searching the cumulative probability table. It requires the 759 * table search is towards zero.</p> 760 * 761 * <p>Note: Using x+1 ensures that the table can be zero filled upon initialisation and 762 * any index with zero has yet to be allocated.</p> 763 * 764 * <p>The guide table should never be used when the input f(x) is above the current range of 765 * the cumulative probability table. This would create an index that has not been 766 * allocated a mapping. 767 */ 768 private final int[] guideTable; 769 770 /** 771 * Create a new instance. 772 * 773 * <p>This is valid for means as large as approximately {@code 744}.</p> 774 * 775 * @param rng Generator of uniformly distributed random numbers. 776 * @param mean Mean. 777 * @throws IllegalArgumentException if {@code mean <= 0} or 778 * {@code Math.exp(-mean) == 0}. 779 */ 780 KempSmallMeanPoissonSamplerGuideTable(UniformRandomProvider rng, double mean) { 781 if (mean <= 0) { 782 throw new IllegalArgumentException("mean is not strictly positive: " + mean); 783 } 784 785 probabilityX = Math.exp(-mean); 786 if (probabilityX > 0) { 787 this.rng = rng; 788 this.mean = mean; 789 790 // Initialise the cumulative probability table. 791 // The supported mean where p(x=0) > 0 sets a limit of around 744 so the cast to int 792 // will always be possible. 793 final int upperMean = (int) Math.ceil(mean); 794 final int size = (upperMean < TABLE_SIZE.length) ? TABLE_SIZE[upperMean] : upperMean * 2; 795 cumulativeProbability = new double[size]; 796 cumulativeProbability[0] = probabilityX; 797 798 guideTable = new int[cumulativeProbability.length + 1]; 799 initialiseGuideTable(probabilityX); 800 } else { 801 // This will catch NaN mean values 802 throw new IllegalArgumentException("No probability for mean " + mean); 803 } 804 } 805 806 /** 807 * Initialise the cumulative probability guide table. All guide indices at or below the 808 * index corresponding to the given probability will be set to 1. 809 * 810 * @param p0 the probability for x=0 811 */ 812 private void initialiseGuideTable(double p0) { 813 for (int index = getGuideTableIndex(p0); index >= 0; index--) { 814 guideTable[index] = 1; 815 } 816 } 817 818 /** 819 * Fill the inverse cumulative probability guide table. Set the index corresponding to the 820 * given probability to x+1 establishing an exclusive upper bound on x for the probability. 821 * All unused guide indices below the index will also be set to x+1. 822 * 823 * @param p the cumulative probability 824 * @param x the sample value x 825 */ 826 private void updateGuideTable(double p, int x) { 827 // Always set the current index as the guide table is the exclusive upper bound 828 // for searching the cumulative probability table for any value p. 829 // Then fill any lower positions that are not allocated. 830 final int x1 = x + 1; 831 int index = getGuideTableIndex(p); 832 do { 833 guideTable[index--] = x1; 834 } while (index > 0 && guideTable[index] == 0); 835 } 836 837 /** 838 * Gets the guide table index for the probability. This is obtained using 839 * {@code p * (guideTable.length - 1)} so is inside the length of the table. 840 * 841 * @param p the cumulative probability 842 * @return the guide table index 843 */ 844 private int getGuideTableIndex(double p) { 845 // Note: This is only ever called when p is in the range of the cumulative 846 // probability table. So assume 0 <= p <= 1. 847 return (int) (p * (guideTable.length - 1)); 848 } 849 850 /** {@inheritDoc} */ 851 @Override 852 public int sample() { 853 // Note on the algorithm: 854 // 1. Compute a cumulative probability with a uniform deviate (u). 855 // 2. If the probability lies within the tabulated cumulative probabilities 856 // then find the sample value. 857 // 3. If possible expand the tabulated cumulative probabilities up to the value u. 858 // 4. If value u exceeds the capacity for the tabulated cumulative probabilities 859 // then compute the sample value dynamically without storing the probabilities. 860 861 // Compute a probability 862 final double u = rng.nextDouble(); 863 864 // Check the tabulated cumulative probability table 865 if (u <= cumulativeProbability[tabulatedX]) { 866 // Initialise the search using a guide table to find an initial guess. 867 // The table provides an upper bound on the sample for a known cumulative probability. 868 int sample = guideTable[getGuideTableIndex(u)] - 1; 869 // If u is above the sample probability (this occurs due to truncation) 870 // then return the next value up. 871 if (u > cumulativeProbability[sample]) { 872 return sample + 1; 873 } 874 // Search down 875 while (sample != 0 && u <= cumulativeProbability[sample - 1]) { 876 sample--; 877 } 878 return sample; 879 } 880 881 // The sample is above the tabulated cumulative probability for x 882 int x1 = tabulatedX + 1; 883 884 // Fill the cumulative probability table if possible 885 while (x1 < cumulativeProbability.length) { 886 probabilityX = nextProbability(probabilityX, x1); 887 // Compute the next cumulative probability f(x+1) and update 888 final double sum = cumulativeProbability[tabulatedX] + probabilityX; 889 cumulativeProbability[++tabulatedX] = sum; 890 updateGuideTable(sum, tabulatedX); 891 // Check if this is the correct sample 892 if (u <= sum) { 893 return tabulatedX; 894 } 895 x1 = tabulatedX + 1; 896 } 897 898 // The sample is above the range of the cumulative probability table. 899 // Compute using the Kemp method. 900 // This requires the remaining cumulative probability and the probability for x+1. 901 return sampleWithinLongTail(u - cumulativeProbability[tabulatedX], x1, nextProbability(probabilityX, x1)); 902 } 903 904 /** 905 * Compute the next probability using a recurrence relation. 906 * 907 * <pre> 908 * p(x + 1) = p(x) * mean / (x + 1) 909 * </pre> 910 * 911 * @param px the probability of x 912 * @param x1 the value of x+1 913 * @return the probability of x+1 914 */ 915 private double nextProbability(double px, int x1) { 916 return px * mean / x1; 917 } 918 919 /** 920 * Compute the sample assuming it is in the long tail of the distribution. 921 * 922 * <p>This requires a check on the next probability value which is expected to 923 * asymptote to zero. 924 * 925 * @param u the remaining cumulative probability 926 * @param x the current sample value X 927 * @param p the current probability of the sample (p(X=x)) 928 * @return the sample X 929 */ 930 private int sampleWithinLongTail(double u, int x, double p) { 931 // Note on the algorithm: 932 // - X is the unknown sample deviate (the output of the algorithm) 933 // - x is the current value from the distribution 934 // - p is the probability of the current value x, p(X=x) 935 // - u is effectively the cumulative probability that the sample X 936 // is equal or above the current value x, p(X>=x) 937 // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x 938 while (u > p) { 939 // Update the remaining cumulative probability 940 u -= p; 941 p = nextProbability(p, ++x); 942 // The algorithm listed in Kemp (1981) does not check that the rolling 943 // probability is positive. This check is added to ensure no errors when the 944 // limit of the summation 1 - sum(p(x)) is above 0 due to cumulative error in 945 // floating point arithmetic when in the long tail of the distribution. 946 if (p == 0) { 947 break; 948 } 949 } 950 return x; 951 } 952 } 953 954 /** 955 * Sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson distribution</a>. 956 * 957 * <ul> 958 * <li> 959 * For small means, a Poisson process is simulated using uniform deviates, as 960 * described in Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, 961 * Volume 2. Addison Wesley. 962 * </li> 963 * </ul> 964 * 965 * <p>This sampler is suitable for {@code mean < 20}.</p> 966 * 967 * <p>Sampling uses {@link UniformRandomProvider#nextInt()} and 32-bit integer arithmetic.</p> 968 * 969 * @see <a href="https://en.wikipedia.org/wiki/Poisson_distribution#Generating_Poisson-distributed_random_variables"> 970 * Poisson random variables</a> 971 */ 972 static class TinyMeanPoissonSampler 973 implements DiscreteSampler { 974 /** Pre-compute Poisson probability p(n=0) mapped to the range of a 32-bit unsigned fraction. */ 975 private final long p0; 976 /** Underlying source of randomness. */ 977 private final UniformRandomProvider rng; 978 979 /** 980 * @param rng Generator of uniformly distributed random numbers. 981 * @param mean Mean. 982 * @throws IllegalArgumentException if {@code mean <= 0} or {@code Math.exp(-mean) * (1L << 32)} 983 * is not positive. 984 */ 985 TinyMeanPoissonSampler(UniformRandomProvider rng, 986 double mean) { 987 this.rng = rng; 988 if (mean <= 0) { 989 throw new IllegalArgumentException(); 990 } 991 // Math.exp(-mean) is the probability of a Poisson distribution for n=0 (p(n=0)). 992 // This is mapped to a 32-bit range as the numerator of a 32-bit fraction 993 // for use in optimised 32-bit arithmetic. 994 p0 = (long) (Math.exp(-mean) * 0x100000000L); 995 if (p0 == 0) { 996 throw new IllegalArgumentException("No p(x=0) probability for mean: " + mean); 997 } 998 } 999 1000 /** {@inheritDoc} */ 1001 @Override 1002 public int sample() { 1003 int n = 0; 1004 // The unsigned 32-bit sample represents the fraction x / 2^32 where x is [0,2^32-1]. 1005 // The upper bound is exclusive hence the fraction is a uniform deviate from [0,1). 1006 long r = nextUnsigned32(); 1007 // The limit is the probability p(n=0) converted to an unsigned fraction. 1008 while (r >= p0) { 1009 // Compute the fraction: 1010 // r [0,2^32) 2^32 1011 // ---- * -------- / ---- 1012 // 2^32 2^32 2^32 1013 // This rounds down the fraction numerator when the lower 32-bits are discarded. 1014 r = (r * nextUnsigned32()) >>> 32; 1015 n++; 1016 } 1017 // Ensure the value is positive in the worst case scenario of a broken 1018 // generator that returns 0xffffffff for each sample. This will cause 1019 // the integer counter to overflow 2^31-1 but not exceed 2^32. The fraction 1020 // multiplication effectively turns r into a counter decrementing from 2^32-1 1021 // to zero. 1022 return (n >= 0) ? n : Integer.MAX_VALUE; 1023 } 1024 1025 /** 1026 * Get the next unsigned 32-bit random integer. 1027 * 1028 * @return the random integer 1029 */ 1030 private long nextUnsigned32() { 1031 return rng.nextInt() & 0xffffffffL; 1032 } 1033 } 1034 1035 // Benchmarks methods below. 1036 1037 /** 1038 * Baseline for the JMH timing overhead for production of an {@code int} value. 1039 * 1040 * @return the {@code int} value 1041 */ 1042 @Benchmark 1043 public int baselineInt() { 1044 return value; 1045 } 1046 1047 /** 1048 * Baseline for {@link Math#exp(double)}. 1049 * 1050 * @param mean the mean 1051 * @return the value 1052 */ 1053 @Benchmark 1054 public double baselineExp(Means mean) { 1055 return Math.exp(-mean.getMean()); 1056 } 1057 1058 /** 1059 * Run the sampler. 1060 * 1061 * @param sources Source of randomness. 1062 * @return the sample value 1063 */ 1064 @Benchmark 1065 public int sample(Sources sources) { 1066 return sources.getSampler().sample(); 1067 } 1068 1069 /** 1070 * Run the sampler. 1071 * 1072 * @param sources Source of randomness. 1073 * @return the sample value 1074 */ 1075 @Benchmark 1076 public int singleSample(Sources sources) { 1077 return sources.createSampler().sample(); 1078 } 1079 }