View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.rng.examples.jmh.sampling.distribution;
19  
20  import org.apache.commons.rng.UniformRandomProvider;
21  import org.apache.commons.rng.sampling.distribution.DiscreteSampler;
22  import org.apache.commons.rng.sampling.distribution.KempSmallMeanPoissonSampler;
23  import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler;
24  import org.apache.commons.rng.sampling.distribution.SmallMeanPoissonSampler;
25  import org.apache.commons.rng.simple.RandomSource;
26  
27  import org.openjdk.jmh.annotations.Benchmark;
28  import org.openjdk.jmh.annotations.BenchmarkMode;
29  import org.openjdk.jmh.annotations.Fork;
30  import org.openjdk.jmh.annotations.Measurement;
31  import org.openjdk.jmh.annotations.Mode;
32  import org.openjdk.jmh.annotations.OutputTimeUnit;
33  import org.openjdk.jmh.annotations.Param;
34  import org.openjdk.jmh.annotations.Scope;
35  import org.openjdk.jmh.annotations.Setup;
36  import org.openjdk.jmh.annotations.State;
37  import org.openjdk.jmh.annotations.Warmup;
38  
39  import java.util.concurrent.TimeUnit;
40  
41  /**
42   * Executes benchmark to compare the speed of generation of Poisson distributed random numbers.
43   */
44  @BenchmarkMode(Mode.AverageTime)
45  @OutputTimeUnit(TimeUnit.NANOSECONDS)
46  @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
47  @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
48  @State(Scope.Benchmark)
49  @Fork(value = 1, jvmArgs = {"-server", "-Xms128M", "-Xmx128M"})
50  public class PoissonSamplersPerformance {
51      /**
52       * The value for the baseline generation of an {@code int} value.
53       *
54       * <p>This must NOT be final!</p>
55       */
56      private int value;
57  
58      /**
59       * The mean for the call to {@link Math#exp(double)}.
60       */
61      @State(Scope.Benchmark)
62      public static class Means {
63          /**
64           * The Poisson mean. This is set at a level where the small mean sampler is to be used
65           * in preference to the large mean sampler.
66           */
67          @Param({"0.25",
68                  "0.5",
69                  "1",
70                  "2",
71                  "4",
72                  "8",
73                  "16",
74                  "32",
75                  })
76          private double mean;
77  
78          /**
79           * Gets the mean.
80           *
81           * @return the mean
82           */
83          public double getMean() {
84              return mean;
85          }
86      }
87  
88      /**
89       * The {@link DiscreteSampler} samplers to use for testing. Creates the sampler for each
90       * {@link RandomSource} in the default
91       * {@link org.apache.commons.rng.examples.jmh.RandomSources RandomSources}.
92       */
93      @State(Scope.Benchmark)
94      public static class Sources {
95          /**
96           * RNG providers.
97           *
98           * <p>Use different speeds.</p>
99           *
100          * @see <a href="https://commons.apache.org/proper/commons-rng/userguide/rng.html">
101          *      Commons RNG user guide</a>
102          */
103         @Param({
104                 "WELL_44497_B",
105                 //"ISAAC",
106                 "XO_RO_SHI_RO_128_PLUS",
107                 })
108         private String randomSourceName;
109 
110         /**
111          * The sampler type.
112          */
113         @Param({"SmallMeanPoissonSampler",
114                 "KempSmallMeanPoissonSampler",
115                 "BoundedKempSmallMeanPoissonSampler",
116                 "KempSmallMeanPoissonSamplerP50",
117                 "KempSmallMeanPoissonSamplerBinarySearch",
118                 "KempSmallMeanPoissonSamplerGuideTable",
119                 "LargeMeanPoissonSampler",
120                 "TinyMeanPoissonSampler",
121                 })
122         private String samplerType;
123 
124         /**
125          * The Poisson mean. This is set at a level where the small mean sampler is to be used
126          * in preference to the large mean sampler.
127          */
128         @Param({"0.25",
129                 "0.5",
130                 "1",
131                 "2",
132                 "4",
133                 "8",
134                 "16",
135                 "32",
136                 "64",
137                 })
138         private double mean;
139 
140         /** RNG. */
141         private UniformRandomProvider generator;
142 
143         /** The factory. */
144         private DiscreteSamplerFactory factory;
145 
146         /** The sampler. */
147         private DiscreteSampler sampler;
148 
149         /**
150          * A factory for creating DiscreteSampler objects.
151          */
152         interface DiscreteSamplerFactory {
153             /**
154              * Creates the sampler.
155              *
156              * @return the sampler
157              */
158             DiscreteSampler create();
159         }
160 
161         /**
162          * @return The RNG.
163          */
164         public UniformRandomProvider getGenerator() {
165             return generator;
166         }
167 
168         /**
169          * Gets the sampler.
170          *
171          * @return The sampler.
172          */
173         public DiscreteSampler getSampler() {
174             return sampler;
175         }
176 
177         /** Instantiates sampler. */
178         @Setup
179         public void setup() {
180             final RandomSource randomSource = RandomSource.valueOf(randomSourceName);
181             generator = RandomSource.create(randomSource);
182 
183             // This would benefit from Java 8 Supplier<DiscreteSampler> lambda function
184             if ("SmallMeanPoissonSampler".equals(samplerType)) {
185                 factory = new DiscreteSamplerFactory() {
186                     @Override
187                     public DiscreteSampler create() {
188                         return SmallMeanPoissonSampler.of(generator, mean);
189                     }
190                 };
191             } else if ("KempSmallMeanPoissonSampler".equals(samplerType)) {
192                 factory = new DiscreteSamplerFactory() {
193                     @Override
194                     public DiscreteSampler create() {
195                         return KempSmallMeanPoissonSampler.of(generator, mean);
196                     }
197                 };
198             } else if ("BoundedKempSmallMeanPoissonSampler".equals(samplerType)) {
199                 factory = new DiscreteSamplerFactory() {
200                     @Override
201                     public DiscreteSampler create() {
202                         return new BoundedKempSmallMeanPoissonSampler(generator, mean);
203                     }
204                 };
205             } else if ("KempSmallMeanPoissonSamplerP50".equals(samplerType)) {
206                 factory = new DiscreteSamplerFactory() {
207                     @Override
208                     public DiscreteSampler create() {
209                         return new KempSmallMeanPoissonSamplerP50(generator, mean);
210                     }
211                 };
212             } else if ("KempSmallMeanPoissonSamplerBinarySearch".equals(samplerType)) {
213                 factory = new DiscreteSamplerFactory() {
214                     @Override
215                     public DiscreteSampler create() {
216                         return new KempSmallMeanPoissonSamplerBinarySearch(generator, mean);
217                     }
218                 };
219             } else if ("KempSmallMeanPoissonSamplerGuideTable".equals(samplerType)) {
220                 factory = new DiscreteSamplerFactory() {
221                     @Override
222                     public DiscreteSampler create() {
223                         return new KempSmallMeanPoissonSamplerGuideTable(generator, mean);
224                     }
225                 };
226             } else if ("LargeMeanPoissonSampler".equals(samplerType)) {
227                 factory = new DiscreteSamplerFactory() {
228                     @Override
229                     public DiscreteSampler create() {
230                         // Note this is not valid when mean < 1
231                         return LargeMeanPoissonSampler.of(generator, mean);
232                     }
233                 };
234             } else if ("TinyMeanPoissonSampler".equals(samplerType)) {
235                 factory = new DiscreteSamplerFactory() {
236                     @Override
237                     public DiscreteSampler create() {
238                         // Note this is only valid when mean < -Math.exp(0x1p-32) == 22.18
239                         return new TinyMeanPoissonSampler(generator, mean);
240                     }
241                 };
242             }
243             sampler = factory.create();
244         }
245 
246         /**
247          * Creates a new instance of the sampler.
248          *
249          * @return The sampler.
250          */
251         public DiscreteSampler createSampler() {
252             return factory.create();
253         }
254     }
255 
256     /**
257      * Kemp sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson
258      * distribution</a>.
259      *
260      * <ul>
261      *  <li>
262      *   For small means, a Poisson process is simulated using uniform deviates, as
263      *   described in Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed
264      *   Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
265      *  </li>
266      * </ul>
267      *
268      * <p>Note: This is similar to {@link KempSmallMeanPoissonSampler} but the sample is
269      * bounded by 1000 * mean.</p>
270      *
271      * @see <a href="https://www.jstor.org/stable/2346348">Kemp, A.W. (1981) JRSS Vol. 30, pp. 249-253</a>
272      */
273     static class BoundedKempSmallMeanPoissonSampler
274         implements DiscreteSampler {
275         /** Underlying source of randomness. */
276         private final UniformRandomProvider rng;
277         /**
278          * Pre-compute {@code Math.exp(-mean)}.
279          * Note: This is the probability of the Poisson sample {@code p(x=0)}.
280          */
281         private final double p0;
282         /** Pre-compute {@code 1000 * mean} as the upper limit of the sample. */
283         private final int limit;
284         /**
285          * The mean of the Poisson sample.
286          */
287         private final double mean;
288 
289         /**
290          * @param rng  Generator of uniformly distributed random numbers.
291          * @param mean Mean.
292          * @throws IllegalArgumentException if {@code mean <= 0} or {@code mean > 700}.
293          */
294         BoundedKempSmallMeanPoissonSampler(UniformRandomProvider rng,
295                                             double mean) {
296             if (mean <= 0) {
297                 throw new IllegalArgumentException();
298             }
299 
300             p0 = Math.exp(-mean);
301             if (p0 == 0) {
302                 throw new IllegalArgumentException();
303             }
304             // The returned sample is bounded by 1000 * mean
305             limit = (int) Math.ceil(1000 * mean);
306             this.rng = rng;
307             this.mean = mean;
308         }
309 
310         /** {@inheritDoc} */
311         @Override
312         public int sample() {
313             // Note on the algorithm:
314             // - X is the unknown sample deviate (the output of the algorithm)
315             // - x is the current value from the distribution
316             // - p is the probability of the current value x, p(X=x)
317             // - u is effectively the cumulative probability that the sample X
318             //   is equal or above the current value x, p(X>=x)
319             // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
320             double u = rng.nextDouble();
321             int x = 0;
322             double p = p0;
323             while (u > p) {
324                 u -= p;
325                 // Compute the next probability using a recurrence relation.
326                 // p(x+1) = p(x) * mean / (x+1)
327                 p *= mean / ++x;
328                 // The algorithm listed in Kemp (1981) does not check that the rolling probability
329                 // is positive. This check is added to ensure a simple bounds in the event that
330                 // p == 0
331                 if (x == limit) {
332                     return x;
333                 }
334             }
335             return x;
336         }
337     }
338 
339     /**
340      * Kemp sampler for the Poisson distribution.
341      *
342      * <p>Note: This is a modification of the original algorithm by Kemp. It implements a hedge
343      * on the cumulative probability set at 50%. This saves computation in half of the samples.</p>
344      */
345     static class KempSmallMeanPoissonSamplerP50
346         implements DiscreteSampler {
347         /** The value of p=0.5. */
348         private static final double ONE_HALF = 0.5;
349         /**
350          * The threshold that defines the cumulative probability for the long tail of the
351          * Poisson distribution. Above this threshold the recurrence relation that computes the
352          * next probability must check that the p-value is not zero.
353          */
354         private static final double LONG_TAIL_THRESHOLD = 0.999;
355 
356         /** Underlying source of randomness. */
357         private final UniformRandomProvider rng;
358         /**
359          * Pre-compute {@code Math.exp(-mean)}.
360          * Note: This is the probability of the Poisson sample {@code p(x=0)}.
361          */
362         private final double p0;
363         /**
364          * The mean of the Poisson sample.
365          */
366         private final double mean;
367         /**
368          * Pre-compute the cumulative probability for all samples up to and including x.
369          * This is F(x) = sum of p(X<=x).
370          *
371          * <p>The value is computed at approximately 50% allowing the algorithm to choose to start
372          * at value (x+1) approximately half of the time.
373          */
374         private final double fx;
375         /**
376          * Store the value (x+1) corresponding to the next value after the cumulative probability is
377          * above 50%.
378          */
379         private final int x1;
380         /**
381          * Store the probability value p(x+1), allowing the algorithm to start from the point x+1.
382          */
383         private final double px1;
384 
385         /**
386          * Create a new instance.
387          *
388          * <p>This is valid for means as large as approximately {@code 744}.</p>
389          *
390          * @param rng  Generator of uniformly distributed random numbers.
391          * @param mean Mean.
392          * @throws IllegalArgumentException if {@code mean <= 0} or {@code Math.exp(-mean) == 0}.
393          */
394         KempSmallMeanPoissonSamplerP50(UniformRandomProvider rng,
395                                        double mean) {
396             if (mean <= 0) {
397                 throw new IllegalArgumentException();
398             }
399 
400             this.rng = rng;
401             p0 = Math.exp(-mean);
402             this.mean = mean;
403 
404             // Pre-compute a hedge value for the cumulative probability at approximately 50%.
405             // This is only done when p0 is less than the long tail threshold.
406             // The result is that the rolling probability computation should never hit the
407             // long tail where p reaches zero.
408             if (p0 <= LONG_TAIL_THRESHOLD) {
409                 // Check the edge case for no probability
410                 if (p0 == 0) {
411                     throw new IllegalArgumentException();
412                 }
413 
414                 double p = p0;
415                 int x = 0;
416                 // Sum is cumulative probability F(x) = sum p(X<=x)
417                 double sum = p;
418                 while (sum < ONE_HALF) {
419                     // Compute the next probability using a recurrence relation.
420                     // p(x+1) = p(x) * mean / (x+1)
421                     p *= mean / ++x;
422                     sum += p;
423                 }
424                 fx = sum;
425                 x1 = x + 1;
426                 px1 = p * mean / x1;
427             } else {
428                 // Always start at zero.
429                 // Note: If NaN is input as the mean this path is executed and the sample is always zero.
430                 fx = 0;
431                 x1 = 0;
432                 px1 = p0;
433             }
434         }
435 
436         /** {@inheritDoc} */
437         @Override
438         public int sample() {
439             // Note on the algorithm:
440             // - X is the unknown sample deviate (the output of the algorithm)
441             // - x is the current value from the distribution
442             // - p is the probability of the current value x, p(X=x)
443             // - u is effectively the cumulative probability that the sample X
444             //   is equal or above the current value x, p(X>=x)
445             // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
446             final double u = rng.nextDouble();
447 
448             if (u <= fx) {
449                 // Sample from the lower half of the distribution starting at zero
450                 return sampleBeforeLongTail(u, 0, p0);
451             }
452 
453             // Sample from the upper half of the distribution starting at cumulative probability fx.
454             // This is reached when u > fx and sample X > x.
455 
456             // If below the long tail threshold then omit the check on the asymptote of p -> zero
457             if (u <= LONG_TAIL_THRESHOLD) {
458                 return sampleBeforeLongTail(u - fx, x1, px1);
459             }
460 
461             return sampleWithinLongTail(u - fx, x1, px1);
462         }
463 
464         /**
465          * Compute the sample assuming it is <strong>not</strong> in the long tail of the distribution.
466          *
467          * <p>This avoids a check on the next probability value assuming that the cumulative probability
468          * is at a level where the long tail of the Poisson distribution will not be reached.
469          *
470          * @param u the remaining cumulative probability (p(X>x))
471          * @param x the current sample value X
472          * @param p the current probability of the sample (p(X=x))
473          * @return the sample X
474          */
475         private int sampleBeforeLongTail(double u, int x, double p) {
476             while (u > p) {
477                 // Update the remaining cumulative probability
478                 u -= p;
479                 // Compute the next probability using a recurrence relation.
480                 // p(x+1) = p(x) * mean / (x+1)
481                 p *= mean / ++x;
482                 // The algorithm listed in Kemp (1981) does not check that the rolling probability
483                 // is positive (non-zero). This is omitted here on the assumption that the cumulative
484                 // probability will not be in the long tail where the probability asymptotes to zero.
485             }
486             return x;
487         }
488 
489         /**
490          * Compute the sample assuming it is in the long tail of the distribution.
491          *
492          * <p>This requires a check on the next probability value which is expected to asymptote to zero.
493          *
494          * @param u the remaining cumulative probability
495          * @param x the current sample value X
496          * @param p the current probability of the sample (p(X=x))
497          * @return the sample X
498          */
499         private int sampleWithinLongTail(double u, int x, double p) {
500             while (u > p) {
501                 // Update the remaining cumulative probability
502                 u -= p;
503                 // Compute the next probability using a recurrence relation.
504                 // p(x+1) = p(x) * mean / (x+1)
505                 p *= mean / ++x;
506                 // The algorithm listed in Kemp (1981) does not check that the rolling probability
507                 // is positive. This check is added to ensure no errors when the limit of the summation
508                 // 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic when
509                 // in the long tail of the distribution.
510                 if (p == 0) {
511                     return x;
512                 }
513             }
514             return x;
515         }
516     }
517 
518     /**
519      * Kemp sampler for the Poisson distribution.
520      *
521      * <p>Note: This is a modification of the original algorithm by Kemp. It stores the
522      * cumulative probability table for repeat use. The table is searched using a binary
523      * search algorithm.</p>
524      */
525     static class KempSmallMeanPoissonSamplerBinarySearch
526         implements DiscreteSampler {
527         /**
528          * Store the cumulative probability table size for integer means so that 99.99%
529          * of the Poisson distribution is covered. This is done until the table size is
530          * 2 * mean.
531          *
532          * <p>At higher mean the expected range is mean + 4 * sqrt(mean). To avoid the sqrt
533          * the conservative limit of 2 * mean is used.
534          */
535         private static final int[] TABLE_SIZE = {
536             /* mean 1 to 10. */
537             8, 10, 12, 14, 16, 18, 20, 22, 24, 25,
538             /* mean 11 to 20. */
539             27, 29, 30, 32, 33, 35, 36, 38, 39, 41,
540         };
541 
542         /** Underlying source of randomness. */
543         private final UniformRandomProvider rng;
544         /**
545          * The mean of the Poisson sample.
546          */
547         private final double mean;
548         /**
549          * Store the cumulative probability for all samples up to and including x.
550          * This is F(x) = sum of p(X<=x).
551          *
552          * <p>This table is initialised to store cumulative probabilities for x up to 2 * mean
553          * or 99.99% (whichever is larger).
554          */
555         private final double[] fx;
556         /**
557          * Store the value x corresponding to the last stored cumulative probability.
558          */
559         private int lastX;
560         /**
561          * Store the probability value p(x) corresponding to last stored cumulative probability,
562          * allowing the algorithm to start from the point x.
563          */
564         private double px;
565 
566         /**
567          * Create a new instance.
568          *
569          * <p>This is valid for means as large as approximately {@code 744}.</p>
570          *
571          * @param rng  Generator of uniformly distributed random numbers.
572          * @param mean Mean.
573          * @throws IllegalArgumentException if {@code mean <= 0} or {@code Math.exp(-mean) == 0}.
574          */
575         KempSmallMeanPoissonSamplerBinarySearch(UniformRandomProvider rng,
576                                                 double mean) {
577             if (mean <= 0) {
578                 throw new IllegalArgumentException();
579             }
580 
581             px = Math.exp(-mean);
582             if (px > 0) {
583                 this.rng = rng;
584                 this.mean = mean;
585 
586                 // Initialise the cumulative probability table.
587                 // The supported mean where p(x=0) > 0 sets a limit of around 744 so this will always be
588                 // possible.
589                 final int upperMean = (int) Math.ceil(mean);
590                 fx = new double[(upperMean < TABLE_SIZE.length) ? TABLE_SIZE[upperMean] : upperMean * 2];
591                 fx[0] = px;
592             } else {
593                 // This will catch NaN mean values
594                 throw new IllegalArgumentException();
595             }
596         }
597 
598         /** {@inheritDoc} */
599         @Override
600         public int sample() {
601             // Note on the algorithm:
602             // - X is the unknown sample deviate (the output of the algorithm)
603             // - x is the current value from the distribution
604             // - p is the probability of the current value x, p(X=x)
605             // - u is effectively the cumulative probability that the sample X
606             //   is equal or above the current value x, p(X>=x)
607             // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
608             final double u = rng.nextDouble();
609 
610             if (u <= fx[lastX]) {
611                 // Binary search within known cumulative probability table.
612                 // Find x so that u > f[x-1] and u <= f[x].
613                 // This is a looser search than Arrays.binarySearch:
614                 // - The output is x = upper.
615                 // - The pre-condition check ensures u <= f[upper] at the start.
616                 // - The table stores probabilities where f[0] is non-zero.
617                 // - u should be >= 0 (or the random generator is broken).
618                 // - It avoids comparisons using Double.doubleToLongBits.
619                 // - It avoids the low likelihood of equality between two doubles so uses
620                 //   only 1 compare per loop.
621                 // - It uses a weighted middle anticipating that the cumulative distribution
622                 //   is skewed as the expected use case is a small mean.
623                 int lower = 0;
624                 int upper = lastX;
625                 while (lower < upper) {
626                     // Weighted middle is 1/4 of the range between lower and upper
627                     final int mid = (3 * lower + upper) >>> 2;
628                     final double midVal = fx[mid];
629                     if (u > midVal) {
630                         // Change lower such that
631                         // u > f[lower - 1]
632                         lower = mid + 1;
633                     } else {
634                         // Change upper such that
635                         // u <= f[upper]
636                         upper = mid;
637                     }
638                 }
639                 return upper;
640             }
641 
642             // The sample is above x
643             int x1 = lastX + 1;
644 
645             // Fill the cumulative probability table if possible
646             while (x1 < fx.length) {
647                 // Compute the next probability using a recurrence relation.
648                 // p(x+1) = p(x) * mean / (x+1)
649                 px = nextProbability(px, x1);
650                 // Compute the next cumulative probability f(x+1) and update
651                 final double sum = fx[lastX] + px;
652                 fx[++lastX] = sum;
653                 // Check if this is the correct sample
654                 if (u <= sum) {
655                     return lastX;
656                 }
657                 x1 = lastX + 1;
658             }
659 
660             // The sample is above the range of the cumulative probability table.
661             // Compute using the Kemp method.
662             // This requires the remaining cumulative probability and the probability for x+1.
663             return sampleWithinLongTail(u - fx[lastX], x1, nextProbability(px, x1));
664         }
665 
666         /**
667          * Compute the next probability using a recurrence relation.
668          *
669          * <pre>
670          * p(x + 1) = p(x) * mean / (x + 1)
671          * </pre>
672          *
673          * @param p the probability of x
674          * @param x1 the value of x+1
675          * @return the probability of x+1
676          */
677         private double nextProbability(double p, int x1) {
678             return p * mean / x1;
679         }
680 
681         /**
682          * Compute the sample assuming it is in the long tail of the distribution.
683          *
684          * <p>This requires a check on the next probability value which is expected to asymptote to zero.
685          *
686          * @param u the remaining cumulative probability
687          * @param x the current sample value X
688          * @param p the current probability of the sample (p(X=x))
689          * @return the sample X
690          */
691         private int sampleWithinLongTail(double u, int x, double p) {
692             while (u > p) {
693                 // Update the remaining cumulative probability
694                 u -= p;
695                 p = nextProbability(p, ++x);
696                 // The algorithm listed in Kemp (1981) does not check that the rolling probability
697                 // is positive. This check is added to ensure no errors when the limit of the summation
698                 // 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic when
699                 // in the long tail of the distribution.
700                 if (p == 0) {
701                     return x;
702                 }
703             }
704             return x;
705         }
706     }
707     /**
708      * Sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson
709      * distribution</a>.
710      *
711      * <p>Note: This is a modification of the original algorithm by Kemp. It stores the
712      * cumulative probability table for repeat use. The table is computed dynamically and
713      * searched using a guide table.</p>
714      */
715     static class KempSmallMeanPoissonSamplerGuideTable implements DiscreteSampler {
716         /**
717          * Store the cumulative probability table size for integer means so that 99.99% of the
718          * Poisson distribution is covered. This is done until the table size is 2 * mean.
719          *
720          * <p>At higher mean the expected range is mean + 4 * sqrt(mean). To avoid the sqrt
721          * the conservative limit of 2 * mean is used.
722          */
723         private static final int[] TABLE_SIZE = {
724             /* mean 1 to 10. */
725             8, 10, 12, 14, 16, 18, 20, 22, 24, 25,
726             /* mean 11 to 20. */
727             27, 29, 30, 32, 33, 35, 36, 38, 39, 41, };
728 
729         /** Underlying source of randomness. */
730         private final UniformRandomProvider rng;
731         /**
732          * The mean of the Poisson sample.
733          */
734         private final double mean;
735         /**
736          * Store the cumulative probability for all samples up to and including x. This is
737          * F(x) = sum of p(X<=x).
738          *
739          * <p>This table is initialised to store cumulative probabilities for x up to 2 * mean
740          * or 99.99% (whichever is larger).
741          */
742         private final double[] cumulativeProbability;
743         /**
744          * Store the value x corresponding to the last stored cumulative probability.
745          */
746         private int tabulatedX;
747         /**
748          * Store the probability value p(x), allowing the algorithm to start from the point x.
749          */
750         private double probabilityX;
751         /**
752          * The inverse cumulative probability guide table. This is a map between the cumulative
753          * probability (f(x)) and the value x. It is used to set the initial point for search
754          * of the cumulative probability table.
755          *
756          * <p>The index into the table is obtained using {@code f(x) * guideTable.length}. The value
757          * stored at the index is value {@code x+1} such that it is the exclusive upper bound
758          * on the sample value for searching the cumulative probability table. It requires the
759          * table search is towards zero.</p>
760          *
761          * <p>Note: Using x+1 ensures that the table can be zero filled upon initialisation and
762          * any index with zero has yet to be allocated.</p>
763          *
764          * <p>The guide table should never be used when the input f(x) is above the current range of
765          * the cumulative probability table. This would create an index that has not been
766          * allocated a mapping.
767          */
768         private final int[] guideTable;
769 
770         /**
771          * Create a new instance.
772          *
773          * <p>This is valid for means as large as approximately {@code 744}.</p>
774          *
775          * @param rng Generator of uniformly distributed random numbers.
776          * @param mean Mean.
777          * @throws IllegalArgumentException if {@code mean <= 0} or
778          * {@code Math.exp(-mean) == 0}.
779          */
780         KempSmallMeanPoissonSamplerGuideTable(UniformRandomProvider rng, double mean) {
781             if (mean <= 0) {
782                 throw new IllegalArgumentException("mean is not strictly positive: " + mean);
783             }
784 
785             probabilityX = Math.exp(-mean);
786             if (probabilityX > 0) {
787                 this.rng = rng;
788                 this.mean = mean;
789 
790                 // Initialise the cumulative probability table.
791                 // The supported mean where p(x=0) > 0 sets a limit of around 744 so the cast to int
792                 // will always be possible.
793                 final int upperMean = (int) Math.ceil(mean);
794                 final int size = (upperMean < TABLE_SIZE.length) ? TABLE_SIZE[upperMean] : upperMean * 2;
795                 cumulativeProbability = new double[size];
796                 cumulativeProbability[0] = probabilityX;
797 
798                 guideTable = new int[cumulativeProbability.length + 1];
799                 initialiseGuideTable(probabilityX);
800             } else {
801                 // This will catch NaN mean values
802                 throw new IllegalArgumentException("No probability for mean " + mean);
803             }
804         }
805 
806         /**
807          * Initialise the cumulative probability guide table. All guide indices at or below the
808          * index corresponding to the given probability will be set to 1.
809          *
810          * @param p0 the probability for x=0
811          */
812         private void initialiseGuideTable(double p0) {
813             for (int index = getGuideTableIndex(p0); index >= 0; index--) {
814                 guideTable[index] = 1;
815             }
816         }
817 
818         /**
819          * Fill the inverse cumulative probability guide table. Set the index corresponding to the
820          * given probability to x+1 establishing an exclusive upper bound on x for the probability.
821          * All unused guide indices below the index will also be set to x+1.
822          *
823          * @param p the cumulative probability
824          * @param x the sample value x
825          */
826         private void updateGuideTable(double p, int x) {
827             // Always set the current index as the guide table is the exclusive upper bound
828             // for searching the cumulative probability table for any value p.
829             // Then fill any lower positions that are not allocated.
830             final int x1 = x + 1;
831             int index = getGuideTableIndex(p);
832             do {
833                 guideTable[index--] = x1;
834             } while (index > 0 && guideTable[index] == 0);
835         }
836 
837         /**
838          * Gets the guide table index for the probability. This is obtained using
839          * {@code p * (guideTable.length - 1)} so is inside the length of the table.
840          *
841          * @param p the cumulative probability
842          * @return the guide table index
843          */
844         private int getGuideTableIndex(double p) {
845             // Note: This is only ever called when p is in the range of the cumulative
846             // probability table. So assume 0 <= p <= 1.
847             return (int) (p * (guideTable.length - 1));
848         }
849 
850         /** {@inheritDoc} */
851         @Override
852         public int sample() {
853             // Note on the algorithm:
854             // 1. Compute a cumulative probability with a uniform deviate (u).
855             // 2. If the probability lies within the tabulated cumulative probabilities
856             //    then find the sample value.
857             // 3. If possible expand the tabulated cumulative probabilities up to the value u.
858             // 4. If value u exceeds the capacity for the tabulated cumulative probabilities
859             //    then compute the sample value dynamically without storing the probabilities.
860 
861             // Compute a probability
862             final double u = rng.nextDouble();
863 
864             // Check the tabulated cumulative probability table
865             if (u <= cumulativeProbability[tabulatedX]) {
866                 // Initialise the search using a guide table to find an initial guess.
867                 // The table provides an upper bound on the sample for a known cumulative probability.
868                 int sample = guideTable[getGuideTableIndex(u)] - 1;
869                 // If u is above the sample probability (this occurs due to truncation)
870                 // then return the next value up.
871                 if (u > cumulativeProbability[sample]) {
872                     return sample + 1;
873                 }
874                 // Search down
875                 while (sample != 0 && u <= cumulativeProbability[sample - 1]) {
876                     sample--;
877                 }
878                 return sample;
879             }
880 
881             // The sample is above the tabulated cumulative probability for x
882             int x1 = tabulatedX + 1;
883 
884             // Fill the cumulative probability table if possible
885             while (x1 < cumulativeProbability.length) {
886                 probabilityX = nextProbability(probabilityX, x1);
887                 // Compute the next cumulative probability f(x+1) and update
888                 final double sum = cumulativeProbability[tabulatedX] + probabilityX;
889                 cumulativeProbability[++tabulatedX] = sum;
890                 updateGuideTable(sum, tabulatedX);
891                 // Check if this is the correct sample
892                 if (u <= sum) {
893                     return tabulatedX;
894                 }
895                 x1 = tabulatedX + 1;
896             }
897 
898             // The sample is above the range of the cumulative probability table.
899             // Compute using the Kemp method.
900             // This requires the remaining cumulative probability and the probability for x+1.
901             return sampleWithinLongTail(u - cumulativeProbability[tabulatedX], x1, nextProbability(probabilityX, x1));
902         }
903 
904         /**
905          * Compute the next probability using a recurrence relation.
906          *
907          * <pre>
908          * p(x + 1) = p(x) * mean / (x + 1)
909          * </pre>
910          *
911          * @param px the probability of x
912          * @param x1 the value of x+1
913          * @return the probability of x+1
914          */
915         private double nextProbability(double px, int x1) {
916             return px * mean / x1;
917         }
918 
919         /**
920          * Compute the sample assuming it is in the long tail of the distribution.
921          *
922          * <p>This requires a check on the next probability value which is expected to
923          * asymptote to zero.
924          *
925          * @param u the remaining cumulative probability
926          * @param x the current sample value X
927          * @param p the current probability of the sample (p(X=x))
928          * @return the sample X
929          */
930         private int sampleWithinLongTail(double u, int x, double p) {
931             // Note on the algorithm:
932             // - X is the unknown sample deviate (the output of the algorithm)
933             // - x is the current value from the distribution
934             // - p is the probability of the current value x, p(X=x)
935             // - u is effectively the cumulative probability that the sample X
936             // is equal or above the current value x, p(X>=x)
937             // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
938             while (u > p) {
939                 // Update the remaining cumulative probability
940                 u -= p;
941                 p = nextProbability(p, ++x);
942                 // The algorithm listed in Kemp (1981) does not check that the rolling
943                 // probability is positive. This check is added to ensure no errors when the
944                 // limit of the summation 1 - sum(p(x)) is above 0 due to cumulative error in
945                 // floating point arithmetic when in the long tail of the distribution.
946                 if (p == 0) {
947                     break;
948                 }
949             }
950             return x;
951         }
952     }
953 
954     /**
955      * Sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson distribution</a>.
956      *
957      * <ul>
958      *  <li>
959      *   For small means, a Poisson process is simulated using uniform deviates, as
960      *   described in Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming,
961      *   Volume 2. Addison Wesley.
962      *  </li>
963      * </ul>
964      *
965      * <p>This sampler is suitable for {@code mean < 20}.</p>
966      *
967      * <p>Sampling uses {@link UniformRandomProvider#nextInt()} and 32-bit integer arithmetic.</p>
968      *
969      * @see <a href="https://en.wikipedia.org/wiki/Poisson_distribution#Generating_Poisson-distributed_random_variables">
970      * Poisson random variables</a>
971      */
972     static class TinyMeanPoissonSampler
973         implements DiscreteSampler {
974         /** Pre-compute Poisson probability p(n=0) mapped to the range of a 32-bit unsigned fraction. */
975         private final long p0;
976         /** Underlying source of randomness. */
977         private final UniformRandomProvider rng;
978 
979         /**
980          * @param rng  Generator of uniformly distributed random numbers.
981          * @param mean Mean.
982          * @throws IllegalArgumentException if {@code mean <= 0} or {@code Math.exp(-mean) * (1L << 32)}
983          * is not positive.
984          */
985         TinyMeanPoissonSampler(UniformRandomProvider rng,
986                                double mean) {
987             this.rng = rng;
988             if (mean <= 0) {
989                 throw new IllegalArgumentException();
990             }
991             // Math.exp(-mean) is the probability of a Poisson distribution for n=0 (p(n=0)).
992             // This is mapped to a 32-bit range as the numerator of a 32-bit fraction
993             // for use in optimised 32-bit arithmetic.
994             p0 = (long) (Math.exp(-mean) * 0x100000000L);
995             if (p0 == 0) {
996                 throw new IllegalArgumentException("No p(x=0) probability for mean: " + mean);
997             }
998         }
999 
1000         /** {@inheritDoc} */
1001         @Override
1002         public int sample() {
1003             int n = 0;
1004             // The unsigned 32-bit sample represents the fraction x / 2^32 where x is [0,2^32-1].
1005             // The upper bound is exclusive hence the fraction is a uniform deviate from [0,1).
1006             long r = nextUnsigned32();
1007             // The limit is the probability p(n=0) converted to an unsigned fraction.
1008             while (r >= p0) {
1009                 // Compute the fraction:
1010                 //  r       [0,2^32)      2^32
1011                 // ----  *  --------   /  ----
1012                 // 2^32       2^32        2^32
1013                 // This rounds down the fraction numerator when the lower 32-bits are discarded.
1014                 r = (r * nextUnsigned32()) >>> 32;
1015                 n++;
1016             }
1017             // Ensure the value is positive in the worst case scenario of a broken
1018             // generator that returns 0xffffffff for each sample. This will cause
1019             // the integer counter to overflow 2^31-1 but not exceed 2^32. The fraction
1020             // multiplication effectively turns r into a counter decrementing from 2^32-1
1021             // to zero.
1022             return (n >= 0) ? n : Integer.MAX_VALUE;
1023         }
1024 
1025         /**
1026          * Get the next unsigned 32-bit random integer.
1027          *
1028          * @return the random integer
1029          */
1030         private long nextUnsigned32() {
1031             return rng.nextInt() & 0xffffffffL;
1032         }
1033     }
1034 
1035     // Benchmarks methods below.
1036 
1037     /**
1038      * Baseline for the JMH timing overhead for production of an {@code int} value.
1039      *
1040      * @return the {@code int} value
1041      */
1042     @Benchmark
1043     public int baselineInt() {
1044         return value;
1045     }
1046 
1047     /**
1048      * Baseline for {@link Math#exp(double)}.
1049      *
1050      * @param mean the mean
1051      * @return the value
1052      */
1053     @Benchmark
1054     public double baselineExp(Means mean) {
1055         return Math.exp(-mean.getMean());
1056     }
1057 
1058     /**
1059      * Run the sampler.
1060      *
1061      * @param sources Source of randomness.
1062      * @return the sample value
1063      */
1064     @Benchmark
1065     public int sample(Sources sources) {
1066         return sources.getSampler().sample();
1067     }
1068 
1069     /**
1070      * Run the sampler.
1071      *
1072      * @param sources Source of randomness.
1073      * @return the sample value
1074      */
1075     @Benchmark
1076     public int singleSample(Sources sources) {
1077         return sources.createSampler().sample();
1078     }
1079 }