View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.rng.sampling.distribution;
18  
19  import org.apache.commons.rng.UniformRandomProvider;
20  import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler.LargeMeanPoissonSamplerState;
21  
22  /**
23   * Create a sampler for the
24   * <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson
25   * distribution</a> using a cache to minimise construction cost.
26   *
27   * <p>The cache will return a sampler equivalent to
28   * {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}.
29   *
30   * <p>The cache allows the {@link PoissonSampler} construction cost to be minimised
31   * for low size Poisson samples. The cache stores state for a range of integers where
32   * integer value {@code n} can be used to construct a sampler for the range
33   * {@code n <= mean < n+1}.
34   *
35   * <p>The cache is advantageous under the following conditions:
36   *
37   * <ul>
38   *   <li>The mean of the Poisson distribution falls within a known range.
39   *   <li>The sample size to be made with the <strong>same</strong> sampler is
40   *       small.
41   *   <li>The Poisson samples have different means with the same integer
42   *       value(s) after rounding down.
43   * </ul>
44   *
45   * <p>If the sample size to be made with the <strong>same</strong> sampler is large
46   * then the construction cost is low compared to the sampling time and the cache
47   * has minimal benefit.
48   *
49   * <p>Performance improvement is dependent on the speed of the
50   * {@link UniformRandomProvider}. A fast provider can obtain a two-fold speed
51   * improvement for a single-use Poisson sampler.
52   *
53   * <p>The cache is thread safe. Note that concurrent threads using the cache
54   * must ensure a thread safe {@link UniformRandomProvider} is used when creating
55   * samplers, e.g. a unique sampler per thread.
56   *
57   * @since 1.2
58   */
59  public class PoissonSamplerCache {
60  
61      /**
62       * The minimum N covered by the cache where
63       * {@code N = (int)Math.floor(mean)}.
64       */
65      private final int minN;
66      /**
67       * The maximum N covered by the cache where
68       * {@code N = (int)Math.floor(mean)}.
69       */
70      private final int maxN;
71      /** The cache of states between {@link minN} and {@link maxN}. */
72      private final LargeMeanPoissonSamplerState[] values;
73  
74      /**
75       * @param minMean The minimum mean covered by the cache.
76       * @param maxMean The maximum mean covered by the cache.
77       * @throws IllegalArgumentException if {@code maxMean < minMean}
78       */
79      public PoissonSamplerCache(double minMean,
80                                 double maxMean) {
81  
82          checkMeanRange(minMean, maxMean);
83  
84          // The cache can only be used for the LargeMeanPoissonSampler.
85          if (maxMean < PoissonSampler.PIVOT) {
86              // The upper limit is too small so no cache will be used.
87              // This class will just construct new samplers.
88              minN = 0;
89              maxN = 0;
90              values = null;
91          } else {
92              // Convert the mean into integers.
93              // Note the minimum is clipped to the algorithm switch point.
94              this.minN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT));
95              this.maxN = (int) Math.floor(Math.min(maxMean, Integer.MAX_VALUE));
96              values = new LargeMeanPoissonSamplerState[maxN - minN + 1];
97          }
98      }
99  
100     /**
101      * @param minN   The minimum N covered by the cache where {@code N = (int)Math.floor(mean)}.
102      * @param maxN   The maximum N covered by the cache where {@code N = (int)Math.floor(mean)}.
103      * @param states The precomputed states.
104      */
105     private PoissonSamplerCache(int minN,
106                                 int maxN,
107                                 LargeMeanPoissonSamplerState[] states) {
108         this.minN = minN;
109         this.maxN = maxN;
110         // Stored directly as the states were newly created within this class.
111         this.values = states;
112     }
113 
114     /**
115      * Check the mean range.
116      *
117      * @param minMean The minimum mean covered by the cache.
118      * @param maxMean The maximum mean covered by the cache.
119      * @throws IllegalArgumentException if {@code maxMean < minMean}
120      */
121     private static void checkMeanRange(double minMean, double maxMean)
122     {
123         // Note:
124         // Although a mean of 0 is invalid for a Poisson sampler this case
125         // is handled to make the cache user friendly. Any low means will
126         // be handled by the SmallMeanPoissonSampler and not cached.
127         // For this reason it is also OK if the means are negative.
128 
129         // Allow minMean == maxMean so that the cache can be used
130         // to create samplers with distinct RNGs and the same mean.
131         if (maxMean < minMean) {
132             throw new IllegalArgumentException(
133                     "Max mean: " + maxMean + " < " + minMean);
134         }
135     }
136 
137     /**
138      * Creates a new Poisson sampler.
139      *
140      * <p>The returned sampler will function exactly the
141      * same as {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}.
142      *
143      * @param rng  Generator of uniformly distributed random numbers.
144      * @param mean Mean.
145      * @return A Poisson sampler
146      * @throws IllegalArgumentException if {@code mean <= 0} or
147      * {@code mean >} {@link Integer#MAX_VALUE}.
148      */
149     public DiscreteSampler createPoissonSampler(UniformRandomProvider rng,
150                                                 double mean) {
151         // Ensure the same functionality as the PoissonSampler by
152         // using a SmallMeanPoissonSampler under the switch point.
153         if (mean < PoissonSampler.PIVOT) {
154             return new SmallMeanPoissonSampler(rng, mean);
155         }
156         if (mean > maxN) {
157             // Outside the range of the cache.
158             // This avoids extra parameter checks and handles the case when
159             // the cache is empty or if Math.floor(mean) is not an integer.
160             return new LargeMeanPoissonSampler(rng, mean);
161         }
162 
163         // Convert the mean into an integer.
164         final int n = (int) Math.floor(mean);
165         if (n < minN) {
166             // Outside the lower range of the cache.
167             return new LargeMeanPoissonSampler(rng, mean);
168         }
169 
170         // Look in the cache for a state that can be reused.
171         // Note: The cache is offset by minN.
172         final int index = n - minN;
173         final LargeMeanPoissonSamplerState state = values[index];
174         if (state == null) {
175             // Create a sampler and store the state for reuse.
176             // Do not worry about thread contention
177             // as the state is effectively immutable.
178             // If recomputed and replaced it will the same.
179             final LargeMeanPoissonSampler sampler = new LargeMeanPoissonSampler(rng, mean);
180             values[index] = sampler.getState();
181             return sampler;
182         }
183         // Compute the remaining fraction of the mean
184         final double lambdaFractional = mean - n;
185         return new LargeMeanPoissonSampler(rng, state, lambdaFractional);
186     }
187 
188     /**
189      * Check if the mean is within the range where the cache can minimise the
190      * construction cost of the {@link PoissonSampler}.
191      *
192      * @param mean
193      *            the mean
194      * @return true, if within the cache range
195      */
196     public boolean withinRange(double mean) {
197         if (mean < PoissonSampler.PIVOT) {
198             // Construction is optimal
199             return true;
200         }
201         // Convert the mean into an integer.
202         final int n = (int) Math.floor(mean);
203         return n <= maxN && n >= minN;
204     }
205 
206     /**
207      * Checks if the cache covers a valid range of mean values.
208      *
209      * <p>Note that the cache is only valid for one of the Poisson sampling
210      * algorithms. In the instance that a range was requested that was too
211      * low then there is nothing to cache and this functions returns
212      * {@code false}.
213      *
214      * <p>The cache can still be used to create a {@link PoissonSampler} using
215      * {@link #createPoissonSampler(UniformRandomProvider, double)}.
216      *
217      * <p>This method can be used to determine if the cache has a potential
218      * performance benefit.
219      *
220      * @return true, if the cache covers a range of mean values
221      */
222     public boolean isValidRange() {
223         return values != null;
224     }
225 
226     /**
227      * Gets the minimum mean covered by the cache.
228      *
229      * <p>This value is the inclusive lower bound and is equal to
230      * the lowest integer-valued mean that is covered by the cache.
231      *
232      * <p>Note that this value may not match the value passed to the constructor
233      * due to the following reasons:
234      *
235      * <ul>
236      *   <li>At small mean values a different algorithm is used for Poisson
237      *       sampling and the cache is unnecessary.
238      *   <li>The minimum is always an integer so may be below the constructor
239      *       minimum mean.
240      * </ul>
241      *
242      * <p>If {@link #isValidRange()} returns {@code true} the cache will store
243      * state to reduce construction cost of samplers in
244      * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()}
245      * inclusive. Otherwise this method returns 0;
246      *
247      * @return The minimum mean covered by the cache.
248      */
249     public double getMinMean()
250     {
251         return minN;
252     }
253 
254     /**
255      * Gets the maximum mean covered by the cache.
256      *
257      * <p>This value is the inclusive upper bound and is equal to
258      * the double value below the first integer-valued mean that is
259      * above range covered by the cache.
260      *
261      * <p>Note that this value may not match the value passed to the constructor
262      * due to the following reasons:
263      * <ul>
264      *   <li>At small mean values a different algorithm is used for Poisson
265      *       sampling and the cache is unnecessary.
266      *   <li>The maximum is always the double value below an integer so
267      *       may be above the constructor maximum mean.
268      * </ul>
269      *
270      * <p>If {@link #isValidRange()} returns {@code true} the cache will store
271      * state to reduce construction cost of samplers in
272      * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()}
273      * inclusive. Otherwise this method returns 0;
274      *
275      * @return The maximum mean covered by the cache.
276      */
277     public double getMaxMean()
278     {
279         if (isValidRange()) {
280             return Math.nextAfter(maxN + 1.0, -1);
281         }
282         return 0;
283     }
284 
285     /**
286      * Gets the minimum mean value that can be cached.
287      *
288      * <p>Any {@link PoissonSampler} created with a mean below this level will not
289      * have any state that can be cached.
290      *
291      * @return the minimum cached mean
292      */
293     public static double getMinimumCachedMean() {
294         return PoissonSampler.PIVOT;
295     }
296 
297     /**
298      * Create a new {@link PoissonSamplerCache} with the given range
299      * reusing the current cache values.
300      *
301      * <p>This will create a new object even if the range is smaller or the
302      * same as the current cache.
303      *
304      * @param minMean The minimum mean covered by the cache.
305      * @param maxMean The maximum mean covered by the cache.
306      * @throws IllegalArgumentException if {@code maxMean < minMean}
307      * @return the poisson sampler cache
308      */
309     public PoissonSamplerCache withRange(double minMean,
310                                          double maxMean) {
311         if (values == null) {
312             // Nothing to reuse
313             return new PoissonSamplerCache(minMean, maxMean);
314         }
315         checkMeanRange(minMean, maxMean);
316 
317         // The cache can only be used for the LargeMeanPoissonSampler.
318         if (maxMean < PoissonSampler.PIVOT) {
319             return new PoissonSamplerCache(0, 0);
320         }
321 
322         // Convert the mean into integers.
323         // Note the minimum is clipped to the algorithm switch point.
324         final int withMinN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT));
325         final int withMaxN = (int) Math.floor(maxMean);
326         final LargeMeanPoissonSamplerState[] states =
327                 new LargeMeanPoissonSamplerState[withMaxN - withMinN + 1];
328 
329         // Preserve values from the current array to the next
330         int currentIndex;
331         int nextIndex;
332         if (this.minN <= withMinN) {
333             // The current array starts before the new array
334             currentIndex = withMinN - this.minN;
335             nextIndex = 0;
336         } else {
337             // The new array starts before the current array
338             currentIndex = 0;
339             nextIndex = this.minN - withMinN;
340         }
341         final int length = Math.min(values.length - currentIndex, states.length - nextIndex);
342         if (length > 0) {
343             System.arraycopy(values, currentIndex, states, nextIndex, length);
344         }
345 
346         return new PoissonSamplerCache(withMinN, withMaxN, states);
347     }
348 }