1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.rng.sampling.distribution; 18 19 import org.apache.commons.rng.UniformRandomProvider; 20 import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler.LargeMeanPoissonSamplerState; 21 22 /** 23 * Create a sampler for the 24 * <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson 25 * distribution</a> using a cache to minimise construction cost. 26 * 27 * <p>The cache will return a sampler equivalent to 28 * {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}.</p> 29 * 30 * <p>The cache allows the {@link PoissonSampler} construction cost to be minimised 31 * for low size Poisson samples. The cache stores state for a range of integers where 32 * integer value {@code n} can be used to construct a sampler for the range 33 * {@code n <= mean < n+1}.</p> 34 * 35 * <p>The cache is advantageous under the following conditions:</p> 36 * 37 * <ul> 38 * <li>The mean of the Poisson distribution falls within a known range. 39 * <li>The sample size to be made with the <strong>same</strong> sampler is 40 * small. 41 * <li>The Poisson samples have different means with the same integer 42 * value(s) after rounding down. 43 * </ul> 44 * 45 * <p>If the sample size to be made with the <strong>same</strong> sampler is large 46 * then the construction cost is low compared to the sampling time and the cache 47 * has minimal benefit.</p> 48 * 49 * <p>Performance improvement is dependent on the speed of the 50 * {@link UniformRandomProvider}. A fast provider can obtain a two-fold speed 51 * improvement for a single-use Poisson sampler.</p> 52 * 53 * <p>The cache is thread safe. Note that concurrent threads using the cache 54 * must ensure a thread safe {@link UniformRandomProvider} is used when creating 55 * samplers, e.g. a unique sampler per thread.</p> 56 * 57 * <p>Sampling uses:</p> 58 * 59 * <ul> 60 * <li>{@link UniformRandomProvider#nextDouble()} 61 * <li>{@link UniformRandomProvider#nextLong()} (large means only) 62 * </ul> 63 * 64 * @since 1.2 65 */ 66 public class PoissonSamplerCache { 67 68 /** 69 * The minimum N covered by the cache where 70 * {@code N = (int)Math.floor(mean)}. 71 */ 72 private final int minN; 73 /** 74 * The maximum N covered by the cache where 75 * {@code N = (int)Math.floor(mean)}. 76 */ 77 private final int maxN; 78 /** The cache of states between {@link minN} and {@link maxN}. */ 79 private final LargeMeanPoissonSamplerState[] values; 80 81 /** 82 * @param minMean The minimum mean covered by the cache. 83 * @param maxMean The maximum mean covered by the cache. 84 * @throws IllegalArgumentException if {@code maxMean < minMean} 85 */ 86 public PoissonSamplerCache(double minMean, 87 double maxMean) { 88 89 checkMeanRange(minMean, maxMean); 90 91 // The cache can only be used for the LargeMeanPoissonSampler. 92 if (maxMean < PoissonSampler.PIVOT) { 93 // The upper limit is too small so no cache will be used. 94 // This class will just construct new samplers. 95 minN = 0; 96 maxN = 0; 97 values = null; 98 } else { 99 // Convert the mean into integers. 100 // Note the minimum is clipped to the algorithm switch point. 101 this.minN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT)); 102 this.maxN = (int) Math.floor(Math.min(maxMean, Integer.MAX_VALUE)); 103 values = new LargeMeanPoissonSamplerState[maxN - minN + 1]; 104 } 105 } 106 107 /** 108 * @param minN The minimum N covered by the cache where {@code N = (int)Math.floor(mean)}. 109 * @param maxN The maximum N covered by the cache where {@code N = (int)Math.floor(mean)}. 110 * @param states The precomputed states. 111 */ 112 private PoissonSamplerCache(int minN, 113 int maxN, 114 LargeMeanPoissonSamplerState[] states) { 115 this.minN = minN; 116 this.maxN = maxN; 117 // Stored directly as the states were newly created within this class. 118 this.values = states; 119 } 120 121 /** 122 * Check the mean range. 123 * 124 * @param minMean The minimum mean covered by the cache. 125 * @param maxMean The maximum mean covered by the cache. 126 * @throws IllegalArgumentException if {@code maxMean < minMean} 127 */ 128 private static void checkMeanRange(double minMean, double maxMean) { 129 // Note: 130 // Although a mean of 0 is invalid for a Poisson sampler this case 131 // is handled to make the cache user friendly. Any low means will 132 // be handled by the SmallMeanPoissonSampler and not cached. 133 // For this reason it is also OK if the means are negative. 134 135 // Allow minMean == maxMean so that the cache can be used 136 // to create samplers with distinct RNGs and the same mean. 137 if (maxMean < minMean) { 138 throw new IllegalArgumentException( 139 "Max mean: " + maxMean + " < " + minMean); 140 } 141 } 142 143 /** 144 * Creates a new Poisson sampler. 145 * 146 * <p>The returned sampler will function exactly the 147 * same as {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}. 148 * 149 * @param rng Generator of uniformly distributed random numbers. 150 * @param mean Mean. 151 * @return A Poisson sampler 152 * @throws IllegalArgumentException if {@code mean <= 0} or 153 * {@code mean >} {@link Integer#MAX_VALUE}. 154 */ 155 public DiscreteSampler createPoissonSampler(UniformRandomProvider rng, 156 double mean) { 157 // Ensure the same functionality as the PoissonSampler by 158 // using a SmallMeanPoissonSampler under the switch point. 159 if (mean < PoissonSampler.PIVOT) { 160 return SmallMeanPoissonSampler.of(rng, mean); 161 } 162 if (mean > maxN) { 163 // Outside the range of the cache. 164 // This avoids extra parameter checks and handles the case when 165 // the cache is empty or if Math.floor(mean) is not an integer. 166 return LargeMeanPoissonSampler.of(rng, mean); 167 } 168 169 // Convert the mean into an integer. 170 final int n = (int) Math.floor(mean); 171 if (n < minN) { 172 // Outside the lower range of the cache. 173 return LargeMeanPoissonSampler.of(rng, mean); 174 } 175 176 // Look in the cache for a state that can be reused. 177 // Note: The cache is offset by minN. 178 final int index = n - minN; 179 final LargeMeanPoissonSamplerState state = values[index]; 180 if (state == null) { 181 // Create a sampler and store the state for reuse. 182 // Do not worry about thread contention 183 // as the state is effectively immutable. 184 // If recomputed and replaced it will the same. 185 final LargeMeanPoissonSampler sampler = new LargeMeanPoissonSampler(rng, mean); 186 values[index] = sampler.getState(); 187 return sampler; 188 } 189 // Compute the remaining fraction of the mean 190 final double lambdaFractional = mean - n; 191 return new LargeMeanPoissonSampler(rng, state, lambdaFractional); 192 } 193 194 /** 195 * Check if the mean is within the range where the cache can minimise the 196 * construction cost of the {@link PoissonSampler}. 197 * 198 * @param mean 199 * the mean 200 * @return true, if within the cache range 201 */ 202 public boolean withinRange(double mean) { 203 if (mean < PoissonSampler.PIVOT) { 204 // Construction is optimal 205 return true; 206 } 207 // Convert the mean into an integer. 208 final int n = (int) Math.floor(mean); 209 return n <= maxN && n >= minN; 210 } 211 212 /** 213 * Checks if the cache covers a valid range of mean values. 214 * 215 * <p>Note that the cache is only valid for one of the Poisson sampling 216 * algorithms. In the instance that a range was requested that was too 217 * low then there is nothing to cache and this functions returns 218 * {@code false}. 219 * 220 * <p>The cache can still be used to create a {@link PoissonSampler} using 221 * {@link #createPoissonSampler(UniformRandomProvider, double)}. 222 * 223 * <p>This method can be used to determine if the cache has a potential 224 * performance benefit. 225 * 226 * @return true, if the cache covers a range of mean values 227 */ 228 public boolean isValidRange() { 229 return values != null; 230 } 231 232 /** 233 * Gets the minimum mean covered by the cache. 234 * 235 * <p>This value is the inclusive lower bound and is equal to 236 * the lowest integer-valued mean that is covered by the cache. 237 * 238 * <p>Note that this value may not match the value passed to the constructor 239 * due to the following reasons: 240 * 241 * <ul> 242 * <li>At small mean values a different algorithm is used for Poisson 243 * sampling and the cache is unnecessary. 244 * <li>The minimum is always an integer so may be below the constructor 245 * minimum mean. 246 * </ul> 247 * 248 * <p>If {@link #isValidRange()} returns {@code true} the cache will store 249 * state to reduce construction cost of samplers in 250 * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()} 251 * inclusive. Otherwise this method returns 0; 252 * 253 * @return The minimum mean covered by the cache. 254 */ 255 public double getMinMean() { 256 return minN; 257 } 258 259 /** 260 * Gets the maximum mean covered by the cache. 261 * 262 * <p>This value is the inclusive upper bound and is equal to 263 * the double value below the first integer-valued mean that is 264 * above range covered by the cache. 265 * 266 * <p>Note that this value may not match the value passed to the constructor 267 * due to the following reasons: 268 * <ul> 269 * <li>At small mean values a different algorithm is used for Poisson 270 * sampling and the cache is unnecessary. 271 * <li>The maximum is always the double value below an integer so 272 * may be above the constructor maximum mean. 273 * </ul> 274 * 275 * <p>If {@link #isValidRange()} returns {@code true} the cache will store 276 * state to reduce construction cost of samplers in 277 * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()} 278 * inclusive. Otherwise this method returns 0; 279 * 280 * @return The maximum mean covered by the cache. 281 */ 282 public double getMaxMean() { 283 if (isValidRange()) { 284 return Math.nextAfter(maxN + 1.0, -1); 285 } 286 return 0; 287 } 288 289 /** 290 * Gets the minimum mean value that can be cached. 291 * 292 * <p>Any {@link PoissonSampler} created with a mean below this level will not 293 * have any state that can be cached. 294 * 295 * @return the minimum cached mean 296 */ 297 public static double getMinimumCachedMean() { 298 return PoissonSampler.PIVOT; 299 } 300 301 /** 302 * Create a new {@link PoissonSamplerCache} with the given range 303 * reusing the current cache values. 304 * 305 * <p>This will create a new object even if the range is smaller or the 306 * same as the current cache. 307 * 308 * @param minMean The minimum mean covered by the cache. 309 * @param maxMean The maximum mean covered by the cache. 310 * @throws IllegalArgumentException if {@code maxMean < minMean} 311 * @return the poisson sampler cache 312 */ 313 public PoissonSamplerCache withRange(double minMean, 314 double maxMean) { 315 if (values == null) { 316 // Nothing to reuse 317 return new PoissonSamplerCache(minMean, maxMean); 318 } 319 checkMeanRange(minMean, maxMean); 320 321 // The cache can only be used for the LargeMeanPoissonSampler. 322 if (maxMean < PoissonSampler.PIVOT) { 323 return new PoissonSamplerCache(0, 0); 324 } 325 326 // Convert the mean into integers. 327 // Note the minimum is clipped to the algorithm switch point. 328 final int withMinN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT)); 329 final int withMaxN = (int) Math.floor(maxMean); 330 final LargeMeanPoissonSamplerState[] states = 331 new LargeMeanPoissonSamplerState[withMaxN - withMinN + 1]; 332 333 // Preserve values from the current array to the next 334 int currentIndex; 335 int nextIndex; 336 if (this.minN <= withMinN) { 337 // The current array starts before the new array 338 currentIndex = withMinN - this.minN; 339 nextIndex = 0; 340 } else { 341 // The new array starts before the current array 342 currentIndex = 0; 343 nextIndex = this.minN - withMinN; 344 } 345 final int length = Math.min(values.length - currentIndex, states.length - nextIndex); 346 if (length > 0) { 347 System.arraycopy(values, currentIndex, states, nextIndex, length); 348 } 349 350 return new PoissonSamplerCache(withMinN, withMaxN, states); 351 } 352 }