1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math.stat.descriptive; 18 19 import java.io.Serializable; 20 import java.lang.reflect.InvocationTargetException; 21 import java.util.Arrays; 22 23 import org.apache.commons.discovery.tools.DiscoverClass; 24 import org.apache.commons.math.stat.descriptive.moment.GeometricMean; 25 import org.apache.commons.math.stat.descriptive.moment.Kurtosis; 26 import org.apache.commons.math.stat.descriptive.moment.Mean; 27 import org.apache.commons.math.stat.descriptive.moment.Skewness; 28 import org.apache.commons.math.stat.descriptive.moment.Variance; 29 import org.apache.commons.math.stat.descriptive.rank.Max; 30 import org.apache.commons.math.stat.descriptive.rank.Min; 31 import org.apache.commons.math.stat.descriptive.rank.Percentile; 32 import org.apache.commons.math.stat.descriptive.summary.Sum; 33 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares; 34 import org.apache.commons.math.util.ResizableDoubleArray; 35 36 37 /** 38 * Maintains a dataset of values of a single variable and computes descriptive 39 * statistics based on stored data. The {@link #getWindowSize() windowSize} 40 * property sets a limit on the number of values that can be stored in the 41 * dataset. The default value, INFINITE_WINDOW, puts no limit on the size of 42 * the dataset. This value should be used with caution, as the backing store 43 * will grow without bound in this case. For very large datasets, 44 * {@link SummaryStatistics}, which does not store the dataset, should be used 45 * instead of this class. If <code>windowSize</code> is not INFINITE_WINDOW and 46 * more values are added than can be stored in the dataset, new values are 47 * added in a "rolling" manner, with new values replacing the "oldest" values 48 * in the dataset. 49 * 50 * <p>Note: this class is not threadsafe. Use 51 * {@link SynchronizedDescriptiveStatistics} if concurrent access from multiple 52 * threads is required.</p> 53 * 54 * @version $Revision: 620318 $ $Date: 2008-02-10 13:17:24 -0700 (Sun, 10 Feb 2008) $ 55 */ 56 public class DescriptiveStatistics implements StatisticalSummary, Serializable { 57 58 /** Serialization UID */ 59 private static final long serialVersionUID = -2734185686570407433L; 60 61 /** hold the window size **/ 62 protected int windowSize = INFINITE_WINDOW; 63 64 /** 65 * Stored data values 66 */ 67 protected ResizableDoubleArray eDA = new ResizableDoubleArray(); 68 69 /** Mean statistic implementation - can be reset by setter. */ 70 private UnivariateStatistic meanImpl = new Mean(); 71 72 /** Geometric mean statistic implementation - can be reset by setter. */ 73 private UnivariateStatistic geometricMeanImpl = new GeometricMean(); 74 75 /** Kurtosis statistic implementation - can be reset by setter. */ 76 private UnivariateStatistic kurtosisImpl = new Kurtosis(); 77 78 /** Maximum statistic implementation - can be reset by setter. */ 79 private UnivariateStatistic maxImpl = new Max(); 80 81 /** Minimum statistic implementation - can be reset by setter. */ 82 private UnivariateStatistic minImpl = new Min(); 83 84 /** Percentile statistic implementation - can be reset by setter. */ 85 private UnivariateStatistic percentileImpl = new Percentile(); 86 87 /** Skewness statistic implementation - can be reset by setter. */ 88 private UnivariateStatistic skewnessImpl = new Skewness(); 89 90 /** Variance statistic implementation - can be reset by setter. */ 91 private UnivariateStatistic varianceImpl = new Variance(); 92 93 /** Sum of squares statistic implementation - can be reset by setter. */ 94 private UnivariateStatistic sumsqImpl = new SumOfSquares(); 95 96 /** Sum statistic implementation - can be reset by setter. */ 97 private UnivariateStatistic sumImpl = new Sum(); 98 99 /** 100 * Construct a DescriptiveStatistics instance with an infinite window 101 */ 102 public DescriptiveStatistics() { 103 } 104 105 /** 106 * Construct a DescriptiveStatistics instance with the specified window 107 * 108 * @param window the window size. 109 */ 110 public DescriptiveStatistics(int window) { 111 super(); 112 setWindowSize(window); 113 } 114 115 /** 116 * Create an instance of a <code>DescriptiveStatistics</code> 117 * @param cls the type of <code>DescriptiveStatistics</code> object to 118 * create. 119 * @return a new instance. 120 * @throws InstantiationException is thrown if the object can not be 121 * created. 122 * @throws IllegalAccessException is thrown if the type's default 123 * constructor is not accessible. 124 * @deprecated to be removed in commons-math 2.0 125 */ 126 public static DescriptiveStatistics newInstance(Class cls) throws InstantiationException, IllegalAccessException { 127 return (DescriptiveStatistics)cls.newInstance(); 128 } 129 130 /** 131 * Create an instance of a <code>DescriptiveStatistics</code> 132 * @return a new DescriptiveStatistics instance. 133 * @deprecated to be removed in commons-math 2.0 134 */ 135 public static DescriptiveStatistics newInstance() { 136 DescriptiveStatistics factory = null; 137 try { 138 DiscoverClass dc = new DiscoverClass(); 139 factory = (DescriptiveStatistics) dc.newInstance( 140 DescriptiveStatistics.class, 141 "org.apache.commons.math.stat.descriptive.DescriptiveStatisticsImpl"); 142 } catch(Throwable t) { 143 return new DescriptiveStatisticsImpl(); 144 } 145 return factory; 146 } 147 148 /** 149 * Represents an infinite window size. When the {@link #getWindowSize()} 150 * returns this value, there is no limit to the number of data values 151 * that can be stored in the dataset. 152 */ 153 public static final int INFINITE_WINDOW = -1; 154 155 /** 156 * Adds the value to the dataset. If the dataset is at the maximum size 157 * (i.e., the number of stored elements equals the currently configured 158 * windowSize), the first (oldest) element in the dataset is discarded 159 * to make room for the new value. 160 * 161 * @param v the value to be added 162 */ 163 public void addValue(double v) { 164 if (windowSize != INFINITE_WINDOW) { 165 if (getN() == windowSize) { 166 eDA.addElementRolling(v); 167 } else if (getN() < windowSize) { 168 eDA.addElement(v); 169 } 170 } else { 171 eDA.addElement(v); 172 } 173 } 174 175 /** 176 * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm"> 177 * arithmetic mean </a> of the available values 178 * @return The mean or Double.NaN if no values have been added. 179 */ 180 public double getMean() { 181 return apply(meanImpl); 182 } 183 184 /** 185 * Returns the <a href="http://www.xycoon.com/geometric_mean.htm"> 186 * geometric mean </a> of the available values 187 * @return The geometricMean, Double.NaN if no values have been added, 188 * or if the productof the available values is less than or equal to 0. 189 */ 190 public double getGeometricMean() { 191 return apply(geometricMeanImpl); 192 } 193 194 /** 195 * Returns the variance of the available values. 196 * @return The variance, Double.NaN if no values have been added 197 * or 0.0 for a single value set. 198 */ 199 public double getVariance() { 200 return apply(varianceImpl); 201 } 202 203 /** 204 * Returns the standard deviation of the available values. 205 * @return The standard deviation, Double.NaN if no values have been added 206 * or 0.0 for a single value set. 207 */ 208 public double getStandardDeviation() { 209 double stdDev = Double.NaN; 210 if (getN() > 0) { 211 if (getN() > 1) { 212 stdDev = Math.sqrt(getVariance()); 213 } else { 214 stdDev = 0.0; 215 } 216 } 217 return (stdDev); 218 } 219 220 /** 221 * Returns the skewness of the available values. Skewness is a 222 * measure of the asymmetry of a given distribution. 223 * @return The skewness, Double.NaN if no values have been added 224 * or 0.0 for a value set <=2. 225 */ 226 public double getSkewness() { 227 return apply(skewnessImpl); 228 } 229 230 /** 231 * Returns the Kurtosis of the available values. Kurtosis is a 232 * measure of the "peakedness" of a distribution 233 * @return The kurtosis, Double.NaN if no values have been added, or 0.0 234 * for a value set <=3. 235 */ 236 public double getKurtosis() { 237 return apply(kurtosisImpl); 238 } 239 240 /** 241 * Returns the maximum of the available values 242 * @return The max or Double.NaN if no values have been added. 243 */ 244 public double getMax() { 245 return apply(maxImpl); 246 } 247 248 /** 249 * Returns the minimum of the available values 250 * @return The min or Double.NaN if no values have been added. 251 */ 252 public double getMin() { 253 return apply(minImpl); 254 } 255 256 /** 257 * Returns the number of available values 258 * @return The number of available values 259 */ 260 public long getN() { 261 return eDA.getNumElements(); 262 } 263 264 /** 265 * Returns the sum of the values that have been added to Univariate. 266 * @return The sum or Double.NaN if no values have been added 267 */ 268 public double getSum() { 269 return apply(sumImpl); 270 } 271 272 /** 273 * Returns the sum of the squares of the available values. 274 * @return The sum of the squares or Double.NaN if no 275 * values have been added. 276 */ 277 public double getSumsq() { 278 return apply(sumsqImpl); 279 } 280 281 /** 282 * Resets all statistics and storage 283 */ 284 public void clear() { 285 eDA.clear(); 286 } 287 288 289 /** 290 * Returns the maximum number of values that can be stored in the 291 * dataset, or INFINITE_WINDOW (-1) if there is no limit. 292 * 293 * @return The current window size or -1 if its Infinite. 294 */ 295 public int getWindowSize() { 296 return windowSize; 297 } 298 299 /** 300 * WindowSize controls the number of values which contribute 301 * to the reported statistics. For example, if 302 * windowSize is set to 3 and the values {1,2,3,4,5} 303 * have been added <strong> in that order</strong> 304 * then the <i>available values</i> are {3,4,5} and all 305 * reported statistics will be based on these values 306 * @param windowSize sets the size of the window. 307 */ 308 public void setWindowSize(int windowSize) { 309 if (windowSize < 1) { 310 if (windowSize != INFINITE_WINDOW) { 311 throw new IllegalArgumentException("window size must be positive."); 312 } 313 } 314 315 this.windowSize = windowSize; 316 317 // We need to check to see if we need to discard elements 318 // from the front of the array. If the windowSize is less than 319 // the current number of elements. 320 if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) { 321 eDA.discardFrontElements(eDA.getNumElements() - windowSize); 322 } 323 } 324 325 /** 326 * Returns the current set of values in an array of double primitives. 327 * The order of addition is preserved. The returned array is a fresh 328 * copy of the underlying data -- i.e., it is not a reference to the 329 * stored data. 330 * 331 * @return returns the current set of numbers in the order in which they 332 * were added to this set 333 */ 334 public double[] getValues() { 335 double[] copiedArray = new double[eDA.getNumElements()]; 336 System.arraycopy(eDA.getElements(), 0, copiedArray, 337 0, eDA.getNumElements()); 338 return copiedArray; 339 } 340 341 /** 342 * Returns the current set of values in an array of double primitives, 343 * sorted in ascending order. The returned array is a fresh 344 * copy of the underlying data -- i.e., it is not a reference to the 345 * stored data. 346 * @return returns the current set of 347 * numbers sorted in ascending order 348 */ 349 public double[] getSortedValues() { 350 double[] sort = getValues(); 351 Arrays.sort(sort); 352 return sort; 353 } 354 355 /** 356 * Returns the element at the specified index 357 * @param index The Index of the element 358 * @return return the element at the specified index 359 */ 360 public double getElement(int index) { 361 return eDA.getElement(index); 362 } 363 364 /** 365 * Returns an estimate for the pth percentile of the stored values. 366 * <p> 367 * The implementation provided here follows the first estimation procedure presented 368 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a> 369 * </p><p> 370 * <strong>Preconditions</strong>:<ul> 371 * <li><code>0 < p < 100</code> (otherwise an 372 * <code>IllegalArgumentException</code> is thrown)</li> 373 * <li>at least one value must be stored (returns <code>Double.NaN 374 * </code> otherwise)</li> 375 * </ul></p> 376 * 377 * @param p the requested percentile (scaled from 0 - 100) 378 * @return An estimate for the pth percentile of the stored data 379 * @throws IllegalStateException if percentile implementation has been 380 * overridden and the supplied implementation does not support setQuantile 381 * values 382 */ 383 public double getPercentile(double p) { 384 if (percentileImpl instanceof Percentile) { 385 ((Percentile) percentileImpl).setQuantile(p); 386 } else { 387 try { 388 percentileImpl.getClass().getMethod("setQuantile", 389 new Class[] {Double.TYPE}).invoke(percentileImpl, 390 new Object[] {new Double(p)}); 391 } catch (NoSuchMethodException e1) { // Setter guard should prevent 392 throw new IllegalArgumentException( 393 "Percentile implementation does not support setQuantile"); 394 } catch (IllegalAccessException e2) { 395 throw new IllegalArgumentException( 396 "IllegalAccessException setting quantile"); 397 } catch (InvocationTargetException e3) { 398 throw new IllegalArgumentException( 399 "Error setting quantile" + e3.toString()); 400 } 401 } 402 return apply(percentileImpl); 403 } 404 405 /** 406 * Generates a text report displaying univariate statistics from values 407 * that have been added. Each statistic is displayed on a separate 408 * line. 409 * 410 * @return String with line feeds displaying statistics 411 */ 412 public String toString() { 413 StringBuffer outBuffer = new StringBuffer(); 414 outBuffer.append("DescriptiveStatistics:\n"); 415 outBuffer.append("n: " + getN() + "\n"); 416 outBuffer.append("min: " + getMin() + "\n"); 417 outBuffer.append("max: " + getMax() + "\n"); 418 outBuffer.append("mean: " + getMean() + "\n"); 419 outBuffer.append("std dev: " + getStandardDeviation() + "\n"); 420 outBuffer.append("median: " + getPercentile(50) + "\n"); 421 outBuffer.append("skewness: " + getSkewness() + "\n"); 422 outBuffer.append("kurtosis: " + getKurtosis() + "\n"); 423 return outBuffer.toString(); 424 } 425 426 /** 427 * Apply the given statistic to the data associated with this set of statistics. 428 * @param stat the statistic to apply 429 * @return the computed value of the statistic. 430 */ 431 public double apply(UnivariateStatistic stat) { 432 return stat.evaluate(eDA.getValues(), eDA.start(), eDA.getNumElements()); 433 } 434 435 // Implementation getters and setter 436 437 /** 438 * Returns the currently configured mean implementation. 439 * 440 * @return the UnivariateStatistic implementing the mean 441 * @since 1.2 442 */ 443 public synchronized UnivariateStatistic getMeanImpl() { 444 return meanImpl; 445 } 446 447 /** 448 * <p>Sets the implementation for the mean.</p> 449 * 450 * @param meanImpl the UnivariateStatistic instance to use 451 * for computing the mean 452 * @since 1.2 453 */ 454 public synchronized void setMeanImpl(UnivariateStatistic meanImpl) { 455 this.meanImpl = meanImpl; 456 } 457 458 /** 459 * Returns the currently configured geometric mean implementation. 460 * 461 * @return the UnivariateStatistic implementing the geometric mean 462 * @since 1.2 463 */ 464 public synchronized UnivariateStatistic getGeometricMeanImpl() { 465 return geometricMeanImpl; 466 } 467 468 /** 469 * <p>Sets the implementation for the gemoetric mean.</p> 470 * 471 * @param geometricMeanImpl the UnivariateStatistic instance to use 472 * for computing the geometric mean 473 * @since 1.2 474 */ 475 public synchronized void setGeometricMeanImpl( 476 UnivariateStatistic geometricMeanImpl) { 477 this.geometricMeanImpl = geometricMeanImpl; 478 } 479 480 /** 481 * Returns the currently configured kurtosis implementation. 482 * 483 * @return the UnivariateStatistic implementing the kurtosis 484 * @since 1.2 485 */ 486 public synchronized UnivariateStatistic getKurtosisImpl() { 487 return kurtosisImpl; 488 } 489 490 /** 491 * <p>Sets the implementation for the kurtosis.</p> 492 * 493 * @param kurtosisImpl the UnivariateStatistic instance to use 494 * for computing the kurtosis 495 * @since 1.2 496 */ 497 public synchronized void setKurtosisImpl(UnivariateStatistic kurtosisImpl) { 498 this.kurtosisImpl = kurtosisImpl; 499 } 500 501 /** 502 * Returns the currently configured maximum implementation. 503 * 504 * @return the UnivariateStatistic implementing the maximum 505 * @since 1.2 506 */ 507 public synchronized UnivariateStatistic getMaxImpl() { 508 return maxImpl; 509 } 510 511 /** 512 * <p>Sets the implementation for the maximum.</p> 513 * 514 * @param maxImpl the UnivariateStatistic instance to use 515 * for computing the maximum 516 * @since 1.2 517 */ 518 public synchronized void setMaxImpl(UnivariateStatistic maxImpl) { 519 this.maxImpl = maxImpl; 520 } 521 522 /** 523 * Returns the currently configured minimum implementation. 524 * 525 * @return the UnivariateStatistic implementing the minimum 526 * @since 1.2 527 */ 528 public synchronized UnivariateStatistic getMinImpl() { 529 return minImpl; 530 } 531 532 /** 533 * <p>Sets the implementation for the minimum.</p> 534 * 535 * @param minImpl the UnivariateStatistic instance to use 536 * for computing the minimum 537 * @since 1.2 538 */ 539 public synchronized void setMinImpl(UnivariateStatistic minImpl) { 540 this.minImpl = minImpl; 541 } 542 543 /** 544 * Returns the currently configured percentile implementation. 545 * 546 * @return the UnivariateStatistic implementing the percentile 547 * @since 1.2 548 */ 549 public synchronized UnivariateStatistic getPercentileImpl() { 550 return percentileImpl; 551 } 552 553 /** 554 * Sets the implementation to be used by {@link #getPercentile(double)}. 555 * The supplied <code>UnivariateStatistic</code> must provide a 556 * <code>setQuantile(double)</code> method; otherwise 557 * <code>IllegalArgumentException</code> is thrown. 558 * 559 * @param percentileImpl the percentileImpl to set 560 * @throws IllegalArgumentException if the supplied implementation does not 561 * provide a <code>setQuantile</code> method 562 * @since 1.2 563 */ 564 public synchronized void setPercentileImpl( 565 UnivariateStatistic percentileImpl) { 566 try { 567 percentileImpl.getClass().getMethod("setQuantile", 568 new Class[] {Double.TYPE}).invoke(percentileImpl, 569 new Object[] {new Double(50.0d)}); 570 } catch (NoSuchMethodException e1) { 571 throw new IllegalArgumentException( 572 "Percentile implementation does not support setQuantile"); 573 } catch (IllegalAccessException e2) { 574 throw new IllegalArgumentException( 575 "IllegalAccessException setting quantile"); 576 } catch (InvocationTargetException e3) { 577 throw new IllegalArgumentException( 578 "Error setting quantile" + e3.toString()); 579 } 580 this.percentileImpl = percentileImpl; 581 } 582 583 /** 584 * Returns the currently configured skewness implementation. 585 * 586 * @return the UnivariateStatistic implementing the skewness 587 * @since 1.2 588 */ 589 public synchronized UnivariateStatistic getSkewnessImpl() { 590 return skewnessImpl; 591 } 592 593 /** 594 * <p>Sets the implementation for the skewness.</p> 595 * 596 * @param skewnessImpl the UnivariateStatistic instance to use 597 * for computing the skewness 598 * @since 1.2 599 */ 600 public synchronized void setSkewnessImpl( 601 UnivariateStatistic skewnessImpl) { 602 this.skewnessImpl = skewnessImpl; 603 } 604 605 /** 606 * Returns the currently configured variance implementation. 607 * 608 * @return the UnivariateStatistic implementing the variance 609 * @since 1.2 610 */ 611 public synchronized UnivariateStatistic getVarianceImpl() { 612 return varianceImpl; 613 } 614 615 /** 616 * <p>Sets the implementation for the variance.</p> 617 * 618 * @param varianceImpl the UnivariateStatistic instance to use 619 * for computing the variance 620 * @since 1.2 621 */ 622 public synchronized void setVarianceImpl( 623 UnivariateStatistic varianceImpl) { 624 this.varianceImpl = varianceImpl; 625 } 626 627 /** 628 * Returns the currently configured sum of squares implementation. 629 * 630 * @return the UnivariateStatistic implementing the sum of squares 631 * @since 1.2 632 */ 633 public synchronized UnivariateStatistic getSumsqImpl() { 634 return sumsqImpl; 635 } 636 637 /** 638 * <p>Sets the implementation for the sum of squares.</p> 639 * 640 * @param sumsqImpl the UnivariateStatistic instance to use 641 * for computing the sum of squares 642 * @since 1.2 643 */ 644 public synchronized void setSumsqImpl(UnivariateStatistic sumsqImpl) { 645 this.sumsqImpl = sumsqImpl; 646 } 647 648 /** 649 * Returns the currently configured sum implementation. 650 * 651 * @return the UnivariateStatistic implementing the sum 652 * @since 1.2 653 */ 654 public synchronized UnivariateStatistic getSumImpl() { 655 return sumImpl; 656 } 657 658 /** 659 * <p>Sets the implementation for the sum.</p> 660 * 661 * @param sumImpl the UnivariateStatistic instance to use 662 * for computing the sum 663 * @since 1.2 664 */ 665 public synchronized void setSumImpl(UnivariateStatistic sumImpl) { 666 this.sumImpl = sumImpl; 667 } 668 }