1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math.stat.descriptive; 18 19 import java.io.Serializable; 20 21 import org.apache.commons.discovery.tools.DiscoverClass; 22 import org.apache.commons.math.stat.descriptive.moment.GeometricMean; 23 import org.apache.commons.math.stat.descriptive.moment.Mean; 24 import org.apache.commons.math.stat.descriptive.moment.SecondMoment; 25 import org.apache.commons.math.stat.descriptive.moment.Variance; 26 import org.apache.commons.math.stat.descriptive.rank.Max; 27 import org.apache.commons.math.stat.descriptive.rank.Min; 28 import org.apache.commons.math.stat.descriptive.summary.Sum; 29 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs; 30 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares; 31 import org.apache.commons.math.util.MathUtils; 32 33 /** 34 * <p>Computes summary statistics for a stream of data values added using the 35 * {@link #addValue(double) addValue} method. The data values are not stored in 36 * memory, so this class can be used to compute statistics for very large 37 * data streams.</p> 38 * 39 * <p>The {@link StorelessUnivariateStatistic} instances used to maintain 40 * summary state and compute statistics are configurable via setters. 41 * For example, the default implementation for the variance can be overridden by 42 * calling {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual 43 * parameters to these methods must implement the 44 * {@link StorelessUnivariateStatistic} interface and configuration must be 45 * completed before <code>addValue</code> is called. No configuration is 46 * necessary to use the default, commons-math provided implementations.</p> 47 * 48 * <p>Note: This class is not thread-safe. Use 49 * {@link SynchronizedSummaryStatistics} if concurrent access from multiple 50 * threads is required.</p> 51 * 52 * @version $Revision: 620312 $ $Date: 2008-02-10 12:28:59 -0700 (Sun, 10 Feb 2008) $ 53 */ 54 public class SummaryStatistics implements StatisticalSummary, Serializable { 55 56 /** Serialization UID */ 57 private static final long serialVersionUID = -3346512372447011854L; 58 59 /** 60 * Create an instance of a <code>SummaryStatistics</code> 61 * 62 * @param cls the type of <code>SummaryStatistics</code> object to 63 * create. 64 * @return a new instance. 65 * @deprecated to be removed in commons-math 2.0 66 * @throws InstantiationException is thrown if the object can not be 67 * created. 68 * @throws IllegalAccessException is thrown if the type's default 69 * constructor is not accessible. 70 */ 71 public static SummaryStatistics newInstance(Class cls) throws 72 InstantiationException, IllegalAccessException { 73 return (SummaryStatistics)cls.newInstance(); 74 } 75 76 /** 77 * Create an instance of a <code>SummaryStatistics</code> 78 * 79 * @return a new SummaryStatistics instance. 80 * @deprecated to be removed in commons-math 2.0 81 */ 82 public static SummaryStatistics newInstance() { 83 SummaryStatistics instance = null; 84 try { 85 DiscoverClass dc = new DiscoverClass(); 86 instance = (SummaryStatistics) dc.newInstance( 87 SummaryStatistics.class, 88 "org.apache.commons.math.stat.descriptive.SummaryStatisticsImpl"); 89 } catch(Throwable t) { 90 return new SummaryStatisticsImpl(); 91 } 92 return instance; 93 } 94 95 /** 96 * Construct a SummaryStatistics instance 97 */ 98 public SummaryStatistics() { 99 } 100 101 /** count of values that have been added */ 102 protected long n = 0; 103 104 /** SecondMoment is used to compute the mean and variance */ 105 protected SecondMoment secondMoment = new SecondMoment(); 106 107 /** sum of values that have been added */ 108 protected Sum sum = new Sum(); 109 110 /** sum of the square of each value that has been added */ 111 protected SumOfSquares sumsq = new SumOfSquares(); 112 113 /** min of values that have been added */ 114 protected Min min = new Min(); 115 116 /** max of values that have been added */ 117 protected Max max = new Max(); 118 119 /** sumLog of values that have been added */ 120 protected SumOfLogs sumLog = new SumOfLogs(); 121 122 /** geoMean of values that have been added */ 123 protected GeometricMean geoMean = new GeometricMean(sumLog); 124 125 /** mean of values that have been added */ 126 protected Mean mean = new Mean(); 127 128 /** variance of values that have been added */ 129 protected Variance variance = new Variance(); 130 131 /** Sum statistic implementation - can be reset by setter. */ 132 private StorelessUnivariateStatistic sumImpl = sum; 133 134 /** Sum of squares statistic implementation - can be reset by setter. */ 135 private StorelessUnivariateStatistic sumsqImpl = sumsq; 136 137 /** Minimum statistic implementation - can be reset by setter. */ 138 private StorelessUnivariateStatistic minImpl = min; 139 140 /** Maximum statistic implementation - can be reset by setter. */ 141 private StorelessUnivariateStatistic maxImpl = max; 142 143 /** Sum of log statistic implementation - can be reset by setter. */ 144 private StorelessUnivariateStatistic sumLogImpl = sumLog; 145 146 /** Geometric mean statistic implementation - can be reset by setter. */ 147 private StorelessUnivariateStatistic geoMeanImpl = geoMean; 148 149 /** Mean statistic implementation - can be reset by setter. */ 150 private StorelessUnivariateStatistic meanImpl = mean; 151 152 /** Variance statistic implementation - can be reset by setter. */ 153 private StorelessUnivariateStatistic varianceImpl = variance; 154 155 /** 156 * Return a {@link StatisticalSummaryValues} instance reporting current 157 * statistics. 158 * 159 * @return Current values of statistics 160 */ 161 public StatisticalSummary getSummary() { 162 return new StatisticalSummaryValues(getMean(), getVariance(), getN(), 163 getMax(), getMin(), getSum()); 164 } 165 166 /** 167 * Add a value to the data 168 * 169 * @param value the value to add 170 */ 171 public void addValue(double value) { 172 sumImpl.increment(value); 173 sumsqImpl.increment(value); 174 minImpl.increment(value); 175 maxImpl.increment(value); 176 sumLogImpl.increment(value); 177 secondMoment.increment(value); 178 // If mean, variance or geomean have been overridden, 179 // need to increment these 180 if (!(meanImpl instanceof Mean)) { 181 meanImpl.increment(value); 182 } 183 if (!(varianceImpl instanceof Variance)) { 184 varianceImpl.increment(value); 185 } 186 if (!(geoMeanImpl instanceof GeometricMean)) { 187 geoMeanImpl.increment(value); 188 } 189 n++; 190 } 191 192 /** 193 * Returns the number of available values 194 * @return The number of available values 195 */ 196 public long getN() { 197 return n; 198 } 199 200 /** 201 * Returns the sum of the values that have been added 202 * @return The sum or <code>Double.NaN</code> if no values have been added 203 */ 204 public double getSum() { 205 return sumImpl.getResult(); 206 } 207 208 /** 209 * Returns the sum of the squares of the values that have been added. 210 * <p> 211 * Double.NaN is returned if no values have been added.</p> 212 * 213 * @return The sum of squares 214 */ 215 public double getSumsq() { 216 return sumsqImpl.getResult(); 217 } 218 219 /** 220 * Returns the mean of the values that have been added. 221 * <p> 222 * Double.NaN is returned if no values have been added.</p> 223 * 224 * @return the mean 225 */ 226 public double getMean() { 227 if (mean == meanImpl) { 228 return new Mean(secondMoment).getResult(); 229 } else { 230 return meanImpl.getResult(); 231 } 232 } 233 234 /** 235 * Returns the standard deviation of the values that have been added. 236 * <p> 237 * Double.NaN is returned if no values have been added.</p> 238 * 239 * @return the standard deviation 240 */ 241 public double getStandardDeviation() { 242 double stdDev = Double.NaN; 243 if (getN() > 0) { 244 if (getN() > 1) { 245 stdDev = Math.sqrt(getVariance()); 246 } else { 247 stdDev = 0.0; 248 } 249 } 250 return (stdDev); 251 } 252 253 /** 254 * Returns the variance of the values that have been added. 255 * <p> 256 * Double.NaN is returned if no values have been added.</p> 257 * 258 * @return the variance 259 */ 260 public double getVariance() { 261 if (varianceImpl == variance) { 262 return new Variance(secondMoment).getResult(); 263 } else { 264 return varianceImpl.getResult(); 265 } 266 } 267 268 /** 269 * Returns the maximum of the values that have been added. 270 * <p> 271 * Double.NaN is returned if no values have been added.</p> 272 * 273 * @return the maximum 274 */ 275 public double getMax() { 276 return maxImpl.getResult(); 277 } 278 279 /** 280 * Returns the minimum of the values that have been added. 281 * <p> 282 * Double.NaN is returned if no values have been added.</p> 283 * 284 * @return the minimum 285 */ 286 public double getMin() { 287 return minImpl.getResult(); 288 } 289 290 /** 291 * Returns the geometric mean of the values that have been added. 292 * <p> 293 * Double.NaN is returned if no values have been added.</p> 294 * 295 * @return the geometric mean 296 */ 297 public double getGeometricMean() { 298 return geoMeanImpl.getResult(); 299 } 300 301 /** 302 * Returns the sum of the logs of the values that have been added. 303 * <p> 304 * Double.NaN is returned if no values have been added.</p> 305 * 306 * @return the sum of logs 307 * @since 1.2 308 */ 309 public double getSumOfLogs() { 310 return sumLogImpl.getResult(); 311 } 312 313 /** 314 * Generates a text report displaying 315 * summary statistics from values that 316 * have been added. 317 * @return String with line feeds displaying statistics 318 * @since 1.2 319 */ 320 public String toString() { 321 StringBuffer outBuffer = new StringBuffer(); 322 outBuffer.append("SummaryStatistics:\n"); 323 outBuffer.append("n: " + getN() + "\n"); 324 outBuffer.append("min: " + getMin() + "\n"); 325 outBuffer.append("max: " + getMax() + "\n"); 326 outBuffer.append("mean: " + getMean() + "\n"); 327 outBuffer.append("geometric mean: " + getGeometricMean() + "\n"); 328 outBuffer.append("variance: " + getVariance() + "\n"); 329 outBuffer.append("sum of squares: " + getSumsq() + "\n"); 330 outBuffer.append("standard deviation: " + getStandardDeviation() + "\n"); 331 outBuffer.append("sum of logs: " + getSumOfLogs() + "\n"); 332 return outBuffer.toString(); 333 } 334 335 /** 336 * Resets all statistics and storage 337 */ 338 public void clear() { 339 this.n = 0; 340 minImpl.clear(); 341 maxImpl.clear(); 342 sumImpl.clear(); 343 sumLogImpl.clear(); 344 sumsqImpl.clear(); 345 geoMeanImpl.clear(); 346 secondMoment.clear(); 347 if (meanImpl != mean) { 348 meanImpl.clear(); 349 } 350 if (varianceImpl != variance) { 351 varianceImpl.clear(); 352 } 353 } 354 355 /** 356 * Returns true iff <code>object</code> is a <code>SummaryStatistics</code> 357 * instance and all statistics have the same values as this. 358 * @param object the object to test equality against. 359 * @return true if object equals this 360 */ 361 public boolean equals(Object object) { 362 if (object == this ) { 363 return true; 364 } 365 if (object instanceof SummaryStatistics == false) { 366 return false; 367 } 368 SummaryStatistics stat = (SummaryStatistics) object; 369 return (MathUtils.equals(stat.getGeometricMean(), 370 this.getGeometricMean()) && 371 MathUtils.equals(stat.getMax(), this.getMax()) && 372 MathUtils.equals(stat.getMean(),this.getMean()) && 373 MathUtils.equals(stat.getMin(),this.getMin()) && 374 MathUtils.equals(stat.getN(), this.getN()) && 375 MathUtils.equals(stat.getSum(), this.getSum()) && 376 MathUtils.equals(stat.getSumsq(),this.getSumsq()) && 377 MathUtils.equals(stat.getVariance(),this.getVariance())); 378 } 379 380 /** 381 * Returns hash code based on values of statistics 382 * 383 * @return hash code 384 */ 385 public int hashCode() { 386 int result = 31 + MathUtils.hash(getGeometricMean()); 387 result = result * 31 + MathUtils.hash(getGeometricMean()); 388 result = result * 31 + MathUtils.hash(getMax()); 389 result = result * 31 + MathUtils.hash(getMean()); 390 result = result * 31 + MathUtils.hash(getMin()); 391 result = result * 31 + MathUtils.hash(getN()); 392 result = result * 31 + MathUtils.hash(getSum()); 393 result = result * 31 + MathUtils.hash(getSumsq()); 394 result = result * 31 + MathUtils.hash(getVariance()); 395 return result; 396 } 397 398 // Getters and setters for statistics implementations 399 /** 400 * Returns the currently configured Sum implementation 401 * 402 * @return the StorelessUnivariateStatistic implementing the sum 403 * @since 1.2 404 */ 405 public StorelessUnivariateStatistic getSumImpl() { 406 return sumImpl; 407 } 408 409 /** 410 * <p>Sets the implementation for the Sum.</p> 411 * <p>This method must be activated before any data has been added - i.e., 412 * before {@link #addValue(double) addValue} has been used to add data; 413 * otherwise an IllegalStateException will be thrown.</p> 414 * 415 * @param sumImpl the StorelessUnivariateStatistic instance to use 416 * for computing the Sum 417 * @throws IllegalStateException if data has already been added 418 * (i.e if n > 0) 419 * @since 1.2 420 */ 421 public void setSumImpl(StorelessUnivariateStatistic sumImpl) { 422 checkEmpty(); 423 this.sumImpl = sumImpl; 424 } 425 426 /** 427 * Returns the currently configured sum of squares implementation 428 * 429 * @return the StorelessUnivariateStatistic implementing the sum of squares 430 * @since 1.2 431 */ 432 public StorelessUnivariateStatistic getSumsqImpl() { 433 return sumsqImpl; 434 } 435 436 /** 437 * <p>Sets the implementation for the sum of squares.</p> 438 * <p>This method must be activated before any data has been added - i.e., 439 * before {@link #addValue(double) addValue} has been used to add data; 440 * otherwise an IllegalStateException will be thrown.</p> 441 * 442 * @param sumsqImpl the StorelessUnivariateStatistic instance to use 443 * for computing the sum of squares 444 * @throws IllegalStateException if data has already been added 445 * (i.e if n > 0) 446 * @since 1.2 447 */ 448 public void setSumsqImpl( 449 StorelessUnivariateStatistic sumsqImpl) { 450 checkEmpty(); 451 this.sumsqImpl = sumsqImpl; 452 } 453 454 /** 455 * Returns the currently configured minimum implementation 456 * 457 * @return the StorelessUnivariateStatistic implementing the minimum 458 * @since 1.2 459 */ 460 public StorelessUnivariateStatistic getMinImpl() { 461 return minImpl; 462 } 463 464 /** 465 * <p>Sets the implementation for the minimum.</p> 466 * <p>This method must be activated before any data has been added - i.e., 467 * before {@link #addValue(double) addValue} has been used to add data; 468 * otherwise an IllegalStateException will be thrown.</p> 469 * 470 * @param minImpl the StorelessUnivariateStatistic instance to use 471 * for computing the minimum 472 * @throws IllegalStateException if data has already been added 473 * (i.e if n > 0) 474 * @since 1.2 475 */ 476 public void setMinImpl(StorelessUnivariateStatistic minImpl) { 477 checkEmpty(); 478 this.minImpl = minImpl; 479 } 480 481 /** 482 * Returns the currently configured maximum implementation 483 * 484 * @return the StorelessUnivariateStatistic implementing the maximum 485 * @since 1.2 486 */ 487 public StorelessUnivariateStatistic getMaxImpl() { 488 return maxImpl; 489 } 490 491 /** 492 * <p>Sets the implementation for the maximum.</p> 493 * <p>This method must be activated before any data has been added - i.e., 494 * before {@link #addValue(double) addValue} has been used to add data; 495 * otherwise an IllegalStateException will be thrown.</p> 496 * 497 * @param maxImpl the StorelessUnivariateStatistic instance to use 498 * for computing the maximum 499 * @throws IllegalStateException if data has already been added 500 * (i.e if n > 0) 501 * @since 1.2 502 */ 503 public void setMaxImpl(StorelessUnivariateStatistic maxImpl) { 504 checkEmpty(); 505 this.maxImpl = maxImpl; 506 } 507 508 /** 509 * Returns the currently configured sum of logs implementation 510 * 511 * @return the StorelessUnivariateStatistic implementing the log sum 512 * @since 1.2 513 */ 514 public StorelessUnivariateStatistic getSumLogImpl() { 515 return sumLogImpl; 516 } 517 518 /** 519 * <p>Sets the implementation for the sum of logs.</p> 520 * <p>This method must be activated before any data has been added - i.e., 521 * before {@link #addValue(double) addValue} has been used to add data; 522 * otherwise an IllegalStateException will be thrown.</p> 523 * 524 * @param sumLogImpl the StorelessUnivariateStatistic instance to use 525 * for computing the log sum 526 * @throws IllegalStateException if data has already been added 527 * (i.e if n > 0) 528 * @since 1.2 529 */ 530 public void setSumLogImpl( 531 StorelessUnivariateStatistic sumLogImpl) { 532 checkEmpty(); 533 this.sumLogImpl = sumLogImpl; 534 geoMean.setSumLogImpl(sumLogImpl); 535 } 536 537 /** 538 * Returns the currently configured geometric mean implementation 539 * 540 * @return the StorelessUnivariateStatistic implementing the geometric mean 541 * @since 1.2 542 */ 543 public StorelessUnivariateStatistic getGeoMeanImpl() { 544 return geoMeanImpl; 545 } 546 547 /** 548 * <p>Sets the implementation for the geometric mean.</p> 549 * <p>This method must be activated before any data has been added - i.e., 550 * before {@link #addValue(double) addValue} has been used to add data; 551 * otherwise an IllegalStateException will be thrown.</p> 552 * 553 * @param geoMeanImpl the StorelessUnivariateStatistic instance to use 554 * for computing the geometric mean 555 * @throws IllegalStateException if data has already been added 556 * (i.e if n > 0) 557 * @since 1.2 558 */ 559 public void setGeoMeanImpl( 560 StorelessUnivariateStatistic geoMeanImpl) { 561 checkEmpty(); 562 this.geoMeanImpl = geoMeanImpl; 563 } 564 565 /** 566 * Returns the currently configured mean implementation 567 * 568 * @return the StorelessUnivariateStatistic implementing the mean 569 * @since 1.2 570 */ 571 public StorelessUnivariateStatistic getMeanImpl() { 572 return meanImpl; 573 } 574 575 /** 576 * <p>Sets the implementation for the mean.</p> 577 * <p>This method must be activated before any data has been added - i.e., 578 * before {@link #addValue(double) addValue} has been used to add data; 579 * otherwise an IllegalStateException will be thrown.</p> 580 * 581 * @param meanImpl the StorelessUnivariateStatistic instance to use 582 * for computing the mean 583 * @throws IllegalStateException if data has already been added 584 * (i.e if n > 0) 585 * @since 1.2 586 */ 587 public void setMeanImpl( 588 StorelessUnivariateStatistic meanImpl) { 589 checkEmpty(); 590 this.meanImpl = meanImpl; 591 } 592 593 /** 594 * Returns the currently configured variance implementation 595 * 596 * @return the StorelessUnivariateStatistic implementing the variance 597 * @since 1.2 598 */ 599 public StorelessUnivariateStatistic getVarianceImpl() { 600 return varianceImpl; 601 } 602 603 /** 604 * <p>Sets the implementation for the variance.</p> 605 * <p>This method must be activated before any data has been added - i.e., 606 * before {@link #addValue(double) addValue} has been used to add data; 607 * otherwise an IllegalStateException will be thrown.</p> 608 * 609 * @param varianceImpl the StorelessUnivariateStatistic instance to use 610 * for computing the variance 611 * @throws IllegalStateException if data has already been added 612 * (i.e if n > 0) 613 * @since 1.2 614 */ 615 public void setVarianceImpl( 616 StorelessUnivariateStatistic varianceImpl) { 617 checkEmpty(); 618 this.varianceImpl = varianceImpl; 619 } 620 621 /** 622 * Throws IllegalStateException if n > 0. 623 */ 624 private void checkEmpty() { 625 if (n > 0) { 626 throw new IllegalStateException( 627 "Implementations must be configured before values are added."); 628 } 629 } 630 631 }