1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math.stat; 18 19 import org.apache.commons.math.stat.descriptive.UnivariateStatistic; 20 import org.apache.commons.math.stat.descriptive.moment.GeometricMean; 21 import org.apache.commons.math.stat.descriptive.moment.Mean; 22 import org.apache.commons.math.stat.descriptive.moment.Variance; 23 import org.apache.commons.math.stat.descriptive.rank.Max; 24 import org.apache.commons.math.stat.descriptive.rank.Min; 25 import org.apache.commons.math.stat.descriptive.rank.Percentile; 26 import org.apache.commons.math.stat.descriptive.summary.Product; 27 import org.apache.commons.math.stat.descriptive.summary.Sum; 28 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs; 29 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares; 30 31 /** 32 * StatUtils provides static methods for computing statistics based on data 33 * stored in double[] arrays. 34 * 35 * @version $Revision: 617953 $ $Date: 2008-02-02 22:54:00 -0700 (Sat, 02 Feb 2008) $ 36 */ 37 public final class StatUtils { 38 39 /** sum */ 40 private static UnivariateStatistic sum = new Sum(); 41 42 /** sumSq */ 43 private static UnivariateStatistic sumSq = new SumOfSquares(); 44 45 /** prod */ 46 private static UnivariateStatistic prod = new Product(); 47 48 /** sumLog */ 49 private static UnivariateStatistic sumLog = new SumOfLogs(); 50 51 /** min */ 52 private static UnivariateStatistic min = new Min(); 53 54 /** max */ 55 private static UnivariateStatistic max = new Max(); 56 57 /** mean */ 58 private static UnivariateStatistic mean = new Mean(); 59 60 /** variance */ 61 private static Variance variance = new Variance(); 62 63 /** percentile */ 64 private static Percentile percentile = new Percentile(); 65 66 /** geometric mean */ 67 private static GeometricMean geometricMean = new GeometricMean(); 68 69 /** 70 * Private Constructor 71 */ 72 private StatUtils() { 73 } 74 75 /** 76 * Returns the sum of the values in the input array, or 77 * <code>Double.NaN</code> if the array is empty. 78 * <p> 79 * Throws <code>IllegalArgumentException</code> if the input array 80 * is null.</p> 81 * 82 * @param values array of values to sum 83 * @return the sum of the values or <code>Double.NaN</code> if the array 84 * is empty 85 * @throws IllegalArgumentException if the array is null 86 */ 87 public static double sum(final double[] values) { 88 return sum.evaluate(values); 89 } 90 91 /** 92 * Returns the sum of the entries in the specified portion of 93 * the input array, or <code>Double.NaN</code> if the designated subarray 94 * is empty. 95 * <p> 96 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 97 * 98 * @param values the input array 99 * @param begin index of the first array element to include 100 * @param length the number of elements to include 101 * @return the sum of the values or Double.NaN if length = 0 102 * @throws IllegalArgumentException if the array is null or the array index 103 * parameters are not valid 104 */ 105 public static double sum(final double[] values, final int begin, 106 final int length) { 107 return sum.evaluate(values, begin, length); 108 } 109 110 /** 111 * Returns the sum of the squares of the entries in the input array, or 112 * <code>Double.NaN</code> if the array is empty. 113 * <p> 114 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 115 * 116 * @param values input array 117 * @return the sum of the squared values or <code>Double.NaN</code> if the 118 * array is empty 119 * @throws IllegalArgumentException if the array is null 120 */ 121 public static double sumSq(final double[] values) { 122 return sumSq.evaluate(values); 123 } 124 125 /** 126 * Returns the sum of the squares of the entries in the specified portion of 127 * the input array, or <code>Double.NaN</code> if the designated subarray 128 * is empty. 129 * <p> 130 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 131 * 132 * @param values the input array 133 * @param begin index of the first array element to include 134 * @param length the number of elements to include 135 * @return the sum of the squares of the values or Double.NaN if length = 0 136 * @throws IllegalArgumentException if the array is null or the array index 137 * parameters are not valid 138 */ 139 public static double sumSq(final double[] values, final int begin, 140 final int length) { 141 return sumSq.evaluate(values, begin, length); 142 } 143 144 /** 145 * Returns the product of the entries in the input array, or 146 * <code>Double.NaN</code> if the array is empty. 147 * <p> 148 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 149 * 150 * @param values the input array 151 * @return the product of the values or Double.NaN if the array is empty 152 * @throws IllegalArgumentException if the array is null 153 */ 154 public static double product(final double[] values) { 155 return prod.evaluate(values); 156 } 157 158 /** 159 * Returns the product of the entries in the specified portion of 160 * the input array, or <code>Double.NaN</code> if the designated subarray 161 * is empty. 162 * <p> 163 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 164 * 165 * @param values the input array 166 * @param begin index of the first array element to include 167 * @param length the number of elements to include 168 * @return the product of the values or Double.NaN if length = 0 169 * @throws IllegalArgumentException if the array is null or the array index 170 * parameters are not valid 171 */ 172 public static double product(final double[] values, final int begin, 173 final int length) { 174 return prod.evaluate(values, begin, length); 175 } 176 177 /** 178 * Returns the sum of the natural logs of the entries in the input array, or 179 * <code>Double.NaN</code> if the array is empty. 180 * <p> 181 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 182 * <p> 183 * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}. 184 * </p> 185 * 186 * @param values the input array 187 * @return the sum of the natural logs of the values or Double.NaN if 188 * the array is empty 189 * @throws IllegalArgumentException if the array is null 190 */ 191 public static double sumLog(final double[] values) { 192 return sumLog.evaluate(values); 193 } 194 195 /** 196 * Returns the sum of the natural logs of the entries in the specified portion of 197 * the input array, or <code>Double.NaN</code> if the designated subarray 198 * is empty. 199 * <p> 200 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 201 * <p> 202 * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}. 203 * </p> 204 * 205 * @param values the input array 206 * @param begin index of the first array element to include 207 * @param length the number of elements to include 208 * @return the sum of the natural logs of the values or Double.NaN if 209 * length = 0 210 * @throws IllegalArgumentException if the array is null or the array index 211 * parameters are not valid 212 */ 213 public static double sumLog(final double[] values, final int begin, 214 final int length) { 215 return sumLog.evaluate(values, begin, length); 216 } 217 218 /** 219 * Returns the arithmetic mean of the entries in the input array, or 220 * <code>Double.NaN</code> if the array is empty. 221 * <p> 222 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 223 * <p> 224 * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for 225 * details on the computing algorithm.</p> 226 * 227 * @param values the input array 228 * @return the mean of the values or Double.NaN if the array is empty 229 * @throws IllegalArgumentException if the array is null 230 */ 231 public static double mean(final double[] values) { 232 return mean.evaluate(values); 233 } 234 235 /** 236 * Returns the arithmetic mean of the entries in the specified portion of 237 * the input array, or <code>Double.NaN</code> if the designated subarray 238 * is empty. 239 * <p> 240 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 241 * <p> 242 * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for 243 * details on the computing algorithm.</p> 244 * 245 * @param values the input array 246 * @param begin index of the first array element to include 247 * @param length the number of elements to include 248 * @return the mean of the values or Double.NaN if length = 0 249 * @throws IllegalArgumentException if the array is null or the array index 250 * parameters are not valid 251 */ 252 public static double mean(final double[] values, final int begin, 253 final int length) { 254 return mean.evaluate(values, begin, length); 255 } 256 257 /** 258 * Returns the geometric mean of the entries in the input array, or 259 * <code>Double.NaN</code> if the array is empty. 260 * <p> 261 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 262 * <p> 263 * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean} 264 * for details on the computing algorithm.</p> 265 * 266 * @param values the input array 267 * @return the geometric mean of the values or Double.NaN if the array is empty 268 * @throws IllegalArgumentException if the array is null 269 */ 270 public static double geometricMean(final double[] values) { 271 return geometricMean.evaluate(values); 272 } 273 274 /** 275 * Returns the geometric mean of the entries in the specified portion of 276 * the input array, or <code>Double.NaN</code> if the designated subarray 277 * is empty. 278 * <p> 279 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 280 * <p> 281 * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean} 282 * for details on the computing algorithm.</p> 283 * 284 * @param values the input array 285 * @param begin index of the first array element to include 286 * @param length the number of elements to include 287 * @return the geometric mean of the values or Double.NaN if length = 0 288 * @throws IllegalArgumentException if the array is null or the array index 289 * parameters are not valid 290 */ 291 public static double geometricMean(final double[] values, final int begin, 292 final int length) { 293 return geometricMean.evaluate(values, begin, length); 294 } 295 296 297 /** 298 * Returns the variance of the entries in the input array, or 299 * <code>Double.NaN</code> if the array is empty. 300 * <p> 301 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for 302 * details on the computing algorithm.</p> 303 * <p> 304 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 305 * <p> 306 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 307 * 308 * @param values the input array 309 * @return the variance of the values or Double.NaN if the array is empty 310 * @throws IllegalArgumentException if the array is null 311 */ 312 public static double variance(final double[] values) { 313 return variance.evaluate(values); 314 } 315 316 /** 317 * Returns the variance of the entries in the specified portion of 318 * the input array, or <code>Double.NaN</code> if the designated subarray 319 * is empty. 320 * <p> 321 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for 322 * details on the computing algorithm.</p> 323 * <p> 324 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 325 * <p> 326 * Throws <code>IllegalArgumentException</code> if the array is null or the 327 * array index parameters are not valid.</p> 328 * 329 * @param values the input array 330 * @param begin index of the first array element to include 331 * @param length the number of elements to include 332 * @return the variance of the values or Double.NaN if length = 0 333 * @throws IllegalArgumentException if the array is null or the array index 334 * parameters are not valid 335 */ 336 public static double variance(final double[] values, final int begin, 337 final int length) { 338 return variance.evaluate(values, begin, length); 339 } 340 341 /** 342 * Returns the variance of the entries in the specified portion of 343 * the input array, using the precomputed mean value. Returns 344 * <code>Double.NaN</code> if the designated subarray is empty. 345 * <p> 346 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for 347 * details on the computing algorithm.</p> 348 * <p> 349 * The formula used assumes that the supplied mean value is the arithmetic 350 * mean of the sample data, not a known population parameter. This method 351 * is supplied only to save computation when the mean has already been 352 * computed.</p> 353 * <p> 354 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 355 * <p> 356 * Throws <code>IllegalArgumentException</code> if the array is null or the 357 * array index parameters are not valid.</p> 358 * 359 * @param values the input array 360 * @param mean the precomputed mean value 361 * @param begin index of the first array element to include 362 * @param length the number of elements to include 363 * @return the variance of the values or Double.NaN if length = 0 364 * @throws IllegalArgumentException if the array is null or the array index 365 * parameters are not valid 366 */ 367 public static double variance(final double[] values, final double mean, 368 final int begin, final int length) { 369 return variance.evaluate(values, mean, begin, length); 370 } 371 372 /** 373 * Returns the variance of the entries in the input array, using the 374 * precomputed mean value. Returns <code>Double.NaN</code> if the array 375 * is empty. 376 * <p> 377 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for 378 * details on the computing algorithm.</p> 379 * <p> 380 * The formula used assumes that the supplied mean value is the arithmetic 381 * mean of the sample data, not a known population parameter. This method 382 * is supplied only to save computation when the mean has already been 383 * computed.</p> 384 * <p> 385 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 386 * <p> 387 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 388 * 389 * @param values the input array 390 * @param mean the precomputed mean value 391 * @return the variance of the values or Double.NaN if the array is empty 392 * @throws IllegalArgumentException if the array is null 393 */ 394 public static double variance(final double[] values, final double mean) { 395 return variance.evaluate(values, mean); 396 } 397 398 /** 399 * Returns the maximum of the entries in the input array, or 400 * <code>Double.NaN</code> if the array is empty. 401 * <p> 402 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 403 * <p> 404 * <ul> 405 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 406 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> 407 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, 408 * the result is <code>Double.POSITIVE_INFINITY.</code></li> 409 * </ul></p> 410 * 411 * @param values the input array 412 * @return the maximum of the values or Double.NaN if the array is empty 413 * @throws IllegalArgumentException if the array is null 414 */ 415 public static double max(final double[] values) { 416 return max.evaluate(values); 417 } 418 419 /** 420 * Returns the maximum of the entries in the specified portion of 421 * the input array, or <code>Double.NaN</code> if the designated subarray 422 * is empty. 423 * <p> 424 * Throws <code>IllegalArgumentException</code> if the array is null or 425 * the array index parameters are not valid.</p> 426 * <p> 427 * <ul> 428 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 429 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> 430 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, 431 * the result is <code>Double.POSITIVE_INFINITY.</code></li> 432 * </ul></p> 433 * 434 * @param values the input array 435 * @param begin index of the first array element to include 436 * @param length the number of elements to include 437 * @return the maximum of the values or Double.NaN if length = 0 438 * @throws IllegalArgumentException if the array is null or the array index 439 * parameters are not valid 440 */ 441 public static double max(final double[] values, final int begin, 442 final int length) { 443 return max.evaluate(values, begin, length); 444 } 445 446 /** 447 * Returns the minimum of the entries in the input array, or 448 * <code>Double.NaN</code> if the array is empty. 449 * <p> 450 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 451 * <p> 452 * <ul> 453 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 454 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> 455 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, 456 * the result is <code>Double.NEGATIVE_INFINITY.</code></li> 457 * </ul> </p> 458 * 459 * @param values the input array 460 * @return the minimum of the values or Double.NaN if the array is empty 461 * @throws IllegalArgumentException if the array is null 462 */ 463 public static double min(final double[] values) { 464 return min.evaluate(values); 465 } 466 467 /** 468 * Returns the minimum of the entries in the specified portion of 469 * the input array, or <code>Double.NaN</code> if the designated subarray 470 * is empty. 471 * <p> 472 * Throws <code>IllegalArgumentException</code> if the array is null or 473 * the array index parameters are not valid.</p> 474 * <p> 475 * <ul> 476 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 477 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> 478 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, 479 * the result is <code>Double.NEGATIVE_INFINITY.</code></li> 480 * </ul></p> 481 * 482 * @param values the input array 483 * @param begin index of the first array element to include 484 * @param length the number of elements to include 485 * @return the minimum of the values or Double.NaN if length = 0 486 * @throws IllegalArgumentException if the array is null or the array index 487 * parameters are not valid 488 */ 489 public static double min(final double[] values, final int begin, 490 final int length) { 491 return min.evaluate(values, begin, length); 492 } 493 494 /** 495 * Returns an estimate of the <code>p</code>th percentile of the values 496 * in the <code>values</code> array. 497 * <p> 498 * <ul> 499 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length 500 * <code>0</code></li></p> 501 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code> 502 * if <code>values</code> has length <code>1</code></li> 503 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> 504 * is null or p is not a valid quantile value (p must be greater than 0 505 * and less than or equal to 100)</li> 506 * </ul></p> 507 * <p> 508 * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for 509 * a description of the percentile estimation algorithm used.</p> 510 * 511 * @param values input array of values 512 * @param p the percentile value to compute 513 * @return the percentile value or Double.NaN if the array is empty 514 * @throws IllegalArgumentException if <code>values</code> is null 515 * or p is invalid 516 */ 517 public static double percentile(final double[] values, final double p) { 518 return percentile.evaluate(values,p); 519 } 520 521 /** 522 * Returns an estimate of the <code>p</code>th percentile of the values 523 * in the <code>values</code> array, starting with the element in (0-based) 524 * position <code>begin</code> in the array and including <code>length</code> 525 * values. 526 * <p> 527 * <ul> 528 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> 529 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code> 530 * if <code>length = 1 </code></li> 531 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> 532 * is null , <code>begin</code> or <code>length</code> is invalid, or 533 * <code>p</code> is not a valid quantile value (p must be greater than 0 534 * and less than or equal to 100)</li> 535 * </ul></p> 536 * <p> 537 * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for 538 * a description of the percentile estimation algorithm used.</p> 539 * 540 * @param values array of input values 541 * @param p the percentile to compute 542 * @param begin the first (0-based) element to include in the computation 543 * @param length the number of array elements to include 544 * @return the percentile value 545 * @throws IllegalArgumentException if the parameters are not valid or the 546 * input array is null 547 */ 548 public static double percentile(final double[] values, final int begin, 549 final int length, final double p) { 550 return percentile.evaluate(values, begin, length, p); 551 } 552 553 /** 554 * Returns the sum of the (signed) differences between corresponding elements of the 555 * input arrays -- i.e., sum(sample1[i] - sample2[i]). 556 * 557 * @param sample1 the first array 558 * @param sample2 the second array 559 * @return sum of paired differences 560 * @throws IllegalArgumentException if the arrays do not have the same 561 * (positive) length 562 */ 563 public static double sumDifference(final double[] sample1, final double[] sample2) 564 throws IllegalArgumentException { 565 int n = sample1.length; 566 if (n != sample2.length || n < 1) { 567 throw new IllegalArgumentException 568 ("Input arrays must have the same (positive) length."); 569 } 570 double result = 0; 571 for (int i = 0; i < n; i++) { 572 result += sample1[i] - sample2[i]; 573 } 574 return result; 575 } 576 577 /** 578 * Returns the mean of the (signed) differences between corresponding elements of the 579 * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length. 580 * 581 * @param sample1 the first array 582 * @param sample2 the second array 583 * @return mean of paired differences 584 * @throws IllegalArgumentException if the arrays do not have the same 585 * (positive) length 586 */ 587 public static double meanDifference(final double[] sample1, final double[] sample2) 588 throws IllegalArgumentException { 589 return sumDifference(sample1, sample2) / (double) sample1.length; 590 } 591 592 /** 593 * Returns the variance of the (signed) differences between corresponding elements of the 594 * input arrays -- i.e., var(sample1[i] - sample2[i]). 595 * 596 * @param sample1 the first array 597 * @param sample2 the second array 598 * @param meanDifference the mean difference between corresponding entries 599 * @see #meanDifference(double[],double[]) 600 * @return variance of paired differences 601 * @throws IllegalArgumentException if the arrays do not have the same 602 * length or their common length is less than 2. 603 */ 604 public static double varianceDifference(final double[] sample1, final double[] sample2, 605 double meanDifference) throws IllegalArgumentException { 606 double sum1 = 0d; 607 double sum2 = 0d; 608 double diff = 0d; 609 int n = sample1.length; 610 if (n < 2 || n != sample2.length) { 611 throw new IllegalArgumentException("Input array lengths must be equal and at least 2."); 612 } 613 for (int i = 0; i < n; i++) { 614 diff = sample1[i] - sample2[i]; 615 sum1 += (diff - meanDifference) *(diff - meanDifference); 616 sum2 += diff - meanDifference; 617 } 618 return (sum1 - (sum2 * sum2 / (double) n)) / (double) (n - 1); 619 } 620 621 }