/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math.stat.descriptive.rank;

import java.io.Serializable;
import java.util.Arrays;
import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic;

/**
 * Provides percentile computation.
 * <p>
 * There are several commonly used methods for estimating percentiles (a.k.a.
 * quantiles) based on sample data.  For large samples, the different methods
 * agree closely, but when sample sizes are small, different methods will give
 * significantly different results.  The algorithm implemented here works as
 * follows:
 * <ol>
 * <li>Let <code>n</code> be the length of the (sorted) array and
 * <code>0 &lt; p &lt;= 100</code> be the desired percentile.</li>
 * <li>If <code>n = 1</code> return the unique array element (regardless of
 * the value of <code>p</code>); otherwise</li>
 * <li>Compute the estimated percentile position
 * <code>pos = p * (n + 1) / 100</code> and the difference, <code>d</code>,
 * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
 * part of <code>pos</code>).  If <code>pos &lt; 1</code> return the smallest
 * element in the array; if <code>pos &gt;= n</code> return the largest
 * element in the array; otherwise</li>
 * <li>Let <code>lower</code> be the element in (1-based) position
 * <code>floor(pos)</code> in the sorted array and let <code>upper</code> be
 * the next element in the array.  Return
 * <code>lower + d * (upper - lower)</code></li>
 * </ol></p>
 * <p>
 * To compute percentiles, the data must be (totally) ordered.  Input arrays
 * are copied and then sorted using {@link java.util.Arrays#sort(double[])}.
 * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
 * by {@link java.lang.Double#compareTo(Double)}.  This ordering makes
 * <code>Double.NaN</code> larger than any other value (including
 * <code>Double.POSITIVE_INFINITY</code>).  Therefore, for example, the median
 * (50th percentile) of
 * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5</code>.</p>
 * <p>
 * Since percentile estimation usually involves interpolation between array
 * elements, arrays containing <code>NaN</code> or infinite values will often
 * result in <code>NaN</code> or infinite values returned.</p>
 * <p>
 * <strong>Note that this implementation is not synchronized.</strong>  If
 * multiple threads access an instance of this class concurrently, and at least
 * one of the threads changes the quantile via
 * {@link #setQuantile(double)}, access must be synchronized externally.</p>
 *
 * @version $Revision: 617953 $ $Date: 2008-02-02 22:54:00 -0700 (Sat, 02 Feb 2008) $
 */
public class Percentile extends AbstractUnivariateStatistic implements Serializable {

    /** Serializable version identifier */
    private static final long serialVersionUID = -8091216485095130416L;

    /** Determines what percentile is computed when evaluate() is activated
     * with no quantile argument */
    private double quantile = 0.0;

    /**
     * Constructs a Percentile with a default quantile
     * value of 50.0.
     */
    public Percentile() {
        this(50.0);
    }

    /**
     * Constructs a Percentile with the specific quantile value.
     *
     * @param p the quantile
     * @throws IllegalArgumentException if p is not greater than 0 and less
     * than or equal to 100 (this includes <code>Double.NaN</code>)
     */
    public Percentile(final double p) {
        setQuantile(p);
    }

    /**
     * Returns an estimate of the <code>p</code>th percentile of the values
     * in the <code>values</code> array.
     * <p>
     * Calls to this method do not modify the internal <code>quantile</code>
     * state of this statistic.</p>
     * <p>
     * <ul>
     * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
     * <code>0</code></li>
     * <li>Returns (for any valid value of <code>p</code>)
     * <code>values[0]</code> if <code>values</code> has length
     * <code>1</code></li>
     * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
     * is null or p is not a valid quantile value (p must be greater than 0
     * and less than or equal to 100)</li>
     * </ul></p>
     * <p>
     * See {@link Percentile} for a description of the percentile estimation
     * algorithm used.</p>
     *
     * @param values input array of values
     * @param p the percentile value to compute
     * @return the percentile value or Double.NaN if the array is empty
     * @throws IllegalArgumentException if <code>values</code> is null
     * or p is invalid
     */
    public double evaluate(final double[] values, final double p) {
        // Run the inherited argument check first so that a bad array is
        // reported before values.length is dereferenced below.
        test(values, 0, 0);
        return evaluate(values, 0, values.length, p);
    }

    /**
     * Returns an estimate of the <code>quantile</code>th percentile of the
     * designated values in the <code>values</code> array.  The quantile
     * estimated is determined by the <code>quantile</code> property.
     * <p>
     * <ul>
     * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
     * <li>Returns (for any value of <code>quantile</code>)
     * <code>values[start]</code> if <code>length = 1</code></li>
     * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
     * is null, or <code>start</code> or <code>length</code>
     * is invalid</li>
     * </ul></p>
     * <p>
     * See {@link Percentile} for a description of the percentile estimation
     * algorithm used.</p>
     *
     * @param values the input array
     * @param start index of the first array element to include
     * @param length the number of elements to include
     * @return the percentile value
     * @throws IllegalArgumentException if the parameters are not valid
     */
    public double evaluate(final double[] values, final int start, final int length) {
        return evaluate(values, start, length, quantile);
    }

    /**
     * Returns an estimate of the <code>p</code>th percentile of the values
     * in the <code>values</code> array, starting with the element in (0-based)
     * position <code>begin</code> in the array and including <code>length</code>
     * values.
     * <p>
     * Calls to this method do not modify the internal <code>quantile</code>
     * state of this statistic.</p>
     * <p>
     * <ul>
     * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
     * <li>Returns (for any valid value of <code>p</code>)
     * <code>values[begin]</code> if <code>length = 1</code></li>
     * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
     * is null, <code>begin</code> or <code>length</code> is invalid, or
     * <code>p</code> is not a valid quantile value (p must be greater than 0
     * and less than or equal to 100; <code>Double.NaN</code> is rejected)</li>
     * </ul></p>
     * <p>
     * See {@link Percentile} for a description of the percentile estimation
     * algorithm used.</p>
     *
     * @param values array of input values
     * @param p the percentile to compute
     * @param begin the first (0-based) element to include in the computation
     * @param length the number of array elements to include
     * @return the percentile value
     * @throws IllegalArgumentException if the parameters are not valid or the
     * input array is null
     */
    public double evaluate(final double[] values, final int begin,
            final int length, final double p) {

        test(values, begin, length);

        // Positive range test: a NaN p fails it and is rejected here instead
        // of slipping through and indexing sorted[-1] further down.
        if (!isValidQuantile(p)) {
            throw new IllegalArgumentException("invalid quantile value: " + p);
        }
        if (length == 0) {
            return Double.NaN;
        }
        if (length == 1) {
            return values[begin]; // always return single value for n = 1
        }

        final double n = length;
        final double pos = p * (n + 1) / 100;   // 1-based estimated position
        final double fpos = Math.floor(pos);
        final int intPos = (int) fpos;
        final double dif = pos - fpos;          // fractional part of pos

        // Sort a private copy so the caller's array is left untouched.
        final double[] sorted = new double[length];
        System.arraycopy(values, begin, sorted, 0, length);
        Arrays.sort(sorted);

        if (pos < 1) {
            return sorted[0];
        }
        if (pos >= n) {
            return sorted[length - 1];
        }
        // Here 1 <= pos < n, so intPos - 1 and intPos are both valid indices.
        final double lower = sorted[intPos - 1];
        final double upper = sorted[intPos];
        return lower + dif * (upper - lower);
    }

    /**
     * Returns the value of the quantile field (determines what percentile is
     * computed when evaluate() is called with no quantile argument).
     *
     * @return quantile
     */
    public double getQuantile() {
        return quantile;
    }

    /**
     * Sets the value of the quantile field (determines what percentile is
     * computed when evaluate() is called with no quantile argument).
     *
     * @param p a value satisfying <code>0 &lt; p &lt;= 100</code>
     * @throws IllegalArgumentException if p is not greater than 0 and less
     * than or equal to 100 (this includes <code>Double.NaN</code>)
     */
    public void setQuantile(final double p) {
        if (!isValidQuantile(p)) {
            throw new IllegalArgumentException("Illegal quantile value: " + p);
        }
        quantile = p;
    }

    /**
     * Tells whether <code>p</code> is a usable percentile, i.e. whether
     * <code>0 &lt; p &lt;= 100</code>.  The test is phrased positively so that
     * <code>Double.NaN</code>, for which every ordered comparison is false,
     * is reported as invalid.
     *
     * @param p candidate percentile
     * @return true if p lies in the interval (0, 100], false otherwise
     */
    private static boolean isValidQuantile(final double p) {
        return p > 0 && p <= 100;
    }

}