View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.descriptive.rank;
18  
19  import java.io.Serializable;
20  import java.util.Arrays;
21  import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic;
22  
23  /**
24   * Provides percentile computation.
25   * <p>
26   * There are several commonly used methods for estimating percentiles (a.k.a. 
27   * quantiles) based on sample data.  For large samples, the different methods 
28   * agree closely, but when sample sizes are small, different methods will give
29   * significantly different results.  The algorithm implemented here works as follows:
30   * <ol>
31   * <li>Let <code>n</code> be the length of the (sorted) array and 
32   * <code>0 < p <= 100</code> be the desired percentile.</li>
33   * <li>If <code> n = 1 </code> return the unique array element (regardless of 
34   * the value of <code>p</code>); otherwise </li>
35   * <li>Compute the estimated percentile position  
36   * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
37   * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
38   * part of <code>pos</code>).  If <code>pos >= n</code> return the largest
39   * element in the array; otherwise</li>
40   * <li>Let <code>lower</code> be the element in position 
41   * <code>floor(pos)</code> in the array and let <code>upper</code> be the
42   * next element in the array.  Return <code>lower + d * (upper - lower)</code>
43   * </li>
44   * </ol></p>
45   * <p>
46   * To compute percentiles, the data must be (totally) ordered.  Input arrays
47   * are copied and then sorted using  {@link java.util.Arrays#sort(double[])}.
48   * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
49   * by {@link java.lang.Double#compareTo(Double)}.  This ordering makes 
50   * <code>Double.NaN</code> larger than any other value (including 
51   * <code>Double.POSITIVE_INFINITY</code>).  Therefore, for example, the median
52   * (50th percentile) of  
53   * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code></p>
54   * <p>
55   * Since percentile estimation usually involves interpolation between array 
56   * elements, arrays containing  <code>NaN</code> or infinite values will often
57   * result in <code>NaN<code> or infinite values returned.</p>
58   * <p>
59   * <strong>Note that this implementation is not synchronized.</strong> If 
60   * multiple threads access an instance of this class concurrently, and at least
61   * one of the threads invokes the <code>increment()</code> or 
62   * <code>clear()</code> method, it must be synchronized externally.</p>
63   * 
64   * @version $Revision: 617953 $ $Date: 2008-02-02 22:54:00 -0700 (Sat, 02 Feb 2008) $
65   */
66  public class Percentile extends AbstractUnivariateStatistic implements Serializable {
67  
68      /** Serializable version identifier */
69      private static final long serialVersionUID = -8091216485095130416L; 
70         
71      /** Determines what percentile is computed when evaluate() is activated 
72       * with no quantile argument */
73      private double quantile = 0.0;
74  
75      /**
76       * Constructs a Percentile with a default quantile
77       * value of 50.0.
78       */
79      public Percentile() {
80          this(50.0);
81      }
82  
83      /**
84       * Constructs a Percentile with the specific quantile value.
85       * @param p the quantile
86       * @throws IllegalArgumentException  if p is not greater than 0 and less
87       * than or equal to 100
88       */
89      public Percentile(final double p) {
90          setQuantile(p);
91      }
92  
93      /**
94       * Returns an estimate of the <code>p</code>th percentile of the values
95       * in the <code>values</code> array.
96       * <p>
97       * Calls to this method do not modify the internal <code>quantile</code>
98       * state of this statistic.</p>
99       * <p>
100      * <ul>
101      * <li>Returns <code>Double.NaN</code> if <code>values</code> has length 
102      * <code>0</code></li>
103      * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
104      *  if <code>values</code> has length <code>1</code></li>
105      * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
106      * is null or p is not a valid quantile value (p must be greater than 0
107      * and less than or equal to 100) </li>
108      * </ul></p>
109      * <p>
110      * See {@link Percentile} for a description of the percentile estimation
111      * algorithm used.</p>
112      * 
113      * @param values input array of values
114      * @param p the percentile value to compute
115      * @return the percentile value or Double.NaN if the array is empty
116      * @throws IllegalArgumentException if <code>values</code> is null 
117      *     or p is invalid
118      */
119     public double evaluate(final double[] values, final double p) {
120         test(values, 0, 0);
121         return evaluate(values, 0, values.length, p);
122     }
123 
124     /**
125      * Returns an estimate of the <code>quantile</code>th percentile of the
126      * designated values in the <code>values</code> array.  The quantile
127      * estimated is determined by the <code>quantile</code> property.
128      * <p>
129      * <ul>
130      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
131      * <li>Returns (for any value of <code>quantile</code>) 
132      * <code>values[begin]</code> if <code>length = 1 </code></li>
133      * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
134      * is null,  or <code>start</code> or <code>length</code> 
135      * is invalid</li>
136      * </ul></p>
137      * <p>
138      * See {@link Percentile} for a description of the percentile estimation
139      * algorithm used.</p>
140      * 
141      * @param values the input array
142      * @param start index of the first array element to include
143      * @param length the number of elements to include
144      * @return the percentile value
145      * @throws IllegalArgumentException if the parameters are not valid
146      * 
147      */
148     public double evaluate( final double[] values, final int start, final int length) {
149         return evaluate(values, start, length, quantile);
150     }
151 
152      /**
153      * Returns an estimate of the <code>p</code>th percentile of the values
154      * in the <code>values</code> array, starting with the element in (0-based)
155      * position <code>begin</code> in the array and including <code>length</code>
156      * values.
157      * <p>
158      * Calls to this method do not modify the internal <code>quantile</code>
159      * state of this statistic.</p>
160      * <p>
161      * <ul>
162      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
163      * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
164      *  if <code>length = 1 </code></li>
165      * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
166      *  is null , <code>begin</code> or <code>length</code> is invalid, or 
167      * <code>p</code> is not a valid quantile value (p must be greater than 0
168      * and less than or equal to 100)</li>
169      * </ul></p>
170      * <p>
171      * See {@link Percentile} for a description of the percentile estimation
172      * algorithm used.</p>
173      * 
174      * @param values array of input values
175      * @param p  the percentile to compute
176      * @param begin  the first (0-based) element to include in the computation
177      * @param length  the number of array elements to include
178      * @return  the percentile value
179      * @throws IllegalArgumentException if the parameters are not valid or the
180      * input array is null
181      */
182     public double evaluate(final double[] values, final int begin, 
183             final int length, final double p) {
184 
185         test(values, begin, length);
186 
187         if ((p > 100) || (p <= 0)) {
188             throw new IllegalArgumentException("invalid quantile value: " + p);
189         }
190         if (length == 0) {
191             return Double.NaN;
192         }
193         if (length == 1) {
194             return values[begin]; // always return single value for n = 1
195         }
196         double n = (double) length;
197         double pos = p * (n + 1) / 100;
198         double fpos = Math.floor(pos);
199         int intPos = (int) fpos;
200         double dif = pos - fpos;
201         double[] sorted = new double[length];
202         System.arraycopy(values, begin, sorted, 0, length);
203         Arrays.sort(sorted);
204 
205         if (pos < 1) {
206             return sorted[0];
207         }
208         if (pos >= n) {
209             return sorted[length - 1];
210         }
211         double lower = sorted[intPos - 1];
212         double upper = sorted[intPos];
213         return lower + dif * (upper - lower);
214     }
215 
216     /**
217      * Returns the value of the quantile field (determines what percentile is
218      * computed when evaluate() is called with no quantile argument).
219      * 
220      * @return quantile
221      */
222     public double getQuantile() {
223         return quantile;
224     }
225 
226     /**
227      * Sets the value of the quantile field (determines what percentile is 
228      * computed when evaluate() is called with no quantile argument).
229      * 
230      * @param p a value between 0 < p <= 100 
231      * @throws IllegalArgumentException  if p is not greater than 0 and less
232      * than or equal to 100
233      */
234     public void setQuantile(final double p) {
235         if (p <= 0 || p > 100) {
236             throw new IllegalArgumentException("Illegal quantile value: " + p);
237         }
238         quantile = p;
239     }
240 
241 }