View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat;
18  
19  import java.io.Serializable;
20  import java.text.NumberFormat;
21  import java.util.Iterator;
22  import java.util.Comparator;
23  import java.util.TreeMap;
24  
25  /** 
26   * Maintains a frequency distribution.
27   * <p>
28   * Accepts int, long, char or Object values.  New values added must be 
29   * comparable to those that have been added, otherwise the add method will 
30   * throw an IllegalArgumentException.</p>
31   * <p>
32   * Integer values (int, long, Integer, Long) are not distinguished by type -- 
33   * i.e. <code>addValue(new Long(2)), addValue(2), addValue(2l)</code> all have
34   * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
35   * <p>
36   * The values are ordered using the default (natural order), unless a  
37   * <code>Comparator</code> is supplied in the constructor.</p>
38   *
39   * @version $Revision: 617953 $ $Date: 2008-02-02 22:54:00 -0700 (Sat, 02 Feb 2008) $
40   */
41  public class Frequency implements Serializable {
42      
43      /** Serializable version identifier */
44      private static final long serialVersionUID = -3845586908418844111L;
45  
46      /** underlying collection */
47      private TreeMap freqTable = null;
48  
49      /**
50       * Default constructor.
51       */
52      public Frequency() {
53          freqTable = new TreeMap();
54      }
55      
56      /**
57       * Constructor allowing values Comparator to be specified.
58       * 
59       * @param comparator Comparator used to order values
60       */
61      public Frequency(Comparator comparator) {
62          freqTable = new TreeMap(comparator);
63      }
64  
65      /**
66       * Return a string representation of this frequency
67       * distribution.
68       * 
69       * @return a string representation.
70       */
71      public String toString() {
72          NumberFormat nf = NumberFormat.getPercentInstance();
73          StringBuffer outBuffer = new StringBuffer();
74          outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
75          Iterator iter = freqTable.keySet().iterator();
76          while (iter.hasNext()) {
77              Object value = iter.next();
78              outBuffer.append(value);
79              outBuffer.append('\t');
80              outBuffer.append(getCount(value));
81              outBuffer.append('\t');
82              outBuffer.append(nf.format(getPct(value)));
83              outBuffer.append('\t');
84              outBuffer.append(nf.format(getCumPct(value)));
85              outBuffer.append('\n');
86          }
87          return outBuffer.toString();
88      }
89  
90      /**
91       * Adds 1 to the frequency count for v.
92       * 
93       * @param v the value to add.
94       * @throws IllegalArgumentException if <code>v</code> is not comparable.
95       */
96      public void addValue(Object v) {
97          Object obj = v;
98          if (v instanceof Integer) {
99             obj = new Long(((Integer) v).longValue());
100         }
101         try {
102             Long count = (Long) freqTable.get(obj);
103             if (count == null) {
104                 freqTable.put(obj, new Long(1));
105             } else {
106                 freqTable.put(obj, new Long(count.longValue() + 1));
107             }
108         } catch (ClassCastException ex) {   
109             //TreeMap will throw ClassCastException if v is not comparable
110             throw new IllegalArgumentException("Value not comparable to existing values.");
111         }
112     }
113 
114     /**
115      * Adds 1 to the frequency count for v.
116      * 
117      * @param v the value to add.
118      */
119     public void addValue(int v) {
120         addValue(new Long(v));
121     }
122     
123     /**
124      * Adds 1 to the frequency count for v.
125      * 
126      * @param v the value to add.
127      */
128     public void addValue(Integer v) {
129         addValue(new Long(v.longValue()));
130     }
131 
132     /**
133      * Adds 1 to the frequency count for v.
134      * 
135      * @param v the value to add.
136      */
137     public void addValue(long v) {
138         addValue(new Long(v));
139     }
140     
141     /**
142      * Adds 1 to the frequency count for v.
143      * 
144      * @param v the value to add.
145      */
146     public void addValue(char v) {
147         addValue(new Character(v));
148     }
149     
150     /** Clears the frequency table */
151     public void clear() {
152         freqTable.clear();
153     }
154     
155     /**
156      * Returns an Iterator over the set of values that have been added.
157      * <p>
158      * If added values are itegral (i.e., integers, longs, Integers, or Longs), 
159      * they are converted to Longs when they are added, so the objects returned
160      * by the Iterator will in this case be Longs.</p>
161      * 
162      * @return values Iterator
163      */
164     public Iterator valuesIterator() {
165         return freqTable.keySet().iterator();
166     }
167     
168     //-------------------------------------------------------------------------
169     
170     /**
171      * Returns the sum of all frequencies.
172      * 
173      * @return the total frequency count.
174      */
175     public long getSumFreq() {
176         long result = 0;
177         Iterator iterator = freqTable.values().iterator();
178         while (iterator.hasNext())  {
179             result += ((Long) iterator.next()).longValue();
180         }
181         return result;
182     }
183 
184     /**
185      * Returns the number of values = v.
186      * 
187      * @param v the value to lookup.
188      * @return the frequency of v.
189      */
190     public long getCount(Object v) {
191         if (v instanceof Integer) {
192             return getCount(((Integer) v).longValue());
193         }
194         long result = 0;
195         try { 
196             Long count =  (Long) freqTable.get(v);
197             if (count != null) {
198                 result = count.longValue();
199             }
200         } catch (ClassCastException ex) {
201             // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
202         }
203         return result;
204     }
205 
206     /**
207      * Returns the number of values = v.
208      * 
209      * @param v the value to lookup.
210      * @return the frequency of v.
211      */
212     public long getCount(int v) {
213         return getCount(new Long(v));
214     }
215     
216     /**
217      * Returns the number of values = v.
218      * 
219      * @param v the value to lookup.
220      * @return the frequency of v.
221      */
222     public long getCount(long v) {
223         return getCount(new Long(v));
224     }
225     
226     /**
227      * Returns the number of values = v.
228      * 
229      * @param v the value to lookup.
230      * @return the frequency of v.
231      */
232     public long getCount(char v) {
233         return getCount(new Character(v));
234     }
235     
236     //-------------------------------------------------------------
237 
238     /**
239       * Returns the percentage of values that are equal to v
240      * (as a proportion between 0 and 1).
241      * <p>
242      * Returns <code>Double.NaN</code> if no values have been added.</p>
243      * 
244      * @param v the value to lookup
245      * @return the proportion of values equal to v
246      */
247     public double getPct(Object v) {
248         if (getSumFreq() == 0) {
249             return Double.NaN;
250         }
251         return (double) getCount(v) / (double) getSumFreq();        
252     }
253     
254     /**
255      * Returns the percentage of values that are equal to v
256      * (as a proportion between 0 and 1).
257      * 
258      * @param v the value to lookup
259      * @return the proportion of values equal to v
260      */
261     public double getPct(int v) {
262         return getPct(new Long(v));       
263     }
264     
265     /**
266      * Returns the percentage of values that are equal to v
267      * (as a proportion between 0 and 1).
268      * 
269      * @param v the value to lookup
270      * @return the proportion of values equal to v
271      */
272     public double getPct(long v) {
273         return getPct(new Long(v));         
274     }
275     
276     /**
277      * Returns the percentage of values that are equal to v
278      * (as a proportion between 0 and 1).
279      * 
280      * @param v the value to lookup
281      * @return the proportion of values equal to v
282      */
283     public double getPct(char v) {
284         return getPct(new Character(v));         
285     }
286     
287     //-----------------------------------------------------------------------------------------
288     
289     /**
290      * Returns the cumulative frequency of values less than or equal to v.
291      * <p>
292      * Returns 0 if v is not comparable to the values set.</p>
293      * 
294      * @param v the value to lookup.
295      * @return the proportion of values equal to v
296      */
297     public long getCumFreq(Object v) {
298         if (getSumFreq() == 0) {
299             return 0;
300         }
301         if (v instanceof Integer) {
302             return getCumFreq(((Integer) v).longValue());
303         }
304         Comparator c = freqTable.comparator();
305         if (c == null) {
306             c = new NaturalComparator();
307         }
308         long result = 0;
309         
310         try {
311             Long value = (Long) freqTable.get(v);
312             if (value != null) {
313                 result = value.longValue();
314             }
315         } catch (ClassCastException ex) {
316             return result;   // v is not comparable
317         }
318         
319         if (c.compare(v, freqTable.firstKey()) < 0) {
320             return 0;  // v is comparable, but less than first value
321         }
322         
323         if (c.compare(v, freqTable.lastKey()) >= 0) {
324             return getSumFreq();    // v is comparable, but greater than the last value
325         }
326         
327         Iterator values = valuesIterator();
328         while (values.hasNext()) {
329             Object nextValue = values.next();
330             if (c.compare(v, nextValue) > 0) {
331                 result += getCount(nextValue);
332             } else {
333                 return result;
334             }
335         }
336         return result;
337     }
338     
339      /**
340      * Returns the cumulative frequency of values less than or equal to v.
341      * <p>
342      * Returns 0 if v is not comparable to the values set.</p>
343      * 
344      * @param v the value to lookup
345      * @return the proportion of values equal to v
346      */
347     public long getCumFreq(int v) {
348         return getCumFreq(new Long(v));       
349     }
350     
351      /**
352      * Returns the cumulative frequency of values less than or equal to v.
353      * <p>
354      * Returns 0 if v is not comparable to the values set.</p>
355      * 
356      * @param v the value to lookup
357      * @return the proportion of values equal to v
358      */
359     public long getCumFreq(long v) {
360         return getCumFreq(new Long(v));         
361     }
362     
363     /**
364      * Returns the cumulative frequency of values less than or equal to v.
365      * <p>
366      * Returns 0 if v is not comparable to the values set.</p>
367      * 
368      * @param v the value to lookup
369      * @return the proportion of values equal to v
370      */
371     public long getCumFreq(char v) {
372         return getCumFreq(new Character(v));         
373     }
374     
375     //----------------------------------------------------------------------------------------------
376     
377     /**
378      * Returns the cumulative percentage of values less than or equal to v
379      * (as a proportion between 0 and 1).
380      * <p>
381      * Returns <code>Double.NaN</code> if no values have been added.
382      * Returns 0 if at least one value has been added, but v is not comparable
383      * to the values set.</p>
384      * 
385      * @param v the value to lookup
386      * @return the proportion of values less than or equal to v
387      */
388     public double getCumPct(Object v) {
389         if (getSumFreq() == 0) {
390             return Double.NaN;
391         }
392         return (double) getCumFreq(v) / (double) getSumFreq();        
393     }
394     
395     /**
396      * Returns the cumulative percentage of values less than or equal to v
397      * (as a proportion between 0 and 1).
398      * <p>
399      * Returns 0 if v is not comparable to the values set.</p>
400      * 
401      * @param v the value to lookup
402      * @return the proportion of values less than or equal to v
403      */
404     public double getCumPct(int v) {
405         return getCumPct(new Long(v));       
406     }
407     
408     /**
409      * Returns the cumulative percentage of values less than or equal to v
410      * (as a proportion between 0 and 1).
411      * <p>
412      * Returns 0 if v is not comparable to the values set.</p>
413      * 
414      * @param v the value to lookup
415      * @return the proportion of values less than or equal to v
416      */
417     public double getCumPct(long v) {
418         return getCumPct(new Long(v));         
419     }
420     
421     /**
422      * Returns the cumulative percentage of values less than or equal to v
423      * (as a proportion between 0 and 1).
424      * <p>
425      * Returns 0 if v is not comparable to the values set.</p>
426      * 
427      * @param v the value to lookup
428      * @return the proportion of values less than or equal to v
429      */
430     public double getCumPct(char v) {
431         return getCumPct(new Character(v));         
432     }
433     
434     /**
435      * A Comparator that compares comparable objects using the
436      * natural order.  Copied from Commons Collections ComparableComparator.
437      */
438     private static class NaturalComparator implements Comparator, Serializable {
439 
440         /** Serializable version identifier */
441         private static final long serialVersionUID = -3852193713161395148L;
442 
443         /**
444          * Compare the two {@link Comparable Comparable} arguments.
445          * This method is equivalent to:
446          * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
447          * 
448          * @param  o1 the first object 
449          * @param  o2 the second object
450          * @return  result of comparison
451          * @throws NullPointerException when <i>o1</i> is <code>null</code>, 
452          *         or when <code>((Comparable)o1).compareTo(o2)</code> does
453          * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable}, 
454          *         or when <code>((Comparable)o1).compareTo(o2)</code> does
455          */
456         public int compare(Object o1, Object o2) {
457             return ((Comparable)o1).compareTo(o2);
458         }
459     }
460 }