001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math3.stat.ranking;
019    
020    import java.util.ArrayList;
021    import java.util.Arrays;
022    import java.util.Iterator;
023    import java.util.List;
024    
025    import org.apache.commons.math3.exception.MathInternalError;
026    import org.apache.commons.math3.exception.NotANumberException;
027    import org.apache.commons.math3.random.RandomData;
028    import org.apache.commons.math3.random.RandomDataImpl;
029    import org.apache.commons.math3.random.RandomGenerator;
030    import org.apache.commons.math3.util.FastMath;
031    
032    
033    /**
034     * <p> Ranking based on the natural ordering on doubles.</p>
035     * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
036     * are handled using the selected {@link TiesStrategy}.
037     * Configuration settings are supplied in optional constructor arguments.
038     * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE},
039     * respectively. When using {@link TiesStrategy#RANDOM}, a
040     * {@link RandomGenerator} may be supplied as a constructor argument.</p>
041     * <p>Examples:
042     * <table border="1" cellpadding="3">
043     * <tr><th colspan="3">
044     * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
045     * </th></tr>
046     * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
047     * <th><code>rank(data)</code></th>
048     * <tr>
049     * <td>default (NaNs maximal)</td>
050     * <td>default (ties averaged)</td>
051     * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
052     * <tr>
053     * <td>default (NaNs maximal)</td>
054     * <td>MINIMUM</td>
055     * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
056     * <tr>
057     * <td>MINIMAL</td>
058     * <td>default (ties averaged)</td>
059     * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
060     * <tr>
061     * <td>REMOVED</td>
062     * <td>SEQUENTIAL</td>
063     * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
064     * <tr>
065     * <td>MINIMAL</td>
066     * <td>MAXIMUM</td>
067     * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p>
068     *
069     * @since 2.0
070     * @version $Id: NaturalRanking.java 1416643 2012-12-03 19:37:14Z tn $
071     */
072    public class NaturalRanking implements RankingAlgorithm {
073    
074        /** default NaN strategy */
075        public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;
076    
077        /** default ties strategy */
078        public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
079    
080        /** NaN strategy - defaults to NaNs maximal */
081        private final NaNStrategy nanStrategy;
082    
083        /** Ties strategy - defaults to ties averaged */
084        private final TiesStrategy tiesStrategy;
085    
086        /** Source of random data - used only when ties strategy is RANDOM */
087        private final RandomData randomData;
088    
089        /**
090         * Create a NaturalRanking with default strategies for handling ties and NaNs.
091         */
092        public NaturalRanking() {
093            super();
094            tiesStrategy = DEFAULT_TIES_STRATEGY;
095            nanStrategy = DEFAULT_NAN_STRATEGY;
096            randomData = null;
097        }
098    
099        /**
100         * Create a NaturalRanking with the given TiesStrategy.
101         *
102         * @param tiesStrategy the TiesStrategy to use
103         */
104        public NaturalRanking(TiesStrategy tiesStrategy) {
105            super();
106            this.tiesStrategy = tiesStrategy;
107            nanStrategy = DEFAULT_NAN_STRATEGY;
108            randomData = new RandomDataImpl();
109        }
110    
111        /**
112         * Create a NaturalRanking with the given NaNStrategy.
113         *
114         * @param nanStrategy the NaNStrategy to use
115         */
116        public NaturalRanking(NaNStrategy nanStrategy) {
117            super();
118            this.nanStrategy = nanStrategy;
119            tiesStrategy = DEFAULT_TIES_STRATEGY;
120            randomData = null;
121        }
122    
123        /**
124         * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
125         *
126         * @param nanStrategy NaNStrategy to use
127         * @param tiesStrategy TiesStrategy to use
128         */
129        public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
130            super();
131            this.nanStrategy = nanStrategy;
132            this.tiesStrategy = tiesStrategy;
133            randomData = new RandomDataImpl();
134        }
135    
136        /**
137         * Create a NaturalRanking with TiesStrategy.RANDOM and the given
138         * RandomGenerator as the source of random data.
139         *
140         * @param randomGenerator source of random data
141         */
142        public NaturalRanking(RandomGenerator randomGenerator) {
143            super();
144            this.tiesStrategy = TiesStrategy.RANDOM;
145            nanStrategy = DEFAULT_NAN_STRATEGY;
146            randomData = new RandomDataImpl(randomGenerator);
147        }
148    
149    
150        /**
151         * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
152         * and the given source of random data.
153         *
154         * @param nanStrategy NaNStrategy to use
155         * @param randomGenerator source of random data
156         */
157        public NaturalRanking(NaNStrategy nanStrategy,
158                RandomGenerator randomGenerator) {
159            super();
160            this.nanStrategy = nanStrategy;
161            this.tiesStrategy = TiesStrategy.RANDOM;
162            randomData = new RandomDataImpl(randomGenerator);
163        }
164    
165        /**
166         * Return the NaNStrategy
167         *
168         * @return returns the NaNStrategy
169         */
170        public NaNStrategy getNanStrategy() {
171            return nanStrategy;
172        }
173    
174        /**
175         * Return the TiesStrategy
176         *
177         * @return the TiesStrategy
178         */
179        public TiesStrategy getTiesStrategy() {
180            return tiesStrategy;
181        }
182    
183        /**
184         * Rank <code>data</code> using the natural ordering on Doubles, with
185         * NaN values handled according to <code>nanStrategy</code> and ties
186         * resolved using <code>tiesStrategy.</code>
187         *
188         * @param data array to be ranked
189         * @return array of ranks
190         * @throws NotANumberException if the selected {@link NaNStrategy} is {@code FAILED}
191         * and a {@link Double#NaN} is encountered in the input data
192         */
193        public double[] rank(double[] data) {
194    
195            // Array recording initial positions of data to be ranked
196            IntDoublePair[] ranks = new IntDoublePair[data.length];
197            for (int i = 0; i < data.length; i++) {
198                ranks[i] = new IntDoublePair(data[i], i);
199            }
200    
201            // Recode, remove or record positions of NaNs
202            List<Integer> nanPositions = null;
203            switch (nanStrategy) {
204                case MAXIMAL: // Replace NaNs with +INFs
205                    recodeNaNs(ranks, Double.POSITIVE_INFINITY);
206                    break;
207                case MINIMAL: // Replace NaNs with -INFs
208                    recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
209                    break;
210                case REMOVED: // Drop NaNs from data
211                    ranks = removeNaNs(ranks);
212                    break;
213                case FIXED:   // Record positions of NaNs
214                    nanPositions = getNanPositions(ranks);
215                    break;
216                case FAILED:
217                    nanPositions = getNanPositions(ranks);
218                    if (nanPositions.size() > 0) {
219                        throw new NotANumberException();
220                    }
221                    break;
222                default: // this should not happen unless NaNStrategy enum is changed
223                    throw new MathInternalError();
224            }
225    
226            // Sort the IntDoublePairs
227            Arrays.sort(ranks);
228    
229            // Walk the sorted array, filling output array using sorted positions,
230            // resolving ties as we go
231            double[] out = new double[ranks.length];
232            int pos = 1;  // position in sorted array
233            out[ranks[0].getPosition()] = pos;
234            List<Integer> tiesTrace = new ArrayList<Integer>();
235            tiesTrace.add(ranks[0].getPosition());
236            for (int i = 1; i < ranks.length; i++) {
237                if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
238                    // tie sequence has ended (or had length 1)
239                    pos = i + 1;
240                    if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
241                        resolveTie(out, tiesTrace);
242                    }
243                    tiesTrace = new ArrayList<Integer>();
244                    tiesTrace.add(ranks[i].getPosition());
245                } else {
246                    // tie sequence continues
247                    tiesTrace.add(ranks[i].getPosition());
248                }
249                out[ranks[i].getPosition()] = pos;
250            }
251            if (tiesTrace.size() > 1) {  // handle tie sequence at end
252                resolveTie(out, tiesTrace);
253            }
254            if (nanStrategy == NaNStrategy.FIXED) {
255                restoreNaNs(out, nanPositions);
256            }
257            return out;
258        }
259    
260        /**
261         * Returns an array that is a copy of the input array with IntDoublePairs
262         * having NaN values removed.
263         *
264         * @param ranks input array
265         * @return array with NaN-valued entries removed
266         */
267        private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
268            if (!containsNaNs(ranks)) {
269                return ranks;
270            }
271            IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
272            int j = 0;
273            for (int i = 0; i < ranks.length; i++) {
274                if (Double.isNaN(ranks[i].getValue())) {
275                    // drop, but adjust original ranks of later elements
276                    for (int k = i + 1; k < ranks.length; k++) {
277                        ranks[k] = new IntDoublePair(
278                                ranks[k].getValue(), ranks[k].getPosition() - 1);
279                    }
280                } else {
281                    outRanks[j] = new IntDoublePair(
282                            ranks[i].getValue(), ranks[i].getPosition());
283                    j++;
284                }
285            }
286            IntDoublePair[] returnRanks = new IntDoublePair[j];
287            System.arraycopy(outRanks, 0, returnRanks, 0, j);
288            return returnRanks;
289        }
290    
291        /**
292         * Recodes NaN values to the given value.
293         *
294         * @param ranks array to recode
295         * @param value the value to replace NaNs with
296         */
297        private void recodeNaNs(IntDoublePair[] ranks, double value) {
298            for (int i = 0; i < ranks.length; i++) {
299                if (Double.isNaN(ranks[i].getValue())) {
300                    ranks[i] = new IntDoublePair(
301                            value, ranks[i].getPosition());
302                }
303            }
304        }
305    
306        /**
307         * Checks for presence of NaNs in <code>ranks.</code>
308         *
309         * @param ranks array to be searched for NaNs
310         * @return true iff ranks contains one or more NaNs
311         */
312        private boolean containsNaNs(IntDoublePair[] ranks) {
313            for (int i = 0; i < ranks.length; i++) {
314                if (Double.isNaN(ranks[i].getValue())) {
315                    return true;
316                }
317            }
318            return false;
319        }
320    
321        /**
322         * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
323         * The input <code>ranks</code> array is expected to take the same value
324         * for all indices in <code>tiesTrace</code>.  The common value is recoded
325         * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
326         * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
327         * The same array and trace with tiesStrategy AVERAGE will come out
328         * <5,8,3,6,3,7,1,3>.
329         *
330         * @param ranks array of ranks
331         * @param tiesTrace list of indices where <code>ranks</code> is constant
332         * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
333         * </code>
334         */
335        private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
336    
337            // constant value of ranks over tiesTrace
338            final double c = ranks[tiesTrace.get(0)];
339    
340            // length of sequence of tied ranks
341            final int length = tiesTrace.size();
342    
343            switch (tiesStrategy) {
344                case  AVERAGE:  // Replace ranks with average
345                    fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
346                    break;
347                case MAXIMUM:   // Replace ranks with maximum values
348                    fill(ranks, tiesTrace, c + length - 1);
349                    break;
350                case MINIMUM:   // Replace ties with minimum
351                    fill(ranks, tiesTrace, c);
352                    break;
353                case RANDOM:    // Fill with random integral values in [c, c + length - 1]
354                    Iterator<Integer> iterator = tiesTrace.iterator();
355                    long f = FastMath.round(c);
356                    while (iterator.hasNext()) {
357                        // No advertised exception because args are guaranteed valid
358                        ranks[iterator.next()] =
359                            randomData.nextLong(f, f + length - 1);
360                    }
361                    break;
362                case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
363                    // walk and fill
364                    iterator = tiesTrace.iterator();
365                    f = FastMath.round(c);
366                    int i = 0;
367                    while (iterator.hasNext()) {
368                        ranks[iterator.next()] = f + i++;
369                    }
370                    break;
371                default: // this should not happen unless TiesStrategy enum is changed
372                    throw new MathInternalError();
373            }
374        }
375    
376        /**
377         * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
378         *
379         * @param data array to modify
380         * @param tiesTrace list of index values to set
381         * @param value value to set
382         */
383        private void fill(double[] data, List<Integer> tiesTrace, double value) {
384            Iterator<Integer> iterator = tiesTrace.iterator();
385            while (iterator.hasNext()) {
386                data[iterator.next()] = value;
387            }
388        }
389    
390        /**
391         * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
392         *
393         * @param ranks array to modify
394         * @param nanPositions list of index values to set to <code>Double.NaN</code>
395         */
396        private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
397            if (nanPositions.size() == 0) {
398                return;
399            }
400            Iterator<Integer> iterator = nanPositions.iterator();
401            while (iterator.hasNext()) {
402                ranks[iterator.next().intValue()] = Double.NaN;
403            }
404    
405        }
406    
407        /**
408         * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
409         *
410         * @param ranks array to search for <code>NaNs</code>
411         * @return list of indexes i such that <code>ranks[i] = NaN</code>
412         */
413        private List<Integer> getNanPositions(IntDoublePair[] ranks) {
414            ArrayList<Integer> out = new ArrayList<Integer>();
415            for (int i = 0; i < ranks.length; i++) {
416                if (Double.isNaN(ranks[i].getValue())) {
417                    out.add(Integer.valueOf(i));
418                }
419            }
420            return out;
421        }
422    
423        /**
424         * Represents the position of a double value in an ordering.
425         * Comparable interface is implemented so Arrays.sort can be used
426         * to sort an array of IntDoublePairs by value.  Note that the
427         * implicitly defined natural ordering is NOT consistent with equals.
428         */
429        private static class IntDoublePair implements Comparable<IntDoublePair>  {
430    
431            /** Value of the pair */
432            private final double value;
433    
434            /** Original position of the pair */
435            private final int position;
436    
437            /**
438             * Construct an IntDoublePair with the given value and position.
439             * @param value the value of the pair
440             * @param position the original position
441             */
442            public IntDoublePair(double value, int position) {
443                this.value = value;
444                this.position = position;
445            }
446    
447            /**
448             * Compare this IntDoublePair to another pair.
449             * Only the <strong>values</strong> are compared.
450             *
451             * @param other the other pair to compare this to
452             * @return result of <code>Double.compare(value, other.value)</code>
453             */
454            public int compareTo(IntDoublePair other) {
455                return Double.compare(value, other.value);
456            }
457    
458            // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion.
459    
460            /**
461             * Returns the value of the pair.
462             * @return value
463             */
464            public double getValue() {
465                return value;
466            }
467    
468            /**
469             * Returns the original position of the pair.
470             * @return position
471             */
472            public int getPosition() {
473                return position;
474            }
475        }
476    }