001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.math3.stat.ranking; 019 020 import java.util.ArrayList; 021 import java.util.Arrays; 022 import java.util.Iterator; 023 import java.util.List; 024 025 import org.apache.commons.math3.exception.MathInternalError; 026 import org.apache.commons.math3.exception.NotANumberException; 027 import org.apache.commons.math3.random.RandomData; 028 import org.apache.commons.math3.random.RandomDataImpl; 029 import org.apache.commons.math3.random.RandomGenerator; 030 import org.apache.commons.math3.util.FastMath; 031 032 033 /** 034 * <p> Ranking based on the natural ordering on doubles.</p> 035 * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties 036 * are handled using the selected {@link TiesStrategy}. 037 * Configuration settings are supplied in optional constructor arguments. 038 * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE}, 039 * respectively. When using {@link TiesStrategy#RANDOM}, a 040 * {@link RandomGenerator} may be supplied as a constructor argument.</p> 041 * <p>Examples: 042 * <table border="1" cellpadding="3"> 043 * <tr><th colspan="3"> 044 * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17) 045 * </th></tr> 046 * <tr><th>NaNStrategy</th><th>TiesStrategy</th> 047 * <th><code>rank(data)</code></th> 048 * <tr> 049 * <td>default (NaNs maximal)</td> 050 * <td>default (ties averaged)</td> 051 * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr> 052 * <tr> 053 * <td>default (NaNs maximal)</td> 054 * <td>MINIMUM</td> 055 * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr> 056 * <tr> 057 * <td>MINIMAL</td> 058 * <td>default (ties averaged)</td> 059 * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr> 060 * <tr> 061 * <td>REMOVED</td> 062 * <td>SEQUENTIAL</td> 063 * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr> 064 * <tr> 065 * <td>MINIMAL</td> 066 * <td>MAXIMUM</td> 067 * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p> 068 * 069 * @since 2.0 070 * @version $Id: NaturalRanking.java 1416643 2012-12-03 19:37:14Z tn $ 071 */ 072 public class NaturalRanking implements RankingAlgorithm { 073 074 /** default NaN strategy */ 075 public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED; 076 077 /** default ties strategy */ 078 public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE; 079 080 /** NaN strategy - defaults to NaNs maximal */ 081 private final NaNStrategy nanStrategy; 082 083 /** Ties strategy - defaults to ties averaged */ 084 private final TiesStrategy tiesStrategy; 085 086 /** Source of random data - used only when ties strategy is RANDOM */ 087 private final RandomData randomData; 088 089 /** 090 * Create a NaturalRanking with default strategies for handling ties and NaNs. 091 */ 092 public NaturalRanking() { 093 super(); 094 tiesStrategy = DEFAULT_TIES_STRATEGY; 095 nanStrategy = DEFAULT_NAN_STRATEGY; 096 randomData = null; 097 } 098 099 /** 100 * Create a NaturalRanking with the given TiesStrategy. 101 * 102 * @param tiesStrategy the TiesStrategy to use 103 */ 104 public NaturalRanking(TiesStrategy tiesStrategy) { 105 super(); 106 this.tiesStrategy = tiesStrategy; 107 nanStrategy = DEFAULT_NAN_STRATEGY; 108 randomData = new RandomDataImpl(); 109 } 110 111 /** 112 * Create a NaturalRanking with the given NaNStrategy. 113 * 114 * @param nanStrategy the NaNStrategy to use 115 */ 116 public NaturalRanking(NaNStrategy nanStrategy) { 117 super(); 118 this.nanStrategy = nanStrategy; 119 tiesStrategy = DEFAULT_TIES_STRATEGY; 120 randomData = null; 121 } 122 123 /** 124 * Create a NaturalRanking with the given NaNStrategy and TiesStrategy. 125 * 126 * @param nanStrategy NaNStrategy to use 127 * @param tiesStrategy TiesStrategy to use 128 */ 129 public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) { 130 super(); 131 this.nanStrategy = nanStrategy; 132 this.tiesStrategy = tiesStrategy; 133 randomData = new RandomDataImpl(); 134 } 135 136 /** 137 * Create a NaturalRanking with TiesStrategy.RANDOM and the given 138 * RandomGenerator as the source of random data. 139 * 140 * @param randomGenerator source of random data 141 */ 142 public NaturalRanking(RandomGenerator randomGenerator) { 143 super(); 144 this.tiesStrategy = TiesStrategy.RANDOM; 145 nanStrategy = DEFAULT_NAN_STRATEGY; 146 randomData = new RandomDataImpl(randomGenerator); 147 } 148 149 150 /** 151 * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM 152 * and the given source of random data. 153 * 154 * @param nanStrategy NaNStrategy to use 155 * @param randomGenerator source of random data 156 */ 157 public NaturalRanking(NaNStrategy nanStrategy, 158 RandomGenerator randomGenerator) { 159 super(); 160 this.nanStrategy = nanStrategy; 161 this.tiesStrategy = TiesStrategy.RANDOM; 162 randomData = new RandomDataImpl(randomGenerator); 163 } 164 165 /** 166 * Return the NaNStrategy 167 * 168 * @return returns the NaNStrategy 169 */ 170 public NaNStrategy getNanStrategy() { 171 return nanStrategy; 172 } 173 174 /** 175 * Return the TiesStrategy 176 * 177 * @return the TiesStrategy 178 */ 179 public TiesStrategy getTiesStrategy() { 180 return tiesStrategy; 181 } 182 183 /** 184 * Rank <code>data</code> using the natural ordering on Doubles, with 185 * NaN values handled according to <code>nanStrategy</code> and ties 186 * resolved using <code>tiesStrategy.</code> 187 * 188 * @param data array to be ranked 189 * @return array of ranks 190 * @throws NotANumberException if the selected {@link NaNStrategy} is {@code FAILED} 191 * and a {@link Double#NaN} is encountered in the input data 192 */ 193 public double[] rank(double[] data) { 194 195 // Array recording initial positions of data to be ranked 196 IntDoublePair[] ranks = new IntDoublePair[data.length]; 197 for (int i = 0; i < data.length; i++) { 198 ranks[i] = new IntDoublePair(data[i], i); 199 } 200 201 // Recode, remove or record positions of NaNs 202 List<Integer> nanPositions = null; 203 switch (nanStrategy) { 204 case MAXIMAL: // Replace NaNs with +INFs 205 recodeNaNs(ranks, Double.POSITIVE_INFINITY); 206 break; 207 case MINIMAL: // Replace NaNs with -INFs 208 recodeNaNs(ranks, Double.NEGATIVE_INFINITY); 209 break; 210 case REMOVED: // Drop NaNs from data 211 ranks = removeNaNs(ranks); 212 break; 213 case FIXED: // Record positions of NaNs 214 nanPositions = getNanPositions(ranks); 215 break; 216 case FAILED: 217 nanPositions = getNanPositions(ranks); 218 if (nanPositions.size() > 0) { 219 throw new NotANumberException(); 220 } 221 break; 222 default: // this should not happen unless NaNStrategy enum is changed 223 throw new MathInternalError(); 224 } 225 226 // Sort the IntDoublePairs 227 Arrays.sort(ranks); 228 229 // Walk the sorted array, filling output array using sorted positions, 230 // resolving ties as we go 231 double[] out = new double[ranks.length]; 232 int pos = 1; // position in sorted array 233 out[ranks[0].getPosition()] = pos; 234 List<Integer> tiesTrace = new ArrayList<Integer>(); 235 tiesTrace.add(ranks[0].getPosition()); 236 for (int i = 1; i < ranks.length; i++) { 237 if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) { 238 // tie sequence has ended (or had length 1) 239 pos = i + 1; 240 if (tiesTrace.size() > 1) { // if seq is nontrivial, resolve 241 resolveTie(out, tiesTrace); 242 } 243 tiesTrace = new ArrayList<Integer>(); 244 tiesTrace.add(ranks[i].getPosition()); 245 } else { 246 // tie sequence continues 247 tiesTrace.add(ranks[i].getPosition()); 248 } 249 out[ranks[i].getPosition()] = pos; 250 } 251 if (tiesTrace.size() > 1) { // handle tie sequence at end 252 resolveTie(out, tiesTrace); 253 } 254 if (nanStrategy == NaNStrategy.FIXED) { 255 restoreNaNs(out, nanPositions); 256 } 257 return out; 258 } 259 260 /** 261 * Returns an array that is a copy of the input array with IntDoublePairs 262 * having NaN values removed. 263 * 264 * @param ranks input array 265 * @return array with NaN-valued entries removed 266 */ 267 private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) { 268 if (!containsNaNs(ranks)) { 269 return ranks; 270 } 271 IntDoublePair[] outRanks = new IntDoublePair[ranks.length]; 272 int j = 0; 273 for (int i = 0; i < ranks.length; i++) { 274 if (Double.isNaN(ranks[i].getValue())) { 275 // drop, but adjust original ranks of later elements 276 for (int k = i + 1; k < ranks.length; k++) { 277 ranks[k] = new IntDoublePair( 278 ranks[k].getValue(), ranks[k].getPosition() - 1); 279 } 280 } else { 281 outRanks[j] = new IntDoublePair( 282 ranks[i].getValue(), ranks[i].getPosition()); 283 j++; 284 } 285 } 286 IntDoublePair[] returnRanks = new IntDoublePair[j]; 287 System.arraycopy(outRanks, 0, returnRanks, 0, j); 288 return returnRanks; 289 } 290 291 /** 292 * Recodes NaN values to the given value. 293 * 294 * @param ranks array to recode 295 * @param value the value to replace NaNs with 296 */ 297 private void recodeNaNs(IntDoublePair[] ranks, double value) { 298 for (int i = 0; i < ranks.length; i++) { 299 if (Double.isNaN(ranks[i].getValue())) { 300 ranks[i] = new IntDoublePair( 301 value, ranks[i].getPosition()); 302 } 303 } 304 } 305 306 /** 307 * Checks for presence of NaNs in <code>ranks.</code> 308 * 309 * @param ranks array to be searched for NaNs 310 * @return true iff ranks contains one or more NaNs 311 */ 312 private boolean containsNaNs(IntDoublePair[] ranks) { 313 for (int i = 0; i < ranks.length; i++) { 314 if (Double.isNaN(ranks[i].getValue())) { 315 return true; 316 } 317 } 318 return false; 319 } 320 321 /** 322 * Resolve a sequence of ties, using the configured {@link TiesStrategy}. 323 * The input <code>ranks</code> array is expected to take the same value 324 * for all indices in <code>tiesTrace</code>. The common value is recoded 325 * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>, 326 * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged. 327 * The same array and trace with tiesStrategy AVERAGE will come out 328 * <5,8,3,6,3,7,1,3>. 329 * 330 * @param ranks array of ranks 331 * @param tiesTrace list of indices where <code>ranks</code> is constant 332 * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j] 333 * </code> 334 */ 335 private void resolveTie(double[] ranks, List<Integer> tiesTrace) { 336 337 // constant value of ranks over tiesTrace 338 final double c = ranks[tiesTrace.get(0)]; 339 340 // length of sequence of tied ranks 341 final int length = tiesTrace.size(); 342 343 switch (tiesStrategy) { 344 case AVERAGE: // Replace ranks with average 345 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d); 346 break; 347 case MAXIMUM: // Replace ranks with maximum values 348 fill(ranks, tiesTrace, c + length - 1); 349 break; 350 case MINIMUM: // Replace ties with minimum 351 fill(ranks, tiesTrace, c); 352 break; 353 case RANDOM: // Fill with random integral values in [c, c + length - 1] 354 Iterator<Integer> iterator = tiesTrace.iterator(); 355 long f = FastMath.round(c); 356 while (iterator.hasNext()) { 357 // No advertised exception because args are guaranteed valid 358 ranks[iterator.next()] = 359 randomData.nextLong(f, f + length - 1); 360 } 361 break; 362 case SEQUENTIAL: // Fill sequentially from c to c + length - 1 363 // walk and fill 364 iterator = tiesTrace.iterator(); 365 f = FastMath.round(c); 366 int i = 0; 367 while (iterator.hasNext()) { 368 ranks[iterator.next()] = f + i++; 369 } 370 break; 371 default: // this should not happen unless TiesStrategy enum is changed 372 throw new MathInternalError(); 373 } 374 } 375 376 /** 377 * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code> 378 * 379 * @param data array to modify 380 * @param tiesTrace list of index values to set 381 * @param value value to set 382 */ 383 private void fill(double[] data, List<Integer> tiesTrace, double value) { 384 Iterator<Integer> iterator = tiesTrace.iterator(); 385 while (iterator.hasNext()) { 386 data[iterator.next()] = value; 387 } 388 } 389 390 /** 391 * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code> 392 * 393 * @param ranks array to modify 394 * @param nanPositions list of index values to set to <code>Double.NaN</code> 395 */ 396 private void restoreNaNs(double[] ranks, List<Integer> nanPositions) { 397 if (nanPositions.size() == 0) { 398 return; 399 } 400 Iterator<Integer> iterator = nanPositions.iterator(); 401 while (iterator.hasNext()) { 402 ranks[iterator.next().intValue()] = Double.NaN; 403 } 404 405 } 406 407 /** 408 * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code> 409 * 410 * @param ranks array to search for <code>NaNs</code> 411 * @return list of indexes i such that <code>ranks[i] = NaN</code> 412 */ 413 private List<Integer> getNanPositions(IntDoublePair[] ranks) { 414 ArrayList<Integer> out = new ArrayList<Integer>(); 415 for (int i = 0; i < ranks.length; i++) { 416 if (Double.isNaN(ranks[i].getValue())) { 417 out.add(Integer.valueOf(i)); 418 } 419 } 420 return out; 421 } 422 423 /** 424 * Represents the position of a double value in an ordering. 425 * Comparable interface is implemented so Arrays.sort can be used 426 * to sort an array of IntDoublePairs by value. Note that the 427 * implicitly defined natural ordering is NOT consistent with equals. 428 */ 429 private static class IntDoublePair implements Comparable<IntDoublePair> { 430 431 /** Value of the pair */ 432 private final double value; 433 434 /** Original position of the pair */ 435 private final int position; 436 437 /** 438 * Construct an IntDoublePair with the given value and position. 439 * @param value the value of the pair 440 * @param position the original position 441 */ 442 public IntDoublePair(double value, int position) { 443 this.value = value; 444 this.position = position; 445 } 446 447 /** 448 * Compare this IntDoublePair to another pair. 449 * Only the <strong>values</strong> are compared. 450 * 451 * @param other the other pair to compare this to 452 * @return result of <code>Double.compare(value, other.value)</code> 453 */ 454 public int compareTo(IntDoublePair other) { 455 return Double.compare(value, other.value); 456 } 457 458 // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion. 459 460 /** 461 * Returns the value of the pair. 462 * @return value 463 */ 464 public double getValue() { 465 return value; 466 } 467 468 /** 469 * Returns the original position of the pair. 470 * @return position 471 */ 472 public int getPosition() { 473 return position; 474 } 475 } 476 }