View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase;
20  
21  import java.io.Serializable;
22  import java.util.Comparator;
23  
24  import org.apache.hadoop.hbase.classification.InterfaceAudience;
25  import org.apache.hadoop.hbase.classification.InterfaceStability;
26  import org.apache.hadoop.hbase.KeyValue.Type;
27  import org.apache.hadoop.hbase.util.Bytes;
28  
29  import com.google.common.primitives.Longs;
30  
31  /**
32   * Compare two HBase cells.  Do not use this method comparing <code>-ROOT-</code> or
33   * <code>hbase:meta</code> cells.  Cells from these tables need a specialized comparator, one that
34   * takes account of the special formatting of the row where we have commas to delimit table from
35   * regionname, from row.  See KeyValue for how it has a special comparator to do hbase:meta cells
36   * and yet another for -ROOT-.
37   */
38  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
39      value="UNKNOWN",
40      justification="Findbugs doesn't like the way we are negating the result of a compare in below")
41  @InterfaceAudience.Private
42  @InterfaceStability.Evolving
43  public class CellComparator implements Comparator<Cell>, Serializable{
44    private static final long serialVersionUID = -8760041766259623329L;
45  
46    @Override
47    public int compare(Cell a, Cell b) {
48      return compareStatic(a, b);
49    }
50  
51    public static int compareStatic(Cell a, Cell b) {
52      return compareStatic(a, b, false);
53    }
54    
55    public static int compareStatic(Cell a, Cell b, boolean onlyKey) {
56      //row
57      int c = Bytes.compareTo(
58          a.getRowArray(), a.getRowOffset(), a.getRowLength(),
59          b.getRowArray(), b.getRowOffset(), b.getRowLength());
60      if (c != 0) return c;
61  
62      // If the column is not specified, the "minimum" key type appears the
63      // latest in the sorted order, regardless of the timestamp. This is used
64      // for specifying the last key/value in a given row, because there is no
65      // "lexicographically last column" (it would be infinitely long). The
66      // "maximum" key type does not need this behavior.
67      if (a.getFamilyLength() == 0 && a.getTypeByte() == Type.Minimum.getCode()) {
68        // a is "bigger", i.e. it appears later in the sorted order
69        return 1;
70      }
71      if (b.getFamilyLength() == 0 && b.getTypeByte() == Type.Minimum.getCode()) {
72        return -1;
73      }
74  
75      //family
76      c = Bytes.compareTo(
77        a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(),
78        b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength());
79      if (c != 0) return c;
80  
81      //qualifier
82      c = Bytes.compareTo(
83          a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(),
84          b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength());
85      if (c != 0) return c;
86  
87      //timestamp: later sorts first
88      c = Longs.compare(b.getTimestamp(), a.getTimestamp());
89      if (c != 0) return c;
90  
91      //type
92      c = (0xff & b.getTypeByte()) - (0xff & a.getTypeByte());
93      if (c != 0) return c;
94  
95      if (onlyKey) return c;
96  
97      //mvccVersion: later sorts first
98      return Longs.compare(b.getMvccVersion(), a.getMvccVersion());
99    }
100 
101 
102   /**************** equals ****************************/
103 
104   public static boolean equals(Cell a, Cell b){
105     return equalsRow(a, b)
106         && equalsFamily(a, b)
107         && equalsQualifier(a, b)
108         && equalsTimestamp(a, b)
109         && equalsType(a, b);
110   }
111 
112   public static boolean equalsRow(Cell a, Cell b){
113     return Bytes.equals(
114       a.getRowArray(), a.getRowOffset(), a.getRowLength(),
115       b.getRowArray(), b.getRowOffset(), b.getRowLength());
116   }
117 
118   public static boolean equalsFamily(Cell a, Cell b){
119     return Bytes.equals(
120       a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(),
121       b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength());
122   }
123 
124   public static boolean equalsQualifier(Cell a, Cell b){
125     return Bytes.equals(
126       a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(),
127       b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength());
128   }
129 
130   public static boolean equalsTimestamp(Cell a, Cell b){
131     return a.getTimestamp() == b.getTimestamp();
132   }
133 
134   public static boolean equalsType(Cell a, Cell b){
135     return a.getTypeByte() == b.getTypeByte();
136   }
137 
138   public static int compareFamilies(Cell left, Cell right) {
139     return Bytes.compareTo(left.getFamilyArray(), left.getFamilyOffset(), left.getFamilyLength(),
140         right.getFamilyArray(), right.getFamilyOffset(), right.getFamilyLength());
141   }
142 
143   public static int compareQualifiers(Cell left, Cell right) {
144     return Bytes.compareTo(left.getQualifierArray(), left.getQualifierOffset(),
145         left.getQualifierLength(), right.getQualifierArray(), right.getQualifierOffset(),
146         right.getQualifierLength());
147   }
148 
149   /**
150    * Do not use comparing rows from hbase:meta. Meta table Cells have schema (table,startrow,hash)
151    * so can't be treated as plain byte arrays as this method does.
152    */
153   public static int compareRows(final Cell left, final Cell right) {
154     return Bytes.compareTo(left.getRowArray(), left.getRowOffset(), left.getRowLength(),
155         right.getRowArray(), right.getRowOffset(), right.getRowLength());
156   }
157 
158   /**
159    * Do not use comparing rows from hbase:meta. Meta table Cells have schema (table,startrow,hash)
160    * so can't be treated as plain byte arrays as this method does.
161    */
162   public static int compareRows(byte[] left, int loffset, int llength, byte[] right, int roffset,
163       int rlength) {
164     return Bytes.compareTo(left, loffset, llength, right, roffset, rlength);
165   }
166 
167   /**
168    * Compares cell's timestamps in DESCENDING order.
169    * The below older timestamps sorting ahead of newer timestamps looks
170    * wrong but it is intentional. This way, newer timestamps are first
171    * found when we iterate over a memstore and newer versions are the
172    * first we trip over when reading from a store file.
173    * @return 1 if left's timestamp < right's timestamp
174    *         -1 if left's timestamp > right's timestamp
175    *         0 if both timestamps are equal
176    */
177   public static int compareTimestamps(final Cell left, final Cell right) {
178     long ltimestamp = left.getTimestamp();
179     long rtimestamp = right.getTimestamp();
180     return compareTimestamps(ltimestamp, rtimestamp);
181   }
182 
183   /********************* hashCode ************************/
184 
185   /**
186    * Returns a hash code that is always the same for two Cells having a matching equals(..) result.
187    */
188   public static int hashCode(Cell cell){
189     if (cell == null) {// return 0 for empty Cell
190       return 0;
191     }
192     int hash = calculateHashForKeyValue(cell);
193     hash = 31 * hash + (int)cell.getMvccVersion();
194     return hash;
195   }
196 
197   /**
198    * Returns a hash code that is always the same for two Cells having a matching
199    * equals(..) result. Note : Ignore mvcc while calculating the hashcode
200    * 
201    * @param cell
202    * @return hashCode
203    */
204   public static int hashCodeIgnoreMvcc(Cell cell) {
205     if (cell == null) {// return 0 for empty Cell
206       return 0;
207     }
208     int hash = calculateHashForKeyValue(cell);
209     return hash;
210   }
211 
212   private static int calculateHashForKeyValue(Cell cell) {
213     //pre-calculate the 3 hashes made of byte ranges
214     int rowHash = Bytes.hashCode(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
215     int familyHash =
216       Bytes.hashCode(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
217     int qualifierHash = Bytes.hashCode(cell.getQualifierArray(), cell.getQualifierOffset(),
218       cell.getQualifierLength());
219 
220     //combine the 6 sub-hashes
221     int hash = 31 * rowHash + familyHash;
222     hash = 31 * hash + qualifierHash;
223     hash = 31 * hash + (int)cell.getTimestamp();
224     hash = 31 * hash + cell.getTypeByte();
225     return hash;
226   }
227 
228 
229   /******************** lengths *************************/
230 
231   public static boolean areKeyLengthsEqual(Cell a, Cell b) {
232     return a.getRowLength() == b.getRowLength()
233         && a.getFamilyLength() == b.getFamilyLength()
234         && a.getQualifierLength() == b.getQualifierLength();
235   }
236 
237   public static boolean areRowLengthsEqual(Cell a, Cell b) {
238     return a.getRowLength() == b.getRowLength();
239   }
240 
241 
242   /***************** special cases ****************************/
243 
244   /**
245    * special case for KeyValue.equals
246    */
247   private static int compareStaticIgnoreMvccVersion(Cell a, Cell b) {
248     //row
249     int c = Bytes.compareTo(
250         a.getRowArray(), a.getRowOffset(), a.getRowLength(),
251         b.getRowArray(), b.getRowOffset(), b.getRowLength());
252     if (c != 0) return c;
253 
254     //family
255     c = Bytes.compareTo(
256       a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(),
257       b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength());
258     if (c != 0) return c;
259 
260     //qualifier
261     c = Bytes.compareTo(
262         a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(),
263         b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength());
264     if (c != 0) return c;
265 
266     //timestamp: later sorts first
267     c = Longs.compare(b.getTimestamp(), a.getTimestamp());
268     if (c != 0) return c;
269 
270     //type
271     c = (0xff & b.getTypeByte()) - (0xff & a.getTypeByte());
272     return c;
273   }
274 
275   /**
276    * special case for KeyValue.equals
277    */
278   public static boolean equalsIgnoreMvccVersion(Cell a, Cell b){
279     return 0 == compareStaticIgnoreMvccVersion(a, b);
280   }
281 
282   /**
283    * Compares timestamps in DESCENDING order.
284    * The below older timestamps sorting ahead of newer timestamps looks
285    * wrong but it is intentional. This way, newer timestamps are first
286    * found when we iterate over a memstore and newer versions are the
287    * first we trip over when reading from a store file.
288    * @return 1 if left timestamp < right timestamp
289    *         -1 if left timestamp > right timestamp
290    *         0 if both timestamps are equal
291    */
292   static int compareTimestamps(final long ltimestamp, final long rtimestamp) {
293     if (ltimestamp < rtimestamp) {
294       return 1;
295     } else if (ltimestamp > rtimestamp) {
296       return -1;
297     }
298     return 0;
299   }
300 
301 }