/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.InputSplit;

/**
 * A table split corresponds to a key range (low, high) and an optional scanner.
 * All references to row below refer to the key of the row.
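 * <p>
 * A minimal usage sketch (illustrative only; the table, rows and host below are
 * made-up values):
 * <pre>{@code
 * Scan scan = new Scan();
 * TableSplit split = new TableSplit(TableName.valueOf("exampleTable"), scan,
 *     Bytes.toBytes("row-0000"), Bytes.toBytes("row-9999"), "rs1.example.com");
 * String[] locations = split.getLocations();  // {"rs1.example.com"}
 * Scan restored = split.getScan();            // declares IOException
 * }</pre>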
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class TableSplit extends InputSplit
implements Writable, Comparable<TableSplit> {
  public static final Log LOG = LogFactory.getLog(TableSplit.class);

  // Version codes must be < 0 so they can be told apart from the non-negative
  // vint that encodes the tableName length in unversioned splits (see #readFields(DataInput)).
  // The first versioned format (INITIAL) adds the serialized Scan data member.
  enum Version {
    UNVERSIONED(0),
    // Initial number we put on TableSplit when we introduced versioning.
    INITIAL(-1);

    final int code;
    static final Version[] byCode;
    static {
      byCode = Version.values();
      for (int i = 0; i < byCode.length; i++) {
        if (byCode[i].code != -1 * i) {
          throw new AssertionError("Values in this enum should be descending by one");
        }
      }
    }

    Version(int code) {
      this.code = code;
    }

    boolean atLeast(Version other) {
      return code <= other.code;
    }

    static Version fromCode(int code) {
      return byCode[code * -1];
    }
  }
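
  // For illustration: with the descending-by-one codes enforced above,
  //   Version.fromCode(0)  returns UNVERSIONED,
  //   Version.fromCode(-1) returns INITIAL, and
  //   INITIAL.atLeast(UNVERSIONED) is true because -1 <= 0.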

  private static final Version VERSION = Version.INITIAL;
  private TableName tableName;
  private byte [] startRow;
  private byte [] endRow;
  private String regionLocation;
  private String scan = ""; // stores the serialized form of the Scan

  /** Default constructor. */
  public TableSplit() {
    this(null, null, HConstants.EMPTY_BYTE_ARRAY,
      HConstants.EMPTY_BYTE_ARRAY, "");
  }

  /**
   * Creates a new instance while assigning all variables.
   *
   * @param tableName  The name of the current table.
   * @param scan  The scan associated with this split.
   * @param startRow  The start row of the split.
   * @param endRow  The end row of the split.
   * @param location  The location of the region.
   */
  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
      final String location) {
    this.tableName = tableName;
    try {
      this.scan =
        (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
    } catch (IOException e) {
      LOG.warn("Failed to convert Scan to String", e);
    }
    this.startRow = startRow;
    this.endRow = endRow;
    this.regionLocation = location;
  }

  /**
   * Creates a new instance without a scanner.
   *
   * @param tableName  The name of the current table.
   * @param startRow  The start row of the split.
   * @param endRow  The end row of the split.
   * @param location  The location of the region.
   */
  public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
      final String location) {
    this(tableName, null, startRow, endRow, location);
  }

  /**
   * Returns a Scan object reconstructed from the stored string representation.
   *
   * @return The Scan object based on the stored scanner.
   * @throws IOException When reading the serialized scan fails.
   */
  public Scan getScan() throws IOException {
    return TableMapReduceUtil.convertStringToScan(this.scan);
  }

  /**
   * Returns the table name.
   *
   * @return The table name.
   */
  public TableName getTableName() {
    return tableName;
  }

  /**
   * Returns the start row.
   *
   * @return The start row.
   */
  public byte [] getStartRow() {
    return startRow;
  }

  /**
   * Returns the end row.
   *
   * @return The end row.
   */
  public byte [] getEndRow() {
    return endRow;
  }

  /**
   * Returns the region location.
   *
   * @return The region's location.
   */
  public String getRegionLocation() {
    return regionLocation;
  }

  /**
   * Returns the region's location as an array.
   *
   * @return The array containing the region location.
   * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
   */
  @Override
  public String[] getLocations() {
    return new String[] {regionLocation};
  }

  /**
   * Returns the length of the split.
   *
   * @return The length of the split.
   * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
   */
  @Override
  public long getLength() {
    // Not clear how to obtain this... seems to be used only for sorting splits
    return 0;
  }

  /**
   * Reads the values of each field.
   *
   * @param in  The input to read from.
   * @throws IOException When reading the input fails.
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    Version version = Version.UNVERSIONED;
    // TableSplit was not versioned in the beginning.
    // In order to introduce it now, we make use of the fact
    // that tableName was written with Bytes.writeByteArray,
    // which encodes the array length as a vint which is >= 0.
    // Hence if the vint is >= 0 we have an old version and the vint
    // encodes the length of tableName.
    // If < 0 we just read the version and the next vint is the length.
    // @see Bytes#readByteArray(DataInput)
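    // A worked example (hypothetical bytes): an unversioned split for a table
    // named "testtable" starts with the vint 9 (the tableName length), which is
    // >= 0, so no version marker is consumed. A split written by the current
    // code starts with the vint -1 (Version.INITIAL), and only then comes the
    // vint length of tableName.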
    int len = WritableUtils.readVInt(in);
    if (len < 0) {
      // what we just read was the version
      version = Version.fromCode(len);
      len = WritableUtils.readVInt(in);
    }
    byte[] tableNameBytes = new byte[len];
    in.readFully(tableNameBytes);
    tableName = TableName.valueOf(tableNameBytes);
    startRow = Bytes.readByteArray(in);
    endRow = Bytes.readByteArray(in);
    regionLocation = Bytes.toString(Bytes.readByteArray(in));
    if (version.atLeast(Version.INITIAL)) {
      scan = Bytes.toString(Bytes.readByteArray(in));
    }
  }

  /**
   * Writes the field values to the output.
   *
   * @param out  The output to write to.
   * @throws IOException When writing the values to the output fails.
   */
  @Override
  public void write(DataOutput out) throws IOException {
    WritableUtils.writeVInt(out, VERSION.code);
    Bytes.writeByteArray(out, tableName.getName());
    Bytes.writeByteArray(out, startRow);
    Bytes.writeByteArray(out, endRow);
    Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
    Bytes.writeByteArray(out, Bytes.toBytes(scan));
  }
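
  // For reference, the wire layout produced by write() and consumed by
  // readFields() is: a vint version code (currently -1 for INITIAL), followed by
  // the tableName, startRow, endRow, regionLocation and serialized scan, each
  // written via Bytes.writeByteArray (a vint length followed by the bytes).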

  /**
   * Returns the details about this instance as a string.
   *
   * @return The values of this instance as a string.
   * @see java.lang.Object#toString()
   */
  @Override
  public String toString() {
    return regionLocation + ":" +
      Bytes.toStringBinary(startRow) + "," + Bytes.toStringBinary(endRow);
  }

  /**
   * Compares this split against the given one.
   *
   * @param split  The split to compare to.
   * @return The result of the comparison.
   * @see java.lang.Comparable#compareTo(java.lang.Object)
   */
  @Override
  public int compareTo(TableSplit split) {
    // If the table names of the two splits are the same, compare the start rows;
    // otherwise compare the table names.
    int tableNameComparison =
        getTableName().compareTo(split.getTableName());
    return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
        getStartRow(), split.getStartRow());
  }

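  // Note that the serialized scan takes part in hashCode() below but not in
  // equals(); only the table name, row range and region location decide equality.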
  @Override
  public boolean equals(Object o) {
    if (o == null || !(o instanceof TableSplit)) {
      return false;
    }
    return tableName.equals(((TableSplit)o).tableName) &&
      Bytes.equals(startRow, ((TableSplit)o).startRow) &&
      Bytes.equals(endRow, ((TableSplit)o).endRow) &&
      regionLocation.equals(((TableSplit)o).regionLocation);
  }

  @Override
  public int hashCode() {
    int result = tableName != null ? tableName.hashCode() : 0;
    result = 31 * result + (scan != null ? scan.hashCode() : 0);
    result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
    result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
    result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
    return result;
  }
}