View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.client;
20  
21  import com.google.protobuf.Service;
22  import com.google.protobuf.ServiceException;
23  import org.apache.hadoop.classification.InterfaceAudience;
24  import org.apache.hadoop.classification.InterfaceStability;
25  import org.apache.hadoop.conf.Configuration;
26  import org.apache.hadoop.hbase.TableName;
27  import org.apache.hadoop.hbase.HTableDescriptor;
28  import org.apache.hadoop.hbase.KeyValue;
29  import org.apache.hadoop.hbase.client.coprocessor.Batch;
30  import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel;
31  
32  import java.io.Closeable;
33  import java.io.IOException;
34  import java.util.List;
35  import java.util.Map;
36  
37  /**
38   * Used to communicate with a single HBase table.
39   *
40   * @since 0.21.0
41   */
42  @InterfaceAudience.Public
43  @InterfaceStability.Stable
44  public interface HTableInterface extends Closeable {
45  
46    /**
47     * Gets the name of this table.
48     *
49     * @return the table name, as a byte array.
50     */
51    byte[] getTableName();
52  
53    /**
54     * Gets the fully qualified {@link TableName} instance of this table.
55     */
56    TableName getName();
57  
58    /**
59     * Returns the {@link Configuration} object used by this instance.
60     * <p>
61     * The reference returned is not a copy, so any change made to it will
62     * affect this instance.
63     */
64    Configuration getConfiguration();
65  
66    /**
67     * Gets the {@link HTableDescriptor table descriptor} for this table.
68     * @throws IOException if a remote or network exception occurs.
69     */
70    HTableDescriptor getTableDescriptor() throws IOException;
71  
72    /**
73     * Test for the existence of columns in the table, as specified by the Get.
74     * <p>
75     *
76     * This will return true if the Get matches one or more keys, false if not.
77     * <p>
78     *
79     * This is a server-side call so it prevents any data from being transferred to
80     * the client.
81     *
82     * @param get the Get
83     * @return true if the specified Get matches one or more keys, false if not
84     * @throws IOException if a remote or network exception occurs.
85     */
86    boolean exists(Get get) throws IOException;
87  
88    /**
89     * Test for the existence of columns in the table, as specified by the Gets.
90     * <p>
91     *
92     * This will return an array of booleans. Each value will be true if the related Get matches
93     * one or more keys, false if not.
94     * <p>
95     *
96     * This is a server-side call so it prevents any data from being transferred to
97     * the client.
98     *
99     * @param gets the Gets
100    * @return array of Boolean, each true if the related Get matches one or more keys, false if not
101    * @throws IOException if a remote or network exception occurs.
102    */
103   Boolean[] exists(List<Get> gets) throws IOException;
104 
105   /**
106    * Method that does a batch call on Deletes, Gets, Puts, Increments, Appends and RowMutations.
107    * The ordering of execution of the actions is not defined, meaning that if you do a Put and a
108    * Get in the same {@link #batch} call, the Get is not guaranteed to return what the Put had put.
109    *
110    * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects
111    * @param results Empty Object[], same size as actions. Provides access to partial
112    *                results, in case an exception is thrown. A null in the result array means that
113    *                the call for that action failed, even after retries
114    * @throws IOException if a remote or network exception occurs.
115    * @throws InterruptedException if the calling thread is interrupted during the call.
116    * @since 0.90.0
117    */
118   void batch(final List<? extends Row> actions, final Object[] results) throws IOException, InterruptedException;
119 
120   /**
121    * Same as {@link #batch(List, Object[])}, but returns an array of
122    * results instead of using a results parameter reference.
123    * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects
124    * @return the results from the actions. A null in the return array means that
125    *         the call for that action failed, even after retries
126    * @throws IOException if a remote or network exception occurs.
127    * @throws InterruptedException if the calling thread is interrupted during the call.
128    * @since 0.90.0
129    */
130   Object[] batch(final List<? extends Row> actions) throws IOException, InterruptedException;
131 
132   /**
133    * Same as {@link #batch(List, Object[])}, but with a callback.
134    * @since 0.96.0
135    */
136   <R> void batchCallback(
137     final List<? extends Row> actions, final Object[] results, final Batch.Callback<R> callback
138   )
139     throws IOException, InterruptedException;
140 
141 
142   /**
143    * Same as {@link #batch(List)}, but with a callback.
144    * @since 0.96.0
145    */
146   <R> Object[] batchCallback(
147     List<? extends Row> actions, Batch.Callback<R> callback
148   ) throws IOException,
149     InterruptedException;
150 
151   /**
152    * Extracts certain cells from a given row.
153    * @param get The object that specifies what data to fetch and from which row.
154    * @return The data coming from the specified row, if it exists.  If the row
155    * specified doesn't exist, the {@link Result} instance returned won't
156    * contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}.
157    * @throws IOException if a remote or network exception occurs.
158    * @since 0.20.0
159    */
160   Result get(Get get) throws IOException;
161 
162   /**
163    * Extracts certain cells from the given rows, in batch.
164    *
165    * @param gets The objects that specify what data to fetch and from which rows.
166    *
167    * @return The data coming from the specified rows, if it exists.  If the row
168    *         specified doesn't exist, the {@link Result} instance returned won't
169    *         contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}.
170    *         If there are any failures even after retries, there will be a null in
171    *         the results array for those Gets, AND an exception will be thrown.
172    * @throws IOException if a remote or network exception occurs.
173    *
174    * @since 0.90.0
175    */
176   Result[] get(List<Get> gets) throws IOException;
177 
178   /**
179    * Return the row that matches <i>row</i> exactly,
180    * or the one that immediately precedes it.
181    *
182    * @param row A row key.
183    * @param family Column family to include in the {@link Result}.
184    * @return The row matching <i>row</i> exactly, or the one immediately preceding it.
185    * @throws IOException if a remote or network exception occurs.
186    * @since 0.20.0
187    * @deprecated As of version 0.92 this method is deprecated without replacement.
188    * getRowOrBefore is used internally to find entries in .META. and makes
189    * various assumptions about the table (which are true for .META. but not
190    * in general) to be efficient.
191    */
192   @Deprecated
193   Result getRowOrBefore(byte[] row, byte[] family) throws IOException;
194 
195   /**
196    * Returns a scanner on the current table as specified by the {@link Scan}
197    * object.
198    * Note that the passed {@link Scan}'s start row and caching properties
199    * may be changed.
200    *
201    * @param scan A configured {@link Scan} object.
202    * @return A scanner.
203    * @throws IOException if a remote or network exception occurs.
204    * @since 0.20.0
205    */
206   ResultScanner getScanner(Scan scan) throws IOException;
207 
208   /**
209    * Gets a scanner on the current table for the given family.
210    *
211    * @param family The column family to scan.
212    * @return A scanner.
213    * @throws IOException if a remote or network exception occurs.
214    * @since 0.20.0
215    */
216   ResultScanner getScanner(byte[] family) throws IOException;
217 
218   /**
219    * Gets a scanner on the current table for the given family and qualifier.
220    *
221    * @param family The column family to scan.
222    * @param qualifier The column qualifier to scan.
223    * @return A scanner.
224    * @throws IOException if a remote or network exception occurs.
225    * @since 0.20.0
226    */
227   ResultScanner getScanner(byte[] family, byte[] qualifier) throws IOException;
228 
229 
230   /**
231    * Puts some data in the table.
232    * <p>
233    * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered
234    * until the internal buffer is full.
235    * @param put The data to put.
236    * @throws IOException if a remote or network exception occurs.
237    * @since 0.20.0
238    */
239   void put(Put put) throws IOException;
240 
241   /**
242    * Puts some data in the table, in batch.
243    * <p>
244    * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered
245    * until the internal buffer is full.
246    * <p>
247    * This can be used for group commit, or for submitting user defined
248    * batches.  The writeBuffer will be periodically inspected while the List
249    * is processed, so depending on the List size the writeBuffer may flush
250    * not at all, or more than once.
251    * @param puts The list of mutations to apply. The batch put is done by
252    * aggregating the iteration of the Puts over the write buffer
253    * at the client-side for a single RPC call.
254    * @throws IOException if a remote or network exception occurs.
255    * @since 0.20.0
256    */
257   void put(List<Put> puts) throws IOException;
258 
259   /**
260    * Atomically checks if a row/family/qualifier value matches the expected
261    * value. If it does, it adds the put.  If the passed value is null, the check
262    * is for the lack of column (i.e. non-existence)
263    *
264    * @param row to check
265    * @param family column family to check
266    * @param qualifier column qualifier to check
267    * @param value the expected value
268    * @param put data to put if check succeeds
269    * @throws IOException if a remote or network exception occurs.
270    * @return true if the new put was executed, false otherwise
271    */
272   boolean checkAndPut(byte[] row, byte[] family, byte[] qualifier,
273       byte[] value, Put put) throws IOException;
274 
275   /**
276    * Deletes the specified cells/row.
277    *
278    * @param delete The object that specifies what to delete.
279    * @throws IOException if a remote or network exception occurs.
280    * @since 0.20.0
281    */
282   void delete(Delete delete) throws IOException;
283 
284   /**
285    * Deletes the specified cells/rows in bulk.
286    * @param deletes List of things to delete.  List gets modified by this
287    * method (in particular it gets re-ordered, so the order in which the elements
288    * are inserted in the list gives no guarantee as to the order in which the
289    * {@link Delete}s are executed).
290    * @throws IOException if a remote or network exception occurs. In that case
291    * the {@code deletes} argument will contain the {@link Delete} instances
292    * that have not been successfully applied.
293    * @since 0.20.1
294    */
295   void delete(List<Delete> deletes) throws IOException;
296 
297   /**
298    * Atomically checks if a row/family/qualifier value matches the expected
299    * value. If it does, it adds the delete.  If the passed value is null, the
300    * check is for the lack of column (i.e. non-existence)
301    *
302    * @param row to check
303    * @param family column family to check
304    * @param qualifier column qualifier to check
305    * @param value the expected value
306    * @param delete data to delete if check succeeds
307    * @throws IOException if a remote or network exception occurs.
308    * @return true if the new delete was executed, false otherwise
309    */
310   boolean checkAndDelete(byte[] row, byte[] family, byte[] qualifier,
311       byte[] value, Delete delete) throws IOException;
312 
313   /**
314    * Performs multiple mutations atomically on a single row. Currently
315    * {@link Put} and {@link Delete} are supported.
316    *
317    * @param rm object that specifies the set of mutations to perform atomically
318    * @throws IOException if a remote or network exception occurs.
319    */
320   void mutateRow(final RowMutations rm) throws IOException;
321 
322   /**
323    * Appends values to one or more columns within a single row.
324    * <p>
325    * This operation does not appear atomic to readers.  Appends are done
326    * under a single row lock, so write operations to a row are synchronized, but
327    * readers do not take row locks so get and scan operations can see this
328    * operation partially completed.
329    *
330    * @param append object that specifies the columns and the values to be
331    *                  appended to them
332    * @throws IOException if a remote or network exception occurs.
333    * @return values of columns after the append operation (may be null)
334    */
335   Result append(final Append append) throws IOException;
336 
337   /**
338    * Increments one or more columns within a single row.
339    * <p>
340    * This operation does not appear atomic to readers.  Increments are done
341    * under a single row lock, so write operations to a row are synchronized, but
342    * readers do not take row locks so get and scan operations can see this
343    * operation partially completed.
344    *
345    * @param increment object that specifies the columns and amounts to be used
346    *                  for the increment operations
347    * @throws IOException if a remote or network exception occurs.
348    * @return values of columns after the increment
349    */
350   Result increment(final Increment increment) throws IOException;
351 
352   /**
353    * See {@link #incrementColumnValue(byte[], byte[], byte[], long, Durability)}
354    * <p>
355    * The {@link Durability} is defaulted to {@link Durability#SYNC_WAL}.
356    * @param row The row that contains the cell to increment.
357    * @param family The column family of the cell to increment.
358    * @param qualifier The column qualifier of the cell to increment.
359    * @param amount The amount to increment the cell with (or decrement, if the
360    * amount is negative).
361    * @return The new value, post increment.
362    * @throws IOException if a remote or network exception occurs.
363    */
364   long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier,
365       long amount) throws IOException;
366 
367   /**
368    * Atomically increments a column value. If the column value already exists
369    * and is not a big-endian long, this could throw an exception. If the column
370    * value does not yet exist it is initialized to <code>amount</code> and
371    * written to the specified column.
372    *
373    * <p>Setting durability to {@link Durability#SKIP_WAL} means that in a fail
374    * scenario you will lose any increments that have not been flushed.
375    * @param row The row that contains the cell to increment.
376    * @param family The column family of the cell to increment.
377    * @param qualifier The column qualifier of the cell to increment.
378    * @param amount The amount to increment the cell with (or decrement, if the
379    * amount is negative).
380    * @param durability The persistence guarantee for this increment.
381    * @return The new value, post increment.
382    * @throws IOException if a remote or network exception occurs.
383    */
384   long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier,
385       long amount, Durability durability) throws IOException;
386 
387   /**
388    * Tells whether or not 'auto-flush' is turned on.
389    *
390    * @return {@code true} if 'auto-flush' is enabled (default), meaning
391    * {@link Put} operations don't get buffered/delayed and are immediately
392    * executed.
393    */
394   boolean isAutoFlush();
395 
396   /**
397    * Executes all the buffered {@link Put} operations.
398    * <p>
399    * This method gets called once automatically for every {@link Put} or batch
400    * of {@link Put}s (when {@code put(List<Put>)} is used) when
401    * {@link #isAutoFlush} is {@code true}.
402    * @throws IOException if a remote or network exception occurs.
403    */
404   void flushCommits() throws IOException;
405 
406   /**
407    * Releases any resources held or pending changes in internal buffers.
408    *
409    * @throws IOException if a remote or network exception occurs.
410    */
411   void close() throws IOException;
412 
413   /**
414    * Creates and returns a {@link com.google.protobuf.RpcChannel} instance connected to the
415    * table region containing the specified row.  The row given does not actually have
416    * to exist.  Whichever region would contain the row based on start and end keys will
417    * be used.  Note that the {@code row} parameter is also not passed to the
418    * coprocessor handler registered for this protocol, unless the {@code row}
419    * is separately passed as an argument in the service request.  The parameter
420    * here is only used to locate the region used to handle the call.
421    *
422    * <p>
423    * The obtained {@link com.google.protobuf.RpcChannel} instance can be used to access a published
424    * coprocessor {@link com.google.protobuf.Service} using standard protobuf service invocations:
425    * </p>
426    *
427    * <div style="background-color: #cccccc; padding: 2px">
428    * <blockquote><pre>
429    * CoprocessorRpcChannel channel = myTable.coprocessorService(rowkey);
430    * MyService.BlockingInterface service = MyService.newBlockingStub(channel);
431    * MyCallRequest request = MyCallRequest.newBuilder()
432    *     ...
433    *     .build();
434    * MyCallResponse response = service.myCall(null, request);
435    * </pre></blockquote></div>
436    *
437    * @param row The row key used to identify the remote region location
438    * @return A CoprocessorRpcChannel instance
439    */
440   CoprocessorRpcChannel coprocessorService(byte[] row);
441 
442   /**
443    * Creates an instance of the given {@link com.google.protobuf.Service} subclass for each table
444    * region spanning the range from the {@code startKey} row to {@code endKey} row (inclusive),
445    * and invokes the passed {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call}
446    * method with each {@link Service}
447    * instance.
448    *
449    * @param service the protocol buffer {@code Service} implementation to call
450    * @param startKey start region selection with region containing this row.  If {@code null}, the
451    *                 selection will start with the first table region.
452    * @param endKey select regions up to and including the region containing this row.
453    *               If {@code null}, selection will continue through the last table region.
454    * @param callable this instance's
455    *                 {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call}
456    *                 method will be invoked once per table region, using the {@link Service}
457    *                 instance connected to that region.
458    * @param <T> the {@link Service} subclass to connect to
459    * @param <R> Return type for the {@code callable} parameter's
460    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method
461    * @return a map of result values keyed by region name
462    */
463   <T extends Service, R> Map<byte[],R> coprocessorService(final Class<T> service,
464       byte[] startKey, byte[] endKey, final Batch.Call<T,R> callable)
465       throws ServiceException, Throwable;
466 
467   /**
468    * Creates an instance of the given {@link com.google.protobuf.Service} subclass for each table
469    * region spanning the range from the {@code startKey} row to {@code endKey} row (inclusive),
470    * and invokes the passed {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call}
471    * method with each {@link Service} instance.
472    *
473    * <p>
474    * The given
475    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Callback#update(byte[], byte[], Object)}
476    * method will be called with the return value from each region's
477    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} invocation.
478    * </p>
479    *
480    * @param service the protocol buffer {@code Service} implementation to call
481    * @param startKey start region selection with region containing this row.  If {@code null}, the
482    *                 selection will start with the first table region.
483    * @param endKey select regions up to and including the region containing this row.
484    *               If {@code null}, selection will continue through the last table region.
485    * @param callable this instance's
486    *                 {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method
487    *                 will be invoked once per table region, using the {@link Service} instance
488    *                 connected to that region.
489    * @param callback this instance's {@code update} method is called with each region's return value
490    * @param <T> the {@link Service} subclass to connect to
491    * @param <R> Return type for the {@code callable} parameter's
492    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method
493    */
494   <T extends Service, R> void coprocessorService(final Class<T> service,
495       byte[] startKey, byte[] endKey, final Batch.Call<T,R> callable,
496       final Batch.Callback<R> callback) throws ServiceException, Throwable;
497 
498   /**
499    * See {@link #setAutoFlush(boolean, boolean)}
500    *
501    * @param autoFlush
502    *        Whether or not to enable 'auto-flush'.
503    */
504   void setAutoFlush(boolean autoFlush);
505 
506   /**
507    * Turns 'auto-flush' on or off.
508    * <p>
509    * When enabled (default), {@link Put} operations don't get buffered/delayed
510    * and are immediately executed. Failed operations are not retried. This is
511    * slower but safer.
512    * <p>
513    * Turning off {@code autoFlush} means that multiple {@link Put}s will be
514    * accepted before any RPC is actually sent to do the write operations. If the
515    * application dies before pending writes get flushed to HBase, data will be
516    * lost.
517    * <p>
518    * When you turn {@code autoFlush} off, you should also consider the
519    * {@code clearBufferOnFail} option. By default, asynchronous {@link Put}
520    * requests will be retried on failure until successful. However, this can
521    * pollute the writeBuffer and slow down batching performance. Additionally,
522    * you may want to issue a number of Put requests and call
523    * {@link #flushCommits()} as a barrier. In both use cases, consider setting
524    * clearBufferOnFail to true to erase the buffer after {@link #flushCommits()}
525    * has been called, regardless of success.
526    *
527    * @param autoFlush
528    *        Whether or not to enable 'auto-flush'.
529    * @param clearBufferOnFail
530    *        Whether to clear failed Puts from the writeBuffer instead of keeping them for retry
531    * @see #flushCommits
532    */
533   void setAutoFlush(boolean autoFlush, boolean clearBufferOnFail);
534 
535   /**
536    * Returns the maximum size in bytes of the write buffer for this HTable.
537    * <p>
538    * The default value comes from the configuration parameter
539    * {@code hbase.client.write.buffer}.
540    * @return The size of the write buffer in bytes.
541    */
542   long getWriteBufferSize();
543 
544   /**
545    * Sets the size of the buffer in bytes.
546    * <p>
547    * If the new size is less than the current amount of data in the
548    * write buffer, the buffer gets flushed.
549    * @param writeBufferSize The new write buffer size, in bytes.
550    * @throws IOException if a remote or network exception occurs.
551    */
552   void setWriteBufferSize(long writeBufferSize) throws IOException;
553 }