View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.client;
20  
21  import com.google.protobuf.Service;
22  import com.google.protobuf.ServiceException;
23  
24  import org.apache.hadoop.classification.InterfaceAudience;
25  import org.apache.hadoop.classification.InterfaceStability;
26  import org.apache.hadoop.conf.Configuration;
27  import org.apache.hadoop.hbase.TableName;
28  import org.apache.hadoop.hbase.HTableDescriptor;
29  import org.apache.hadoop.hbase.KeyValue;
30  import org.apache.hadoop.hbase.client.coprocessor.Batch;
31  import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel;
32  
33  import java.io.Closeable;
34  import java.io.IOException;
35  import java.util.List;
36  import java.util.Map;
37  
38  /**
39   * Used to communicate with a single HBase table.
40   * Obtain an instance from an {@link HConnection}.
41   *
42   * @since 0.21.0
43   */
44  @InterfaceAudience.Public
45  @InterfaceStability.Stable
46  public interface HTableInterface extends Closeable {
47  
48    /**
49     * Gets the name of this table.
50     *
51     * @return the table name.
52     */
53    byte[] getTableName();
54  
55    /**
56     * Gets the fully qualified table name instance of this table.
57     */
58    TableName getName();
59  
60    /**
61     * Returns the {@link Configuration} object used by this instance.
62     * <p>
63     * The reference returned is not a copy, so any change made to it will
64     * affect this instance.
65     */
66    Configuration getConfiguration();
67  
68    /**
69     * Gets the {@link HTableDescriptor table descriptor} for this table.
70     * @throws IOException if a remote or network exception occurs.
71     */
72    HTableDescriptor getTableDescriptor() throws IOException;
73  
74    /**
75     * Test for the existence of columns in the table, as specified by the Get.
76     * <p>
77     *
78     * This will return true if the Get matches one or more keys, false if not.
79     * <p>
80     *
81     * This is a server-side call so it prevents any data from being transfered to
82     * the client.
83     *
84     * @param get the Get
85     * @return true if the specified Get matches one or more keys, false if not
86     * @throws IOException e
87     */
88    boolean exists(Get get) throws IOException;
89  
90    /**
91     * Test for the existence of columns in the table, as specified by the Gets.
92     * <p>
93     *
94     * This will return an array of booleans. Each value will be true if the related Get matches
95     * one or more keys, false if not.
96     * <p>
97     *
98     * This is a server-side call so it prevents any data from being transfered to
99     * the client.
100    *
101    * @param gets the Gets
102    * @return Array of Boolean true if the specified Get matches one or more keys, false if not
103    * @throws IOException e
104    */
105   Boolean[] exists(List<Get> gets) throws IOException;
106 
107   /**
108    * Method that does a batch call on Deletes, Gets, Puts, Increments, Appends and RowMutations.
109    * The ordering of execution of the actions is not defined. Meaning if you do a Put and a
110    * Get in the same {@link #batch} call, you will not necessarily be
111    * guaranteed that the Get returns what the Put had put.
112    *
113    * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects
114    * @param results Empty Object[], same size as actions. Provides access to partial
115    *                results, in case an exception is thrown. A null in the result array means that
116    *                the call for that action failed, even after retries
117    * @throws IOException
118    * @since 0.90.0
119    */
120   void batch(final List<?extends Row> actions, final Object[] results) throws IOException, InterruptedException;
121 
122   /**
123    * Same as {@link #batch(List, Object[])}, but returns an array of
124    * results instead of using a results parameter reference.
125    *
126    * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects
127    * @return the results from the actions. A null in the return array means that
128    *         the call for that action failed, even after retries
129    * @throws IOException
130    * @since 0.90.0
131    * @deprecated If any exception is thrown by one of the actions, there is no way to
132    * retrieve the partially executed results. Use {@link #batch(List, Object[])} instead.
133    */
134   Object[] batch(final List<? extends Row> actions) throws IOException, InterruptedException;
135 
136   /**
137    * Same as {@link #batch(List, Object[])}, but with a callback.
138    * @since 0.96.0
139    */
140   <R> void batchCallback(
141     final List<? extends Row> actions, final Object[] results, final Batch.Callback<R> callback
142   )
143     throws IOException, InterruptedException;
144 
145 
146   /**
147    * Same as {@link #batch(List)}, but with a callback.
148    * @since 0.96.0
149    * @deprecated If any exception is thrown by one of the actions, there is no way to
150    * retrieve the partially executed results. Use
151    * {@link #batchCallback(List, Object[], org.apache.hadoop.hbase.client.coprocessor.Batch.Callback)}
152    * instead.
153    */
154   <R> Object[] batchCallback(
155     List<? extends Row> actions, Batch.Callback<R> callback
156   ) throws IOException,
157     InterruptedException;
158 
159   /**
160    * Extracts certain cells from a given row.
161    * @param get The object that specifies what data to fetch and from which row.
162    * @return The data coming from the specified row, if it exists.  If the row
163    * specified doesn't exist, the {@link Result} instance returned won't
164    * contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}.
165    * @throws IOException if a remote or network exception occurs.
166    * @since 0.20.0
167    */
168   Result get(Get get) throws IOException;
169 
170   /**
171    * Extracts certain cells from the given rows, in batch.
172    *
173    * @param gets The objects that specify what data to fetch and from which rows.
174    *
175    * @return The data coming from the specified rows, if it exists.  If the row
176    *         specified doesn't exist, the {@link Result} instance returned won't
177    *         contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}.
178    *         If there are any failures even after retries, there will be a null in
179    *         the results array for those Gets, AND an exception will be thrown.
180    * @throws IOException if a remote or network exception occurs.
181    *
182    * @since 0.90.0
183    */
184   Result[] get(List<Get> gets) throws IOException;
185 
186   /**
187    * Return the row that matches <i>row</i> exactly,
188    * or the one that immediately precedes it.
189    *
190    * @param row A row key.
191    * @param family Column family to include in the {@link Result}.
192    * @throws IOException if a remote or network exception occurs.
193    * @since 0.20.0
194    * 
195    * @deprecated As of version 0.92 this method is deprecated without
196    * replacement.   
197    * getRowOrBefore is used internally to find entries in hbase:meta and makes
198    * various assumptions about the table (which are true for hbase:meta but not
199    * in general) to be efficient.
200    */
201   Result getRowOrBefore(byte[] row, byte[] family) throws IOException;
202 
203   /**
204    * Returns a scanner on the current table as specified by the {@link Scan}
205    * object.
206    * Note that the passed {@link Scan}'s start row and caching properties
207    * maybe changed.
208    *
209    * @param scan A configured {@link Scan} object.
210    * @return A scanner.
211    * @throws IOException if a remote or network exception occurs.
212    * @since 0.20.0
213    */
214   ResultScanner getScanner(Scan scan) throws IOException;
215 
216   /**
217    * Gets a scanner on the current table for the given family.
218    *
219    * @param family The column family to scan.
220    * @return A scanner.
221    * @throws IOException if a remote or network exception occurs.
222    * @since 0.20.0
223    */
224   ResultScanner getScanner(byte[] family) throws IOException;
225 
226   /**
227    * Gets a scanner on the current table for the given family and qualifier.
228    *
229    * @param family The column family to scan.
230    * @param qualifier The column qualifier to scan.
231    * @return A scanner.
232    * @throws IOException if a remote or network exception occurs.
233    * @since 0.20.0
234    */
235   ResultScanner getScanner(byte[] family, byte[] qualifier) throws IOException;
236 
237 
238   /**
239    * Puts some data in the table.
240    * <p>
241    * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered
242    * until the internal buffer is full.
243    * @param put The data to put.
244    * @throws IOException if a remote or network exception occurs.
245    * @since 0.20.0
246    */
247   void put(Put put) throws IOException;
248 
249   /**
250    * Puts some data in the table, in batch.
251    * <p>
252    * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered
253    * until the internal buffer is full.
254    * <p>
255    * This can be used for group commit, or for submitting user defined
256    * batches.  The writeBuffer will be periodically inspected while the List
257    * is processed, so depending on the List size the writeBuffer may flush
258    * not at all, or more than once.
259    * @param puts The list of mutations to apply. The batch put is done by
260    * aggregating the iteration of the Puts over the write buffer
261    * at the client-side for a single RPC call.
262    * @throws IOException if a remote or network exception occurs.
263    * @since 0.20.0
264    */
265   void put(List<Put> puts) throws IOException;
266 
267   /**
268    * Atomically checks if a row/family/qualifier value matches the expected
269    * value. If it does, it adds the put.  If the passed value is null, the check
270    * is for the lack of column (ie: non-existance)
271    *
272    * @param row to check
273    * @param family column family to check
274    * @param qualifier column qualifier to check
275    * @param value the expected value
276    * @param put data to put if check succeeds
277    * @throws IOException e
278    * @return true if the new put was executed, false otherwise
279    */
280   boolean checkAndPut(byte[] row, byte[] family, byte[] qualifier,
281       byte[] value, Put put) throws IOException;
282 
283   /**
284    * Deletes the specified cells/row.
285    *
286    * @param delete The object that specifies what to delete.
287    * @throws IOException if a remote or network exception occurs.
288    * @since 0.20.0
289    */
290   void delete(Delete delete) throws IOException;
291 
292   /**
293    * Deletes the specified cells/rows in bulk.
294    * @param deletes List of things to delete.  List gets modified by this
295    * method (in particular it gets re-ordered, so the order in which the elements
296    * are inserted in the list gives no guarantee as to the order in which the
297    * {@link Delete}s are executed).
298    * @throws IOException if a remote or network exception occurs. In that case
299    * the {@code deletes} argument will contain the {@link Delete} instances
300    * that have not be successfully applied.
301    * @since 0.20.1
302    */
303   void delete(List<Delete> deletes) throws IOException;
304 
305   /**
306    * Atomically checks if a row/family/qualifier value matches the expected
307    * value. If it does, it adds the delete.  If the passed value is null, the
308    * check is for the lack of column (ie: non-existance)
309    *
310    * @param row to check
311    * @param family column family to check
312    * @param qualifier column qualifier to check
313    * @param value the expected value
314    * @param delete data to delete if check succeeds
315    * @throws IOException e
316    * @return true if the new delete was executed, false otherwise
317    */
318   boolean checkAndDelete(byte[] row, byte[] family, byte[] qualifier,
319       byte[] value, Delete delete) throws IOException;
320 
321   /**
322    * Performs multiple mutations atomically on a single row. Currently
323    * {@link Put} and {@link Delete} are supported.
324    *
325    * @param rm object that specifies the set of mutations to perform atomically
326    * @throws IOException
327    */
328   void mutateRow(final RowMutations rm) throws IOException;
329 
330   /**
331    * Appends values to one or more columns within a single row.
332    * <p>
333    * This operation does not appear atomic to readers.  Appends are done
334    * under a single row lock, so write operations to a row are synchronized, but
335    * readers do not take row locks so get and scan operations can see this
336    * operation partially completed.
337    *
338    * @param append object that specifies the columns and amounts to be used
339    *                  for the increment operations
340    * @throws IOException e
341    * @return values of columns after the append operation (maybe null)
342    */
343   Result append(final Append append) throws IOException;
344 
345   /**
346    * Increments one or more columns within a single row.
347    * <p>
348    * This operation does not appear atomic to readers.  Increments are done
349    * under a single row lock, so write operations to a row are synchronized, but
350    * readers do not take row locks so get and scan operations can see this
351    * operation partially completed.
352    *
353    * @param increment object that specifies the columns and amounts to be used
354    *                  for the increment operations
355    * @throws IOException e
356    * @return values of columns after the increment
357    */
358   Result increment(final Increment increment) throws IOException;
359 
360   /**
361    * See {@link #incrementColumnValue(byte[], byte[], byte[], long, Durability)}
362    * <p>
363    * The {@link Durability} is defaulted to {@link Durability#SYNC_WAL}.
364    * @param row The row that contains the cell to increment.
365    * @param family The column family of the cell to increment.
366    * @param qualifier The column qualifier of the cell to increment.
367    * @param amount The amount to increment the cell with (or decrement, if the
368    * amount is negative).
369    * @return The new value, post increment.
370    * @throws IOException if a remote or network exception occurs.
371    */
372   long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier,
373       long amount) throws IOException;
374 
375   /**
376    * Atomically increments a column value. If the column value already exists
377    * and is not a big-endian long, this could throw an exception. If the column
378    * value does not yet exist it is initialized to <code>amount</code> and
379    * written to the specified column.
380    *
381    * <p>Setting durability to {@link Durability#SKIP_WAL} means that in a fail
382    * scenario you will lose any increments that have not been flushed.
383    * @param row The row that contains the cell to increment.
384    * @param family The column family of the cell to increment.
385    * @param qualifier The column qualifier of the cell to increment.
386    * @param amount The amount to increment the cell with (or decrement, if the
387    * amount is negative).
388    * @param durability The persistence guarantee for this increment.
389    * @return The new value, post increment.
390    * @throws IOException if a remote or network exception occurs.
391    */
392   long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier,
393       long amount, Durability durability) throws IOException;
394 
395   /**
396    * @deprecated Use {@link #incrementColumnValue(byte[], byte[], byte[], long, Durability)}
397    */
398   @Deprecated
399   long incrementColumnValue(final byte [] row, final byte [] family,
400       final byte [] qualifier, final long amount, final boolean writeToWAL)
401   throws IOException;
402 
403   /**
404    * Tells whether or not 'auto-flush' is turned on.
405    *
406    * @return {@code true} if 'auto-flush' is enabled (default), meaning
407    * {@link Put} operations don't get buffered/delayed and are immediately
408    * executed.
409    */
410   boolean isAutoFlush();
411 
412   /**
413    * Executes all the buffered {@link Put} operations.
414    * <p>
415    * This method gets called once automatically for every {@link Put} or batch
416    * of {@link Put}s (when <code>put(List<Put>)</code> is used) when
417    * {@link #isAutoFlush} is {@code true}.
418    * @throws IOException if a remote or network exception occurs.
419    */
420   void flushCommits() throws IOException;
421 
422   /**
423    * Releases any resources held or pending changes in internal buffers.
424    *
425    * @throws IOException if a remote or network exception occurs.
426    */
427   void close() throws IOException;
428 
429   /**
430    * Creates and returns a {@link com.google.protobuf.RpcChannel} instance connected to the
431    * table region containing the specified row.  The row given does not actually have
432    * to exist.  Whichever region would contain the row based on start and end keys will
433    * be used.  Note that the {@code row} parameter is also not passed to the
434    * coprocessor handler registered for this protocol, unless the {@code row}
435    * is separately passed as an argument in the service request.  The parameter
436    * here is only used to locate the region used to handle the call.
437    *
438    * <p>
439    * The obtained {@link com.google.protobuf.RpcChannel} instance can be used to access a published
440    * coprocessor {@link com.google.protobuf.Service} using standard protobuf service invocations:
441    * </p>
442    *
443    * <div style="background-color: #cccccc; padding: 2px">
444    * <blockquote><pre>
445    * CoprocessorRpcChannel channel = myTable.coprocessorService(rowkey);
446    * MyService.BlockingInterface service = MyService.newBlockingStub(channel);
447    * MyCallRequest request = MyCallRequest.newBuilder()
448    *     ...
449    *     .build();
450    * MyCallResponse response = service.myCall(null, request);
451    * </pre></blockquote></div>
452    *
453    * @param row The row key used to identify the remote region location
454    * @return A CoprocessorRpcChannel instance
455    */
456   @InterfaceAudience.Private // TODO add coproc audience level  
457   CoprocessorRpcChannel coprocessorService(byte[] row);
458 
459   /**
460    * Creates an instance of the given {@link com.google.protobuf.Service} subclass for each table
461    * region spanning the range from the {@code startKey} row to {@code endKey} row (inclusive),
462    * and invokes the passed {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call}
463    * method with each {@link Service}
464    * instance.
465    *
466    * @param service the protocol buffer {@code Service} implementation to call
467    * @param startKey start region selection with region containing this row.  If {@code null}, the
468    *                 selection will start with the first table region.
469    * @param endKey select regions up to and including the region containing this row.
470    *               If {@code null}, selection will continue through the last table region.
471    * @param callable this instance's
472    *                 {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call}
473    *                 method will be invoked once per table region, using the {@link Service}
474    *                 instance connected to that region.
475    * @param <T> the {@link Service} subclass to connect to
476    * @param <R> Return type for the {@code callable} parameter's
477    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method
478    * @return a map of result values keyed by region name
479    */
480   @InterfaceAudience.Private // TODO add coproc audience level
481   <T extends Service, R> Map<byte[],R> coprocessorService(final Class<T> service,
482       byte[] startKey, byte[] endKey, final Batch.Call<T,R> callable)
483       throws ServiceException, Throwable;
484 
485   /**
486    * Creates an instance of the given {@link com.google.protobuf.Service} subclass for each table
487    * region spanning the range from the {@code startKey} row to {@code endKey} row (inclusive),
488    * and invokes the passed {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call}
489    * method with each {@link Service} instance.
490    *
491    * <p>
492    * The given
493    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Callback#update(byte[], byte[], Object)}
494    * method will be called with the return value from each region's
495    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} invocation.
496    *</p>
497    *
498    * @param service the protocol buffer {@code Service} implementation to call
499    * @param startKey start region selection with region containing this row.  If {@code null}, the
500    *                 selection will start with the first table region.
501    * @param endKey select regions up to and including the region containing this row.
502    *               If {@code null}, selection will continue through the last table region.
503    * @param callable this instance's
504    *                 {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method
505    *                 will be invoked once per table region, using the {@link Service} instance
506    *                 connected to that region.
507    * @param callback
508    * @param <T> the {@link Service} subclass to connect to
509    * @param <R> Return type for the {@code callable} parameter's
510    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method
511    */
512   @InterfaceAudience.Private // TODO add coproc audience level
513   <T extends Service, R> void coprocessorService(final Class<T> service,
514       byte[] startKey, byte[] endKey, final Batch.Call<T,R> callable,
515       final Batch.Callback<R> callback) throws ServiceException, Throwable;
516 
517   /**
518    * See {@link #setAutoFlush(boolean, boolean)}
519    *
520    * @param autoFlush
521    *          Whether or not to enable 'auto-flush'.
522    * @deprecated in 0.96. When called with setAutoFlush(false), this function also
523    *  set clearBufferOnFail to true, which is unexpected but kept for historical reasons.
524    *  Replace it with setAutoFlush(false, false) if this is exactly what you want, or by
525    *  {@link #setAutoFlushTo(boolean)} for all other cases.
526    */
527   @Deprecated
528   void setAutoFlush(boolean autoFlush);
529 
530   /**
531    * Turns 'auto-flush' on or off.
532    * <p>
533    * When enabled (default), {@link Put} operations don't get buffered/delayed
534    * and are immediately executed. Failed operations are not retried. This is
535    * slower but safer.
536    * <p>
537    * Turning off {@code #autoFlush} means that multiple {@link Put}s will be
538    * accepted before any RPC is actually sent to do the write operations. If the
539    * application dies before pending writes get flushed to HBase, data will be
540    * lost.
541    * <p>
542    * When you turn {@code #autoFlush} off, you should also consider the
543    * {@code #clearBufferOnFail} option. By default, asynchronous {@link Put}
544    * requests will be retried on failure until successful. However, this can
545    * pollute the writeBuffer and slow down batching performance. Additionally,
546    * you may want to issue a number of Put requests and call
547    * {@link #flushCommits()} as a barrier. In both use cases, consider setting
548    * clearBufferOnFail to true to erase the buffer after {@link #flushCommits()}
549    * has been called, regardless of success.
550    * <p>
551    * In other words, if you call {@code #setAutoFlush(false)}; HBase will retry N time for each
552    *  flushCommit, including the last one when closing the table. This is NOT recommended,
553    *  most of the time you want to call {@code #setAutoFlush(false, true)}.
554    *
555    * @param autoFlush
556    *          Whether or not to enable 'auto-flush'.
557    * @param clearBufferOnFail
558    *          Whether to keep Put failures in the writeBuffer. If autoFlush is true, then
559    *          the value of this parameter is ignored and clearBufferOnFail is set to true.
560    *          Setting clearBufferOnFail to false is deprecated since 0.96.
561    * @see #flushCommits
562    */
563   void setAutoFlush(boolean autoFlush, boolean clearBufferOnFail);
564 
565   /**
566    * Set the autoFlush behavior, without changing the value of {@code clearBufferOnFail}
567    */
568   void setAutoFlushTo(boolean autoFlush);
569 
570   /**
571    * Returns the maximum size in bytes of the write buffer for this HTable.
572    * <p>
573    * The default value comes from the configuration parameter
574    * {@code hbase.client.write.buffer}.
575    * @return The size of the write buffer in bytes.
576    */
577   long getWriteBufferSize();
578 
579   /**
580    * Sets the size of the buffer in bytes.
581    * <p>
582    * If the new size is less than the current amount of data in the
583    * write buffer, the buffer gets flushed.
584    * @param writeBufferSize The new write buffer size, in bytes.
585    * @throws IOException if a remote or network exception occurs.
586    */
587   void setWriteBufferSize(long writeBufferSize) throws IOException;
588 }