View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.client;
20  
21  import com.google.protobuf.Service;
22  import com.google.protobuf.ServiceException;
23  
24  import org.apache.hadoop.classification.InterfaceAudience;
25  import org.apache.hadoop.classification.InterfaceStability;
26  import org.apache.hadoop.conf.Configuration;
27  import org.apache.hadoop.hbase.TableName;
28  import org.apache.hadoop.hbase.HTableDescriptor;
29  import org.apache.hadoop.hbase.KeyValue;
30  import org.apache.hadoop.hbase.client.coprocessor.Batch;
31  import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel;
32  
33  import java.io.Closeable;
34  import java.io.IOException;
35  import java.util.List;
36  import java.util.Map;
37  
38  /**
39   * Used to communicate with a single HBase table.
40   * Obtain an instance from an {@link HConnection}.
41   *
42   * @since 0.21.0
43   */
44  @InterfaceAudience.Public
45  @InterfaceStability.Stable
46  public interface HTableInterface extends Closeable {
47  
48    /**
49     * Gets the name of this table.
50     *
51     * @return the table name.
52     */
53    byte[] getTableName();
54  
55    /**
56     * Gets the fully qualified table name instance of this table.
       *
       * @return the fully qualified {@link TableName} of this table.
57     */
58    TableName getName();
59  
60    /**
61     * Returns the {@link Configuration} object used by this instance.
62     * <p>
63     * The reference returned is not a copy, so any change made to it will
64     * affect this instance.
65     */
66    Configuration getConfiguration();
67  
68    /**
69     * Gets the {@link HTableDescriptor table descriptor} for this table.
       * @return the {@link HTableDescriptor} for this table.
70     * @throws IOException if a remote or network exception occurs.
71     */
72    HTableDescriptor getTableDescriptor() throws IOException;
73  
74    /**
75     * Tests for the existence of columns in the table, as specified by the Get.
76     * <p>
77     *
78     * This will return true if the Get matches one or more keys, false if not.
79     * <p>
80     *
81     * This is a server-side call so it prevents any data from being transferred to
82     * the client.
83     *
84     * @param get the Get
85     * @return true if the specified Get matches one or more keys, false if not
86     * @throws IOException if the existence check fails with an I/O error
87     */
88    boolean exists(Get get) throws IOException;
89  
90    /**
91     * Tests for the existence of columns in the table, as specified by the Gets.
92     * <p>
93     *
94     * This will return an array of booleans. Each value will be true if the related Get matches
95     * one or more keys, false if not.
96     * <p>
97     *
98     * This is a server-side call so it prevents any data from being transferred to
99     * the client.
100    *
101    * @param gets the Gets
102    * @return array of Booleans; each value is true if the related Get matches one or more keys,
       *         false if not
103    * @throws IOException if the existence check fails with an I/O error
104    */
105   Boolean[] exists(List<Get> gets) throws IOException;
106 
107   /**
108    * Method that does a batch call on Deletes, Gets, Puts, Increments, Appends and RowMutations.
109    * The ordering of execution of the actions is not defined. Meaning if you do a Put and a
110    * Get in the same {@link #batch} call, you will not necessarily be
111    * guaranteed that the Get returns what the Put had put.
112    *
113    * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects
114    * @param results Empty Object[], same size as actions. Provides access to partial
115    *                results, in case an exception is thrown. A null in the result array means that
116    *                the call for that action failed, even after retries
117    * @throws IOException if an I/O error occurs while executing the batch
118    * @since 0.90.0
119    */
120   void batch(final List<?extends Row> actions, final Object[] results) throws IOException, InterruptedException;
121 
122   /**
123    * Same as {@link #batch(List, Object[])}, but returns an array of
124    * results instead of using a results parameter reference.
125    *
126    * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects
127    * @return the results from the actions. A null in the return array means that
128    *         the call for that action failed, even after retries
129    * @throws IOException if an I/O error occurs while executing the batch
130    * @since 0.90.0
131    */
132   Object[] batch(final List<? extends Row> actions) throws IOException, InterruptedException;
133 
134   /**
135    * Same as {@link #batch(List, Object[])}, but with a callback.
136    * @since 0.96.0
137    */
138   <R> void batchCallback(
139     final List<? extends Row> actions, final Object[] results, final Batch.Callback<R> callback
140   )
141     throws IOException, InterruptedException;
142 
143 
144   /**
145    * Same as {@link #batch(List)}, but with a callback.
146    * @since 0.96.0
147    */
148   <R> Object[] batchCallback(
149     List<? extends Row> actions, Batch.Callback<R> callback
150   ) throws IOException,
151     InterruptedException;
152 
153   /**
154    * Extracts certain cells from a given row.
155    * @param get The object that specifies what data to fetch and from which row.
156    * @return The data coming from the specified row, if it exists.  If the row
157    * specified doesn't exist, the {@link Result} instance returned won't
158    * contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}.
159    * @throws IOException if a remote or network exception occurs.
160    * @since 0.20.0
161    */
162   Result get(Get get) throws IOException;
163 
164   /**
165    * Extracts certain cells from the given rows, in batch.
166    *
167    * @param gets The objects that specify what data to fetch and from which rows.
168    *
169    * @return The data coming from the specified rows, if it exists.  If the row
170    *         specified doesn't exist, the {@link Result} instance returned won't
171    *         contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}.
172    *         If there are any failures even after retries, there will be a null in
173    *         the results array for those Gets, AND an exception will be thrown.
174    * @throws IOException if a remote or network exception occurs.
175    *
176    * @since 0.90.0
177    */
178   Result[] get(List<Get> gets) throws IOException;
179 
180   /**
181    * Returns the row that matches <i>row</i> exactly,
182    * or the one that immediately precedes it.
183    *
184    * @param row A row key.
185    * @param family Column family to include in the {@link Result}.
186    * @throws IOException if a remote or network exception occurs.
187    * @since 0.20.0
188    *
189    * @deprecated As of version 0.92 this method is deprecated without
190    * replacement.
191    * getRowOrBefore is used internally to find entries in hbase:meta and makes
192    * various assumptions about the table (which are true for hbase:meta but not
193    * in general) to be efficient.
194    */
      // NOTE(review): the Javadoc above carries a deprecated tag, but this declaration has no
      // matching Deprecated annotation, unlike the other deprecated methods in this interface.
195   Result getRowOrBefore(byte[] row, byte[] family) throws IOException;
196 
197   /**
198    * Returns a scanner on the current table as specified by the {@link Scan}
199    * object.
200    * Note that the passed {@link Scan}'s start row and caching properties
201    * may be changed.
202    *
203    * @param scan A configured {@link Scan} object.
204    * @return A scanner.
205    * @throws IOException if a remote or network exception occurs.
206    * @since 0.20.0
207    */
208   ResultScanner getScanner(Scan scan) throws IOException;
209 
210   /**
211    * Gets a scanner on the current table for the given family.
212    *
213    * @param family The column family to scan.
214    * @return A scanner.
215    * @throws IOException if a remote or network exception occurs.
216    * @since 0.20.0
217    */
218   ResultScanner getScanner(byte[] family) throws IOException;
219 
220   /**
221    * Gets a scanner on the current table for the given family and qualifier.
222    *
223    * @param family The column family to scan.
224    * @param qualifier The column qualifier to scan.
225    * @return A scanner.
226    * @throws IOException if a remote or network exception occurs.
227    * @since 0.20.0
228    */
229   ResultScanner getScanner(byte[] family, byte[] qualifier) throws IOException;
230 
231 
232   /**
233    * Puts some data in the table.
234    * <p>
235    * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered
236    * until the internal buffer is full.
237    * @param put The data to put.
238    * @throws IOException if a remote or network exception occurs.
239    * @since 0.20.0
240    */
241   void put(Put put) throws IOException;
242 
243   /**
244    * Puts some data in the table, in batch.
245    * <p>
246    * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered
247    * until the internal buffer is full.
248    * <p>
249    * This can be used for group commit, or for submitting user defined
250    * batches.  The writeBuffer will be periodically inspected while the List
251    * is processed, so depending on the List size the writeBuffer may flush
252    * not at all, or more than once.
253    * @param puts The list of mutations to apply. The batch put is done by
254    * aggregating the iteration of the Puts over the write buffer
255    * at the client-side for a single RPC call.
256    * @throws IOException if a remote or network exception occurs.
257    * @since 0.20.0
258    */
259   void put(List<Put> puts) throws IOException;
260 
261   /**
262    * Atomically checks if a row/family/qualifier value matches the expected
263    * value. If it does, it adds the put.  If the passed value is null, the check
264    * is for the lack of column (i.e. non-existence).
265    *
266    * @param row to check
267    * @param family column family to check
268    * @param qualifier column qualifier to check
269    * @param value the expected value
270    * @param put data to put if check succeeds
271    * @throws IOException if the check-and-put fails with an I/O error
272    * @return true if the new put was executed, false otherwise
273    */
274   boolean checkAndPut(byte[] row, byte[] family, byte[] qualifier,
275       byte[] value, Put put) throws IOException;
276 
277   /**
278    * Deletes the specified cells/row.
279    *
280    * @param delete The object that specifies what to delete.
281    * @throws IOException if a remote or network exception occurs.
282    * @since 0.20.0
283    */
284   void delete(Delete delete) throws IOException;
285 
286   /**
287    * Deletes the specified cells/rows in bulk.
288    * @param deletes List of things to delete.  List gets modified by this
289    * method (in particular it gets re-ordered, so the order in which the elements
290    * are inserted in the list gives no guarantee as to the order in which the
291    * {@link Delete}s are executed).
292    * @throws IOException if a remote or network exception occurs. In that case
293    * the {@code deletes} argument will contain the {@link Delete} instances
294    * that have not been successfully applied.
295    * @since 0.20.1
296    */
297   void delete(List<Delete> deletes) throws IOException;
298 
299   /**
300    * Atomically checks if a row/family/qualifier value matches the expected
301    * value. If it does, it adds the delete.  If the passed value is null, the
302    * check is for the lack of column (i.e. non-existence).
303    *
304    * @param row to check
305    * @param family column family to check
306    * @param qualifier column qualifier to check
307    * @param value the expected value
308    * @param delete data to delete if check succeeds
309    * @throws IOException if the check-and-delete fails with an I/O error
310    * @return true if the new delete was executed, false otherwise
311    */
312   boolean checkAndDelete(byte[] row, byte[] family, byte[] qualifier,
313       byte[] value, Delete delete) throws IOException;
314 
315   /**
316    * Performs multiple mutations atomically on a single row. Currently
317    * {@link Put} and {@link Delete} are supported.
318    *
319    * @param rm object that specifies the set of mutations to perform atomically
320    * @throws IOException if the mutations fail with an I/O error
321    */
322   void mutateRow(final RowMutations rm) throws IOException;
323 
324   /**
325    * Appends values to one or more columns within a single row.
326    * <p>
327    * This operation does not appear atomic to readers.  Appends are done
328    * under a single row lock, so write operations to a row are synchronized, but
329    * readers do not take row locks so get and scan operations can see this
330    * operation partially completed.
331    *
332    * @param append object that specifies the columns and values to be used
333    *                  for the append operations
334    * @throws IOException if the append fails with an I/O error
335    * @return values of columns after the append operation (may be null)
336    */
337   Result append(final Append append) throws IOException;
338 
339   /**
340    * Increments one or more columns within a single row.
341    * <p>
342    * This operation does not appear atomic to readers.  Increments are done
343    * under a single row lock, so write operations to a row are synchronized, but
344    * readers do not take row locks so get and scan operations can see this
345    * operation partially completed.
346    *
347    * @param increment object that specifies the columns and amounts to be used
348    *                  for the increment operations
349    * @throws IOException if the increment fails with an I/O error
350    * @return values of columns after the increment
351    */
352   Result increment(final Increment increment) throws IOException;
353 
354   /**
355    * See {@link #incrementColumnValue(byte[], byte[], byte[], long, Durability)}.
356    * <p>
357    * The {@link Durability} is defaulted to {@link Durability#SYNC_WAL}.
358    * @param row The row that contains the cell to increment.
359    * @param family The column family of the cell to increment.
360    * @param qualifier The column qualifier of the cell to increment.
361    * @param amount The amount to increment the cell with (or decrement, if the
362    * amount is negative).
363    * @return The new value, post increment.
364    * @throws IOException if a remote or network exception occurs.
365    */
366   long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier,
367       long amount) throws IOException;
368 
369   /**
370    * Atomically increments a column value. If the column value already exists
371    * and is not a big-endian long, this could throw an exception. If the column
372    * value does not yet exist it is initialized to <code>amount</code> and
373    * written to the specified column.
374    *
375    * <p>Setting durability to {@link Durability#SKIP_WAL} means that in a fail
376    * scenario you will lose any increments that have not been flushed.
377    * @param row The row that contains the cell to increment.
378    * @param family The column family of the cell to increment.
379    * @param qualifier The column qualifier of the cell to increment.
380    * @param amount The amount to increment the cell with (or decrement, if the
381    * amount is negative).
382    * @param durability The persistence guarantee for this increment.
383    * @return The new value, post increment.
384    * @throws IOException if a remote or network exception occurs.
385    */
386   long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier,
387       long amount, Durability durability) throws IOException;
388 
389   /**
390    * @deprecated Use {@link #incrementColumnValue(byte[], byte[], byte[], long, Durability)}
391    */
392   @Deprecated
393   long incrementColumnValue(final byte [] row, final byte [] family,
394       final byte [] qualifier, final long amount, final boolean writeToWAL)
395   throws IOException;
396 
397   /**
398    * Tells whether or not 'auto-flush' is turned on.
399    *
400    * @return {@code true} if 'auto-flush' is enabled (default), meaning
401    * {@link Put} operations don't get buffered/delayed and are immediately
402    * executed.
403    */
404   boolean isAutoFlush();
405 
406   /**
407    * Executes all the buffered {@link Put} operations.
408    * <p>
409    * This method gets called once automatically for every {@link Put} or batch
410    * of {@link Put}s (when {@code put(List<Put>)} is used) when
411    * {@link #isAutoFlush} is {@code true}.
412    * @throws IOException if a remote or network exception occurs.
413    */
414   void flushCommits() throws IOException;
415 
416   /**
417    * Releases any resources held or pending changes in internal buffers.
418    *
419    * @throws IOException if a remote or network exception occurs.
420    */
421   void close() throws IOException;
422 
423   /**
424    * Creates and returns a {@link com.google.protobuf.RpcChannel} instance connected to the
425    * table region containing the specified row.  The row given does not actually have
426    * to exist.  Whichever region would contain the row based on start and end keys will
427    * be used.  Note that the {@code row} parameter is also not passed to the
428    * coprocessor handler registered for this protocol, unless the {@code row}
429    * is separately passed as an argument in the service request.  The parameter
430    * here is only used to locate the region used to handle the call.
431    *
432    * <p>
433    * The obtained {@link com.google.protobuf.RpcChannel} instance can be used to access a published
434    * coprocessor {@link com.google.protobuf.Service} using standard protobuf service invocations:
435    * </p>
436    *
437    * <div style="background-color: #cccccc; padding: 2px">
438    * <blockquote><pre>
439    * CoprocessorRpcChannel channel = myTable.coprocessorService(rowkey);
440    * MyService.BlockingInterface service = MyService.newBlockingStub(channel);
441    * MyCallRequest request = MyCallRequest.newBuilder()
442    *     ...
443    *     .build();
444    * MyCallResponse response = service.myCall(null, request);
445    * </pre></blockquote></div>
446    *
447    * @param row The row key used to identify the remote region location
448    * @return A CoprocessorRpcChannel instance
449    */
450   @InterfaceAudience.Private // TODO add coproc audience level  
451   CoprocessorRpcChannel coprocessorService(byte[] row);
452 
453   /**
454    * Creates an instance of the given {@link com.google.protobuf.Service} subclass for each table
455    * region spanning the range from the {@code startKey} row to {@code endKey} row (inclusive),
456    * and invokes the passed {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call}
457    * method with each {@link Service}
458    * instance.
459    *
460    * @param service the protocol buffer {@code Service} implementation to call
461    * @param startKey start region selection with region containing this row.  If {@code null}, the
462    *                 selection will start with the first table region.
463    * @param endKey select regions up to and including the region containing this row.
464    *               If {@code null}, selection will continue through the last table region.
465    * @param callable this instance's
466    *                 {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call}
467    *                 method will be invoked once per table region, using the {@link Service}
468    *                 instance connected to that region.
469    * @param <T> the {@link Service} subclass to connect to
470    * @param <R> Return type for the {@code callable} parameter's
471    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method
472    * @return a map of result values keyed by region name
473    */
474   @InterfaceAudience.Private // TODO add coproc audience level
475   <T extends Service, R> Map<byte[],R> coprocessorService(final Class<T> service,
476       byte[] startKey, byte[] endKey, final Batch.Call<T,R> callable)
477       throws ServiceException, Throwable;
478 
479   /**
480    * Creates an instance of the given {@link com.google.protobuf.Service} subclass for each table
481    * region spanning the range from the {@code startKey} row to {@code endKey} row (inclusive),
482    * and invokes the passed {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call}
483    * method with each {@link Service} instance.
484    *
485    * <p>
486    * The given
487    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Callback#update(byte[], byte[], Object)}
488    * method will be called with the return value from each region's
489    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} invocation.
490    * </p>
491    *
492    * @param service the protocol buffer {@code Service} implementation to call
493    * @param startKey start region selection with region containing this row.  If {@code null}, the
494    *                 selection will start with the first table region.
495    * @param endKey select regions up to and including the region containing this row.
496    *               If {@code null}, selection will continue through the last table region.
497    * @param callable this instance's
498    *                 {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method
499    *                 will be invoked once per table region, using the {@link Service} instance
500    *                 connected to that region.
501    * @param callback this instance's
       *                 {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Callback#update(byte[], byte[], Object)}
       *                 method will be called with the return value from each region's
       *                 {@code callable} invocation.
502    * @param <T> the {@link Service} subclass to connect to
503    * @param <R> Return type for the {@code callable} parameter's
504    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method
505    */
506   @InterfaceAudience.Private // TODO add coproc audience level
507   <T extends Service, R> void coprocessorService(final Class<T> service,
508       byte[] startKey, byte[] endKey, final Batch.Call<T,R> callable,
509       final Batch.Callback<R> callback) throws ServiceException, Throwable;
510 
511   /**
512    * See {@link #setAutoFlush(boolean, boolean)}.
513    *
514    * @param autoFlush
515    *          Whether or not to enable 'auto-flush'.
516    * @deprecated in 0.96. When called with setAutoFlush(false), this function also
517    *  sets clearBufferOnFail to true, which is unexpected but kept for historical reasons.
518    *  Replace it with setAutoFlush(false, false) if this is exactly what you want, or by
519    *  {@link #setAutoFlushTo(boolean)} for all other cases.
520    */
521   @Deprecated
522   void setAutoFlush(boolean autoFlush);
523 
524   /**
525    * Turns 'auto-flush' on or off.
526    * <p>
527    * When enabled (default), {@link Put} operations don't get buffered/delayed
528    * and are immediately executed. Failed operations are not retried. This is
529    * slower but safer.
530    * <p>
531    * Turning off {@code autoFlush} means that multiple {@link Put}s will be
532    * accepted before any RPC is actually sent to do the write operations. If the
533    * application dies before pending writes get flushed to HBase, data will be
534    * lost.
535    * <p>
536    * When you turn {@code autoFlush} off, you should also consider the
537    * {@code clearBufferOnFail} option. By default, asynchronous {@link Put}
538    * requests will be retried on failure until successful. However, this can
539    * pollute the writeBuffer and slow down batching performance. Additionally,
540    * you may want to issue a number of Put requests and call
541    * {@link #flushCommits()} as a barrier. In both use cases, consider setting
542    * clearBufferOnFail to true to erase the buffer after {@link #flushCommits()}
543    * has been called, regardless of success.
544    * <p>
545    * In other words, if you call {@code setAutoFlush(false)}, HBase will retry N times for each
546    *  flushCommit, including the last one when closing the table. This is NOT recommended;
547    *  most of the time you want to call {@code setAutoFlush(false, true)}.
548    *
549    * @param autoFlush
550    *          Whether or not to enable 'auto-flush'.
551    * @param clearBufferOnFail
552    *          Whether to erase the writeBuffer after {@link #flushCommits()} has been called,
553    *          regardless of success, instead of keeping Put failures in it. If autoFlush is true,
       *          then the value of this parameter is ignored and clearBufferOnFail is set to true.
554    *          Setting clearBufferOnFail to false is deprecated since 0.96.
555    * @see #flushCommits
556    */
557   void setAutoFlush(boolean autoFlush, boolean clearBufferOnFail);
558 
559   /**
560    * Sets the autoFlush behavior, without changing the value of {@code clearBufferOnFail}.
       *
       * @param autoFlush
       *          Whether or not to enable 'auto-flush'.
561    */
562   void setAutoFlushTo(boolean autoFlush);
563 
564   /**
565    * Returns the maximum size in bytes of the write buffer for this HTable.
566    * <p>
567    * The default value comes from the configuration parameter
568    * {@code hbase.client.write.buffer}.
569    * @return The size of the write buffer in bytes.
570    */
571   long getWriteBufferSize();
572 
573   /**
574    * Sets the size of the buffer in bytes.
575    * <p>
576    * If the new size is less than the current amount of data in the
577    * write buffer, the buffer gets flushed.
578    * @param writeBufferSize The new write buffer size, in bytes.
579    * @throws IOException if a remote or network exception occurs.
580    */
581   void setWriteBufferSize(long writeBufferSize) throws IOException;
582 }