1 /** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 package org.apache.hadoop.hbase.client; 20 21 import com.google.protobuf.Service; 22 import com.google.protobuf.ServiceException; 23 24 import org.apache.hadoop.classification.InterfaceAudience; 25 import org.apache.hadoop.classification.InterfaceStability; 26 import org.apache.hadoop.conf.Configuration; 27 import org.apache.hadoop.hbase.TableName; 28 import org.apache.hadoop.hbase.HTableDescriptor; 29 import org.apache.hadoop.hbase.KeyValue; 30 import org.apache.hadoop.hbase.client.coprocessor.Batch; 31 import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel; 32 33 import java.io.Closeable; 34 import java.io.IOException; 35 import java.util.List; 36 import java.util.Map; 37 38 /** 39 * Used to communicate with a single HBase table. 40 * Obtain an instance from an {@link HConnection}. 41 * 42 * @since 0.21.0 43 */ 44 @InterfaceAudience.Public 45 @InterfaceStability.Stable 46 public interface HTableInterface extends Closeable { 47 48 /** 49 * Gets the name of this table. 50 * 51 * @return the table name. 52 */ 53 byte[] getTableName(); 54 55 /** 56 * Gets the fully qualified table name instance of this table. 57 */ 58 TableName getName(); 59 60 /** 61 * Returns the {@link Configuration} object used by this instance. 62 * <p> 63 * The reference returned is not a copy, so any change made to it will 64 * affect this instance. 65 */ 66 Configuration getConfiguration(); 67 68 /** 69 * Gets the {@link HTableDescriptor table descriptor} for this table. 70 * @throws IOException if a remote or network exception occurs. 71 */ 72 HTableDescriptor getTableDescriptor() throws IOException; 73 74 /** 75 * Test for the existence of columns in the table, as specified by the Get. 76 * <p> 77 * 78 * This will return true if the Get matches one or more keys, false if not. 79 * <p> 80 * 81 * This is a server-side call so it prevents any data from being transfered to 82 * the client. 83 * 84 * @param get the Get 85 * @return true if the specified Get matches one or more keys, false if not 86 * @throws IOException e 87 */ 88 boolean exists(Get get) throws IOException; 89 90 /** 91 * Test for the existence of columns in the table, as specified by the Gets. 92 * <p> 93 * 94 * This will return an array of booleans. Each value will be true if the related Get matches 95 * one or more keys, false if not. 96 * <p> 97 * 98 * This is a server-side call so it prevents any data from being transfered to 99 * the client. 100 * 101 * @param gets the Gets 102 * @return Array of Boolean true if the specified Get matches one or more keys, false if not 103 * @throws IOException e 104 */ 105 Boolean[] exists(List<Get> gets) throws IOException; 106 107 /** 108 * Method that does a batch call on Deletes, Gets, Puts, Increments, Appends and RowMutations. 109 * The ordering of execution of the actions is not defined. Meaning if you do a Put and a 110 * Get in the same {@link #batch} call, you will not necessarily be 111 * guaranteed that the Get returns what the Put had put. 112 * 113 * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects 114 * @param results Empty Object[], same size as actions. Provides access to partial 115 * results, in case an exception is thrown. A null in the result array means that 116 * the call for that action failed, even after retries 117 * @throws IOException 118 * @since 0.90.0 119 */ 120 void batch(final List<?extends Row> actions, final Object[] results) throws IOException, InterruptedException; 121 122 /** 123 * Same as {@link #batch(List, Object[])}, but returns an array of 124 * results instead of using a results parameter reference. 125 * 126 * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects 127 * @return the results from the actions. A null in the return array means that 128 * the call for that action failed, even after retries 129 * @throws IOException 130 * @since 0.90.0 131 */ 132 Object[] batch(final List<? extends Row> actions) throws IOException, InterruptedException; 133 134 /** 135 * Same as {@link #batch(List, Object[])}, but with a callback. 136 * @since 0.96.0 137 */ 138 <R> void batchCallback( 139 final List<? extends Row> actions, final Object[] results, final Batch.Callback<R> callback 140 ) 141 throws IOException, InterruptedException; 142 143 144 /** 145 * Same as {@link #batch(List)}, but with a callback. 146 * @since 0.96.0 147 */ 148 <R> Object[] batchCallback( 149 List<? extends Row> actions, Batch.Callback<R> callback 150 ) throws IOException, 151 InterruptedException; 152 153 /** 154 * Extracts certain cells from a given row. 155 * @param get The object that specifies what data to fetch and from which row. 156 * @return The data coming from the specified row, if it exists. If the row 157 * specified doesn't exist, the {@link Result} instance returned won't 158 * contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}. 159 * @throws IOException if a remote or network exception occurs. 160 * @since 0.20.0 161 */ 162 Result get(Get get) throws IOException; 163 164 /** 165 * Extracts certain cells from the given rows, in batch. 166 * 167 * @param gets The objects that specify what data to fetch and from which rows. 168 * 169 * @return The data coming from the specified rows, if it exists. If the row 170 * specified doesn't exist, the {@link Result} instance returned won't 171 * contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}. 172 * If there are any failures even after retries, there will be a null in 173 * the results array for those Gets, AND an exception will be thrown. 174 * @throws IOException if a remote or network exception occurs. 175 * 176 * @since 0.90.0 177 */ 178 Result[] get(List<Get> gets) throws IOException; 179 180 /** 181 * Return the row that matches <i>row</i> exactly, 182 * or the one that immediately precedes it. 183 * 184 * @param row A row key. 185 * @param family Column family to include in the {@link Result}. 186 * @throws IOException if a remote or network exception occurs. 187 * @since 0.20.0 188 * 189 * @deprecated As of version 0.92 this method is deprecated without 190 * replacement. 191 * getRowOrBefore is used internally to find entries in hbase:meta and makes 192 * various assumptions about the table (which are true for hbase:meta but not 193 * in general) to be efficient. 194 */ 195 Result getRowOrBefore(byte[] row, byte[] family) throws IOException; 196 197 /** 198 * Returns a scanner on the current table as specified by the {@link Scan} 199 * object. 200 * Note that the passed {@link Scan}'s start row and caching properties 201 * maybe changed. 202 * 203 * @param scan A configured {@link Scan} object. 204 * @return A scanner. 205 * @throws IOException if a remote or network exception occurs. 206 * @since 0.20.0 207 */ 208 ResultScanner getScanner(Scan scan) throws IOException; 209 210 /** 211 * Gets a scanner on the current table for the given family. 212 * 213 * @param family The column family to scan. 214 * @return A scanner. 215 * @throws IOException if a remote or network exception occurs. 216 * @since 0.20.0 217 */ 218 ResultScanner getScanner(byte[] family) throws IOException; 219 220 /** 221 * Gets a scanner on the current table for the given family and qualifier. 222 * 223 * @param family The column family to scan. 224 * @param qualifier The column qualifier to scan. 225 * @return A scanner. 226 * @throws IOException if a remote or network exception occurs. 227 * @since 0.20.0 228 */ 229 ResultScanner getScanner(byte[] family, byte[] qualifier) throws IOException; 230 231 232 /** 233 * Puts some data in the table. 234 * <p> 235 * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered 236 * until the internal buffer is full. 237 * @param put The data to put. 238 * @throws IOException if a remote or network exception occurs. 239 * @since 0.20.0 240 */ 241 void put(Put put) throws IOException; 242 243 /** 244 * Puts some data in the table, in batch. 245 * <p> 246 * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered 247 * until the internal buffer is full. 248 * <p> 249 * This can be used for group commit, or for submitting user defined 250 * batches. The writeBuffer will be periodically inspected while the List 251 * is processed, so depending on the List size the writeBuffer may flush 252 * not at all, or more than once. 253 * @param puts The list of mutations to apply. The batch put is done by 254 * aggregating the iteration of the Puts over the write buffer 255 * at the client-side for a single RPC call. 256 * @throws IOException if a remote or network exception occurs. 257 * @since 0.20.0 258 */ 259 void put(List<Put> puts) throws IOException; 260 261 /** 262 * Atomically checks if a row/family/qualifier value matches the expected 263 * value. If it does, it adds the put. If the passed value is null, the check 264 * is for the lack of column (ie: non-existance) 265 * 266 * @param row to check 267 * @param family column family to check 268 * @param qualifier column qualifier to check 269 * @param value the expected value 270 * @param put data to put if check succeeds 271 * @throws IOException e 272 * @return true if the new put was executed, false otherwise 273 */ 274 boolean checkAndPut(byte[] row, byte[] family, byte[] qualifier, 275 byte[] value, Put put) throws IOException; 276 277 /** 278 * Deletes the specified cells/row. 279 * 280 * @param delete The object that specifies what to delete. 281 * @throws IOException if a remote or network exception occurs. 282 * @since 0.20.0 283 */ 284 void delete(Delete delete) throws IOException; 285 286 /** 287 * Deletes the specified cells/rows in bulk. 288 * @param deletes List of things to delete. List gets modified by this 289 * method (in particular it gets re-ordered, so the order in which the elements 290 * are inserted in the list gives no guarantee as to the order in which the 291 * {@link Delete}s are executed). 292 * @throws IOException if a remote or network exception occurs. In that case 293 * the {@code deletes} argument will contain the {@link Delete} instances 294 * that have not be successfully applied. 295 * @since 0.20.1 296 */ 297 void delete(List<Delete> deletes) throws IOException; 298 299 /** 300 * Atomically checks if a row/family/qualifier value matches the expected 301 * value. If it does, it adds the delete. If the passed value is null, the 302 * check is for the lack of column (ie: non-existance) 303 * 304 * @param row to check 305 * @param family column family to check 306 * @param qualifier column qualifier to check 307 * @param value the expected value 308 * @param delete data to delete if check succeeds 309 * @throws IOException e 310 * @return true if the new delete was executed, false otherwise 311 */ 312 boolean checkAndDelete(byte[] row, byte[] family, byte[] qualifier, 313 byte[] value, Delete delete) throws IOException; 314 315 /** 316 * Performs multiple mutations atomically on a single row. Currently 317 * {@link Put} and {@link Delete} are supported. 318 * 319 * @param rm object that specifies the set of mutations to perform atomically 320 * @throws IOException 321 */ 322 void mutateRow(final RowMutations rm) throws IOException; 323 324 /** 325 * Appends values to one or more columns within a single row. 326 * <p> 327 * This operation does not appear atomic to readers. Appends are done 328 * under a single row lock, so write operations to a row are synchronized, but 329 * readers do not take row locks so get and scan operations can see this 330 * operation partially completed. 331 * 332 * @param append object that specifies the columns and amounts to be used 333 * for the increment operations 334 * @throws IOException e 335 * @return values of columns after the append operation (maybe null) 336 */ 337 Result append(final Append append) throws IOException; 338 339 /** 340 * Increments one or more columns within a single row. 341 * <p> 342 * This operation does not appear atomic to readers. Increments are done 343 * under a single row lock, so write operations to a row are synchronized, but 344 * readers do not take row locks so get and scan operations can see this 345 * operation partially completed. 346 * 347 * @param increment object that specifies the columns and amounts to be used 348 * for the increment operations 349 * @throws IOException e 350 * @return values of columns after the increment 351 */ 352 Result increment(final Increment increment) throws IOException; 353 354 /** 355 * See {@link #incrementColumnValue(byte[], byte[], byte[], long, Durability)} 356 * <p> 357 * The {@link Durability} is defaulted to {@link Durability#SYNC_WAL}. 358 * @param row The row that contains the cell to increment. 359 * @param family The column family of the cell to increment. 360 * @param qualifier The column qualifier of the cell to increment. 361 * @param amount The amount to increment the cell with (or decrement, if the 362 * amount is negative). 363 * @return The new value, post increment. 364 * @throws IOException if a remote or network exception occurs. 365 */ 366 long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier, 367 long amount) throws IOException; 368 369 /** 370 * Atomically increments a column value. If the column value already exists 371 * and is not a big-endian long, this could throw an exception. If the column 372 * value does not yet exist it is initialized to <code>amount</code> and 373 * written to the specified column. 374 * 375 * <p>Setting durability to {@link Durability#SKIP_WAL} means that in a fail 376 * scenario you will lose any increments that have not been flushed. 377 * @param row The row that contains the cell to increment. 378 * @param family The column family of the cell to increment. 379 * @param qualifier The column qualifier of the cell to increment. 380 * @param amount The amount to increment the cell with (or decrement, if the 381 * amount is negative). 382 * @param durability The persistence guarantee for this increment. 383 * @return The new value, post increment. 384 * @throws IOException if a remote or network exception occurs. 385 */ 386 long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier, 387 long amount, Durability durability) throws IOException; 388 389 /** 390 * @deprecated Use {@link #incrementColumnValue(byte[], byte[], byte[], long, Durability)} 391 */ 392 @Deprecated 393 long incrementColumnValue(final byte [] row, final byte [] family, 394 final byte [] qualifier, final long amount, final boolean writeToWAL) 395 throws IOException; 396 397 /** 398 * Tells whether or not 'auto-flush' is turned on. 399 * 400 * @return {@code true} if 'auto-flush' is enabled (default), meaning 401 * {@link Put} operations don't get buffered/delayed and are immediately 402 * executed. 403 */ 404 boolean isAutoFlush(); 405 406 /** 407 * Executes all the buffered {@link Put} operations. 408 * <p> 409 * This method gets called once automatically for every {@link Put} or batch 410 * of {@link Put}s (when <code>put(List<Put>)</code> is used) when 411 * {@link #isAutoFlush} is {@code true}. 412 * @throws IOException if a remote or network exception occurs. 413 */ 414 void flushCommits() throws IOException; 415 416 /** 417 * Releases any resources held or pending changes in internal buffers. 418 * 419 * @throws IOException if a remote or network exception occurs. 420 */ 421 void close() throws IOException; 422 423 /** 424 * Creates and returns a {@link com.google.protobuf.RpcChannel} instance connected to the 425 * table region containing the specified row. The row given does not actually have 426 * to exist. Whichever region would contain the row based on start and end keys will 427 * be used. Note that the {@code row} parameter is also not passed to the 428 * coprocessor handler registered for this protocol, unless the {@code row} 429 * is separately passed as an argument in the service request. The parameter 430 * here is only used to locate the region used to handle the call. 431 * 432 * <p> 433 * The obtained {@link com.google.protobuf.RpcChannel} instance can be used to access a published 434 * coprocessor {@link com.google.protobuf.Service} using standard protobuf service invocations: 435 * </p> 436 * 437 * <div style="background-color: #cccccc; padding: 2px"> 438 * <blockquote><pre> 439 * CoprocessorRpcChannel channel = myTable.coprocessorService(rowkey); 440 * MyService.BlockingInterface service = MyService.newBlockingStub(channel); 441 * MyCallRequest request = MyCallRequest.newBuilder() 442 * ... 443 * .build(); 444 * MyCallResponse response = service.myCall(null, request); 445 * </pre></blockquote></div> 446 * 447 * @param row The row key used to identify the remote region location 448 * @return A CoprocessorRpcChannel instance 449 */ 450 @InterfaceAudience.Private // TODO add coproc audience level 451 CoprocessorRpcChannel coprocessorService(byte[] row); 452 453 /** 454 * Creates an instance of the given {@link com.google.protobuf.Service} subclass for each table 455 * region spanning the range from the {@code startKey} row to {@code endKey} row (inclusive), 456 * and invokes the passed {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} 457 * method with each {@link Service} 458 * instance. 459 * 460 * @param service the protocol buffer {@code Service} implementation to call 461 * @param startKey start region selection with region containing this row. If {@code null}, the 462 * selection will start with the first table region. 463 * @param endKey select regions up to and including the region containing this row. 464 * If {@code null}, selection will continue through the last table region. 465 * @param callable this instance's 466 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} 467 * method will be invoked once per table region, using the {@link Service} 468 * instance connected to that region. 469 * @param <T> the {@link Service} subclass to connect to 470 * @param <R> Return type for the {@code callable} parameter's 471 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method 472 * @return a map of result values keyed by region name 473 */ 474 @InterfaceAudience.Private // TODO add coproc audience level 475 <T extends Service, R> Map<byte[],R> coprocessorService(final Class<T> service, 476 byte[] startKey, byte[] endKey, final Batch.Call<T,R> callable) 477 throws ServiceException, Throwable; 478 479 /** 480 * Creates an instance of the given {@link com.google.protobuf.Service} subclass for each table 481 * region spanning the range from the {@code startKey} row to {@code endKey} row (inclusive), 482 * and invokes the passed {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} 483 * method with each {@link Service} instance. 484 * 485 * <p> 486 * The given 487 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Callback#update(byte[], byte[], Object)} 488 * method will be called with the return value from each region's 489 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} invocation. 490 *</p> 491 * 492 * @param service the protocol buffer {@code Service} implementation to call 493 * @param startKey start region selection with region containing this row. If {@code null}, the 494 * selection will start with the first table region. 495 * @param endKey select regions up to and including the region containing this row. 496 * If {@code null}, selection will continue through the last table region. 497 * @param callable this instance's 498 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method 499 * will be invoked once per table region, using the {@link Service} instance 500 * connected to that region. 501 * @param callback 502 * @param <T> the {@link Service} subclass to connect to 503 * @param <R> Return type for the {@code callable} parameter's 504 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method 505 */ 506 @InterfaceAudience.Private // TODO add coproc audience level 507 <T extends Service, R> void coprocessorService(final Class<T> service, 508 byte[] startKey, byte[] endKey, final Batch.Call<T,R> callable, 509 final Batch.Callback<R> callback) throws ServiceException, Throwable; 510 511 /** 512 * See {@link #setAutoFlush(boolean, boolean)} 513 * 514 * @param autoFlush 515 * Whether or not to enable 'auto-flush'. 516 * @deprecated in 0.96. When called with setAutoFlush(false), this function also 517 * set clearBufferOnFail to true, which is unexpected but kept for historical reasons. 518 * Replace it with setAutoFlush(false, false) if this is exactly what you want, or by 519 * {@link #setAutoFlushTo(boolean)} for all other cases. 520 */ 521 @Deprecated 522 void setAutoFlush(boolean autoFlush); 523 524 /** 525 * Turns 'auto-flush' on or off. 526 * <p> 527 * When enabled (default), {@link Put} operations don't get buffered/delayed 528 * and are immediately executed. Failed operations are not retried. This is 529 * slower but safer. 530 * <p> 531 * Turning off {@code #autoFlush} means that multiple {@link Put}s will be 532 * accepted before any RPC is actually sent to do the write operations. If the 533 * application dies before pending writes get flushed to HBase, data will be 534 * lost. 535 * <p> 536 * When you turn {@code #autoFlush} off, you should also consider the 537 * {@code #clearBufferOnFail} option. By default, asynchronous {@link Put} 538 * requests will be retried on failure until successful. However, this can 539 * pollute the writeBuffer and slow down batching performance. Additionally, 540 * you may want to issue a number of Put requests and call 541 * {@link #flushCommits()} as a barrier. In both use cases, consider setting 542 * clearBufferOnFail to true to erase the buffer after {@link #flushCommits()} 543 * has been called, regardless of success. 544 * <p> 545 * In other words, if you call {@code #setAutoFlush(false)}; HBase will retry N time for each 546 * flushCommit, including the last one when closing the table. This is NOT recommended, 547 * most of the time you want to call {@code #setAutoFlush(false, true)}. 548 * 549 * @param autoFlush 550 * Whether or not to enable 'auto-flush'. 551 * @param clearBufferOnFail 552 * Whether to keep Put failures in the writeBuffer. If autoFlush is true, then 553 * the value of this parameter is ignored and clearBufferOnFail is set to true. 554 * Setting clearBufferOnFail to false is deprecated since 0.96. 555 * @see #flushCommits 556 */ 557 void setAutoFlush(boolean autoFlush, boolean clearBufferOnFail); 558 559 /** 560 * Set the autoFlush behavior, without changing the value of {@code clearBufferOnFail} 561 */ 562 void setAutoFlushTo(boolean autoFlush); 563 564 /** 565 * Returns the maximum size in bytes of the write buffer for this HTable. 566 * <p> 567 * The default value comes from the configuration parameter 568 * {@code hbase.client.write.buffer}. 569 * @return The size of the write buffer in bytes. 570 */ 571 long getWriteBufferSize(); 572 573 /** 574 * Sets the size of the buffer in bytes. 575 * <p> 576 * If the new size is less than the current amount of data in the 577 * write buffer, the buffer gets flushed. 578 * @param writeBufferSize The new write buffer size, in bytes. 579 * @throws IOException if a remote or network exception occurs. 580 */ 581 void setWriteBufferSize(long writeBufferSize) throws IOException; 582 }