1 /** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 package org.apache.hadoop.hbase.client; 20 21 import com.google.protobuf.Service; 22 import com.google.protobuf.ServiceException; 23 24 import org.apache.hadoop.classification.InterfaceAudience; 25 import org.apache.hadoop.classification.InterfaceStability; 26 import org.apache.hadoop.conf.Configuration; 27 import org.apache.hadoop.hbase.TableName; 28 import org.apache.hadoop.hbase.HTableDescriptor; 29 import org.apache.hadoop.hbase.KeyValue; 30 import org.apache.hadoop.hbase.client.coprocessor.Batch; 31 import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel; 32 33 import java.io.Closeable; 34 import java.io.IOException; 35 import java.util.List; 36 import java.util.Map; 37 38 /** 39 * Used to communicate with a single HBase table. 40 * Obtain an instance from an {@link HConnection}. 41 * 42 * @since 0.21.0 43 */ 44 @InterfaceAudience.Public 45 @InterfaceStability.Stable 46 public interface HTableInterface extends Closeable { 47 48 /** 49 * Gets the name of this table. 50 * 51 * @return the table name. 52 */ 53 byte[] getTableName(); 54 55 /** 56 * Gets the fully qualified table name instance of this table. 57 */ 58 TableName getName(); 59 60 /** 61 * Returns the {@link Configuration} object used by this instance. 62 * <p> 63 * The reference returned is not a copy, so any change made to it will 64 * affect this instance. 65 */ 66 Configuration getConfiguration(); 67 68 /** 69 * Gets the {@link HTableDescriptor table descriptor} for this table. 70 * @throws IOException if a remote or network exception occurs. 71 */ 72 HTableDescriptor getTableDescriptor() throws IOException; 73 74 /** 75 * Test for the existence of columns in the table, as specified by the Get. 76 * <p> 77 * 78 * This will return true if the Get matches one or more keys, false if not. 79 * <p> 80 * 81 * This is a server-side call so it prevents any data from being transfered to 82 * the client. 83 * 84 * @param get the Get 85 * @return true if the specified Get matches one or more keys, false if not 86 * @throws IOException e 87 */ 88 boolean exists(Get get) throws IOException; 89 90 /** 91 * Test for the existence of columns in the table, as specified by the Gets. 92 * <p> 93 * 94 * This will return an array of booleans. Each value will be true if the related Get matches 95 * one or more keys, false if not. 96 * <p> 97 * 98 * This is a server-side call so it prevents any data from being transfered to 99 * the client. 100 * 101 * @param gets the Gets 102 * @return Array of Boolean true if the specified Get matches one or more keys, false if not 103 * @throws IOException e 104 */ 105 Boolean[] exists(List<Get> gets) throws IOException; 106 107 /** 108 * Method that does a batch call on Deletes, Gets, Puts, Increments, Appends and RowMutations. 109 * The ordering of execution of the actions is not defined. Meaning if you do a Put and a 110 * Get in the same {@link #batch} call, you will not necessarily be 111 * guaranteed that the Get returns what the Put had put. 112 * 113 * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects 114 * @param results Empty Object[], same size as actions. Provides access to partial 115 * results, in case an exception is thrown. A null in the result array means that 116 * the call for that action failed, even after retries 117 * @throws IOException 118 * @since 0.90.0 119 */ 120 void batch(final List<?extends Row> actions, final Object[] results) throws IOException, InterruptedException; 121 122 /** 123 * Same as {@link #batch(List, Object[])}, but returns an array of 124 * results instead of using a results parameter reference. 125 * 126 * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects 127 * @return the results from the actions. A null in the return array means that 128 * the call for that action failed, even after retries 129 * @throws IOException 130 * @since 0.90.0 131 * @deprecated If any exception is thrown by one of the actions, there is no way to 132 * retrieve the partially executed results. Use {@link #batch(List, Object[])} instead. 133 */ 134 Object[] batch(final List<? extends Row> actions) throws IOException, InterruptedException; 135 136 /** 137 * Same as {@link #batch(List, Object[])}, but with a callback. 138 * @since 0.96.0 139 */ 140 <R> void batchCallback( 141 final List<? extends Row> actions, final Object[] results, final Batch.Callback<R> callback 142 ) 143 throws IOException, InterruptedException; 144 145 146 /** 147 * Same as {@link #batch(List)}, but with a callback. 148 * @since 0.96.0 149 * @deprecated If any exception is thrown by one of the actions, there is no way to 150 * retrieve the partially executed results. Use 151 * {@link #batchCallback(List, Object[], org.apache.hadoop.hbase.client.coprocessor.Batch.Callback)} 152 * instead. 153 */ 154 <R> Object[] batchCallback( 155 List<? extends Row> actions, Batch.Callback<R> callback 156 ) throws IOException, 157 InterruptedException; 158 159 /** 160 * Extracts certain cells from a given row. 161 * @param get The object that specifies what data to fetch and from which row. 162 * @return The data coming from the specified row, if it exists. If the row 163 * specified doesn't exist, the {@link Result} instance returned won't 164 * contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}. 165 * @throws IOException if a remote or network exception occurs. 166 * @since 0.20.0 167 */ 168 Result get(Get get) throws IOException; 169 170 /** 171 * Extracts certain cells from the given rows, in batch. 172 * 173 * @param gets The objects that specify what data to fetch and from which rows. 174 * 175 * @return The data coming from the specified rows, if it exists. If the row 176 * specified doesn't exist, the {@link Result} instance returned won't 177 * contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}. 178 * If there are any failures even after retries, there will be a null in 179 * the results array for those Gets, AND an exception will be thrown. 180 * @throws IOException if a remote or network exception occurs. 181 * 182 * @since 0.90.0 183 */ 184 Result[] get(List<Get> gets) throws IOException; 185 186 /** 187 * Return the row that matches <i>row</i> exactly, 188 * or the one that immediately precedes it. 189 * 190 * @param row A row key. 191 * @param family Column family to include in the {@link Result}. 192 * @throws IOException if a remote or network exception occurs. 193 * @since 0.20.0 194 * 195 * @deprecated As of version 0.92 this method is deprecated without 196 * replacement. 197 * getRowOrBefore is used internally to find entries in hbase:meta and makes 198 * various assumptions about the table (which are true for hbase:meta but not 199 * in general) to be efficient. 200 */ 201 Result getRowOrBefore(byte[] row, byte[] family) throws IOException; 202 203 /** 204 * Returns a scanner on the current table as specified by the {@link Scan} 205 * object. 206 * Note that the passed {@link Scan}'s start row and caching properties 207 * maybe changed. 208 * 209 * @param scan A configured {@link Scan} object. 210 * @return A scanner. 211 * @throws IOException if a remote or network exception occurs. 212 * @since 0.20.0 213 */ 214 ResultScanner getScanner(Scan scan) throws IOException; 215 216 /** 217 * Gets a scanner on the current table for the given family. 218 * 219 * @param family The column family to scan. 220 * @return A scanner. 221 * @throws IOException if a remote or network exception occurs. 222 * @since 0.20.0 223 */ 224 ResultScanner getScanner(byte[] family) throws IOException; 225 226 /** 227 * Gets a scanner on the current table for the given family and qualifier. 228 * 229 * @param family The column family to scan. 230 * @param qualifier The column qualifier to scan. 231 * @return A scanner. 232 * @throws IOException if a remote or network exception occurs. 233 * @since 0.20.0 234 */ 235 ResultScanner getScanner(byte[] family, byte[] qualifier) throws IOException; 236 237 238 /** 239 * Puts some data in the table. 240 * <p> 241 * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered 242 * until the internal buffer is full. 243 * @param put The data to put. 244 * @throws IOException if a remote or network exception occurs. 245 * @since 0.20.0 246 */ 247 void put(Put put) throws IOException; 248 249 /** 250 * Puts some data in the table, in batch. 251 * <p> 252 * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered 253 * until the internal buffer is full. 254 * <p> 255 * This can be used for group commit, or for submitting user defined 256 * batches. The writeBuffer will be periodically inspected while the List 257 * is processed, so depending on the List size the writeBuffer may flush 258 * not at all, or more than once. 259 * @param puts The list of mutations to apply. The batch put is done by 260 * aggregating the iteration of the Puts over the write buffer 261 * at the client-side for a single RPC call. 262 * @throws IOException if a remote or network exception occurs. 263 * @since 0.20.0 264 */ 265 void put(List<Put> puts) throws IOException; 266 267 /** 268 * Atomically checks if a row/family/qualifier value matches the expected 269 * value. If it does, it adds the put. If the passed value is null, the check 270 * is for the lack of column (ie: non-existance) 271 * 272 * @param row to check 273 * @param family column family to check 274 * @param qualifier column qualifier to check 275 * @param value the expected value 276 * @param put data to put if check succeeds 277 * @throws IOException e 278 * @return true if the new put was executed, false otherwise 279 */ 280 boolean checkAndPut(byte[] row, byte[] family, byte[] qualifier, 281 byte[] value, Put put) throws IOException; 282 283 /** 284 * Deletes the specified cells/row. 285 * 286 * @param delete The object that specifies what to delete. 287 * @throws IOException if a remote or network exception occurs. 288 * @since 0.20.0 289 */ 290 void delete(Delete delete) throws IOException; 291 292 /** 293 * Deletes the specified cells/rows in bulk. 294 * @param deletes List of things to delete. List gets modified by this 295 * method (in particular it gets re-ordered, so the order in which the elements 296 * are inserted in the list gives no guarantee as to the order in which the 297 * {@link Delete}s are executed). 298 * @throws IOException if a remote or network exception occurs. In that case 299 * the {@code deletes} argument will contain the {@link Delete} instances 300 * that have not be successfully applied. 301 * @since 0.20.1 302 */ 303 void delete(List<Delete> deletes) throws IOException; 304 305 /** 306 * Atomically checks if a row/family/qualifier value matches the expected 307 * value. If it does, it adds the delete. If the passed value is null, the 308 * check is for the lack of column (ie: non-existance) 309 * 310 * @param row to check 311 * @param family column family to check 312 * @param qualifier column qualifier to check 313 * @param value the expected value 314 * @param delete data to delete if check succeeds 315 * @throws IOException e 316 * @return true if the new delete was executed, false otherwise 317 */ 318 boolean checkAndDelete(byte[] row, byte[] family, byte[] qualifier, 319 byte[] value, Delete delete) throws IOException; 320 321 /** 322 * Performs multiple mutations atomically on a single row. Currently 323 * {@link Put} and {@link Delete} are supported. 324 * 325 * @param rm object that specifies the set of mutations to perform atomically 326 * @throws IOException 327 */ 328 void mutateRow(final RowMutations rm) throws IOException; 329 330 /** 331 * Appends values to one or more columns within a single row. 332 * <p> 333 * This operation does not appear atomic to readers. Appends are done 334 * under a single row lock, so write operations to a row are synchronized, but 335 * readers do not take row locks so get and scan operations can see this 336 * operation partially completed. 337 * 338 * @param append object that specifies the columns and amounts to be used 339 * for the increment operations 340 * @throws IOException e 341 * @return values of columns after the append operation (maybe null) 342 */ 343 Result append(final Append append) throws IOException; 344 345 /** 346 * Increments one or more columns within a single row. 347 * <p> 348 * This operation does not appear atomic to readers. Increments are done 349 * under a single row lock, so write operations to a row are synchronized, but 350 * readers do not take row locks so get and scan operations can see this 351 * operation partially completed. 352 * 353 * @param increment object that specifies the columns and amounts to be used 354 * for the increment operations 355 * @throws IOException e 356 * @return values of columns after the increment 357 */ 358 Result increment(final Increment increment) throws IOException; 359 360 /** 361 * See {@link #incrementColumnValue(byte[], byte[], byte[], long, Durability)} 362 * <p> 363 * The {@link Durability} is defaulted to {@link Durability#SYNC_WAL}. 364 * @param row The row that contains the cell to increment. 365 * @param family The column family of the cell to increment. 366 * @param qualifier The column qualifier of the cell to increment. 367 * @param amount The amount to increment the cell with (or decrement, if the 368 * amount is negative). 369 * @return The new value, post increment. 370 * @throws IOException if a remote or network exception occurs. 371 */ 372 long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier, 373 long amount) throws IOException; 374 375 /** 376 * Atomically increments a column value. If the column value already exists 377 * and is not a big-endian long, this could throw an exception. If the column 378 * value does not yet exist it is initialized to <code>amount</code> and 379 * written to the specified column. 380 * 381 * <p>Setting durability to {@link Durability#SKIP_WAL} means that in a fail 382 * scenario you will lose any increments that have not been flushed. 383 * @param row The row that contains the cell to increment. 384 * @param family The column family of the cell to increment. 385 * @param qualifier The column qualifier of the cell to increment. 386 * @param amount The amount to increment the cell with (or decrement, if the 387 * amount is negative). 388 * @param durability The persistence guarantee for this increment. 389 * @return The new value, post increment. 390 * @throws IOException if a remote or network exception occurs. 391 */ 392 long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier, 393 long amount, Durability durability) throws IOException; 394 395 /** 396 * @deprecated Use {@link #incrementColumnValue(byte[], byte[], byte[], long, Durability)} 397 */ 398 @Deprecated 399 long incrementColumnValue(final byte [] row, final byte [] family, 400 final byte [] qualifier, final long amount, final boolean writeToWAL) 401 throws IOException; 402 403 /** 404 * Tells whether or not 'auto-flush' is turned on. 405 * 406 * @return {@code true} if 'auto-flush' is enabled (default), meaning 407 * {@link Put} operations don't get buffered/delayed and are immediately 408 * executed. 409 */ 410 boolean isAutoFlush(); 411 412 /** 413 * Executes all the buffered {@link Put} operations. 414 * <p> 415 * This method gets called once automatically for every {@link Put} or batch 416 * of {@link Put}s (when <code>put(List<Put>)</code> is used) when 417 * {@link #isAutoFlush} is {@code true}. 418 * @throws IOException if a remote or network exception occurs. 419 */ 420 void flushCommits() throws IOException; 421 422 /** 423 * Releases any resources held or pending changes in internal buffers. 424 * 425 * @throws IOException if a remote or network exception occurs. 426 */ 427 void close() throws IOException; 428 429 /** 430 * Creates and returns a {@link com.google.protobuf.RpcChannel} instance connected to the 431 * table region containing the specified row. The row given does not actually have 432 * to exist. Whichever region would contain the row based on start and end keys will 433 * be used. Note that the {@code row} parameter is also not passed to the 434 * coprocessor handler registered for this protocol, unless the {@code row} 435 * is separately passed as an argument in the service request. The parameter 436 * here is only used to locate the region used to handle the call. 437 * 438 * <p> 439 * The obtained {@link com.google.protobuf.RpcChannel} instance can be used to access a published 440 * coprocessor {@link com.google.protobuf.Service} using standard protobuf service invocations: 441 * </p> 442 * 443 * <div style="background-color: #cccccc; padding: 2px"> 444 * <blockquote><pre> 445 * CoprocessorRpcChannel channel = myTable.coprocessorService(rowkey); 446 * MyService.BlockingInterface service = MyService.newBlockingStub(channel); 447 * MyCallRequest request = MyCallRequest.newBuilder() 448 * ... 449 * .build(); 450 * MyCallResponse response = service.myCall(null, request); 451 * </pre></blockquote></div> 452 * 453 * @param row The row key used to identify the remote region location 454 * @return A CoprocessorRpcChannel instance 455 */ 456 @InterfaceAudience.Private // TODO add coproc audience level 457 CoprocessorRpcChannel coprocessorService(byte[] row); 458 459 /** 460 * Creates an instance of the given {@link com.google.protobuf.Service} subclass for each table 461 * region spanning the range from the {@code startKey} row to {@code endKey} row (inclusive), 462 * and invokes the passed {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} 463 * method with each {@link Service} 464 * instance. 465 * 466 * @param service the protocol buffer {@code Service} implementation to call 467 * @param startKey start region selection with region containing this row. If {@code null}, the 468 * selection will start with the first table region. 469 * @param endKey select regions up to and including the region containing this row. 470 * If {@code null}, selection will continue through the last table region. 471 * @param callable this instance's 472 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} 473 * method will be invoked once per table region, using the {@link Service} 474 * instance connected to that region. 475 * @param <T> the {@link Service} subclass to connect to 476 * @param <R> Return type for the {@code callable} parameter's 477 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method 478 * @return a map of result values keyed by region name 479 */ 480 @InterfaceAudience.Private // TODO add coproc audience level 481 <T extends Service, R> Map<byte[],R> coprocessorService(final Class<T> service, 482 byte[] startKey, byte[] endKey, final Batch.Call<T,R> callable) 483 throws ServiceException, Throwable; 484 485 /** 486 * Creates an instance of the given {@link com.google.protobuf.Service} subclass for each table 487 * region spanning the range from the {@code startKey} row to {@code endKey} row (inclusive), 488 * and invokes the passed {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} 489 * method with each {@link Service} instance. 490 * 491 * <p> 492 * The given 493 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Callback#update(byte[], byte[], Object)} 494 * method will be called with the return value from each region's 495 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} invocation. 496 *</p> 497 * 498 * @param service the protocol buffer {@code Service} implementation to call 499 * @param startKey start region selection with region containing this row. If {@code null}, the 500 * selection will start with the first table region. 501 * @param endKey select regions up to and including the region containing this row. 502 * If {@code null}, selection will continue through the last table region. 503 * @param callable this instance's 504 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method 505 * will be invoked once per table region, using the {@link Service} instance 506 * connected to that region. 507 * @param callback 508 * @param <T> the {@link Service} subclass to connect to 509 * @param <R> Return type for the {@code callable} parameter's 510 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method 511 */ 512 @InterfaceAudience.Private // TODO add coproc audience level 513 <T extends Service, R> void coprocessorService(final Class<T> service, 514 byte[] startKey, byte[] endKey, final Batch.Call<T,R> callable, 515 final Batch.Callback<R> callback) throws ServiceException, Throwable; 516 517 /** 518 * See {@link #setAutoFlush(boolean, boolean)} 519 * 520 * @param autoFlush 521 * Whether or not to enable 'auto-flush'. 522 * @deprecated in 0.96. When called with setAutoFlush(false), this function also 523 * set clearBufferOnFail to true, which is unexpected but kept for historical reasons. 524 * Replace it with setAutoFlush(false, false) if this is exactly what you want, or by 525 * {@link #setAutoFlushTo(boolean)} for all other cases. 526 */ 527 @Deprecated 528 void setAutoFlush(boolean autoFlush); 529 530 /** 531 * Turns 'auto-flush' on or off. 532 * <p> 533 * When enabled (default), {@link Put} operations don't get buffered/delayed 534 * and are immediately executed. Failed operations are not retried. This is 535 * slower but safer. 536 * <p> 537 * Turning off {@code #autoFlush} means that multiple {@link Put}s will be 538 * accepted before any RPC is actually sent to do the write operations. If the 539 * application dies before pending writes get flushed to HBase, data will be 540 * lost. 541 * <p> 542 * When you turn {@code #autoFlush} off, you should also consider the 543 * {@code #clearBufferOnFail} option. By default, asynchronous {@link Put} 544 * requests will be retried on failure until successful. However, this can 545 * pollute the writeBuffer and slow down batching performance. Additionally, 546 * you may want to issue a number of Put requests and call 547 * {@link #flushCommits()} as a barrier. In both use cases, consider setting 548 * clearBufferOnFail to true to erase the buffer after {@link #flushCommits()} 549 * has been called, regardless of success. 550 * <p> 551 * In other words, if you call {@code #setAutoFlush(false)}; HBase will retry N time for each 552 * flushCommit, including the last one when closing the table. This is NOT recommended, 553 * most of the time you want to call {@code #setAutoFlush(false, true)}. 554 * 555 * @param autoFlush 556 * Whether or not to enable 'auto-flush'. 557 * @param clearBufferOnFail 558 * Whether to keep Put failures in the writeBuffer. If autoFlush is true, then 559 * the value of this parameter is ignored and clearBufferOnFail is set to true. 560 * Setting clearBufferOnFail to false is deprecated since 0.96. 561 * @see #flushCommits 562 */ 563 void setAutoFlush(boolean autoFlush, boolean clearBufferOnFail); 564 565 /** 566 * Set the autoFlush behavior, without changing the value of {@code clearBufferOnFail} 567 */ 568 void setAutoFlushTo(boolean autoFlush); 569 570 /** 571 * Returns the maximum size in bytes of the write buffer for this HTable. 572 * <p> 573 * The default value comes from the configuration parameter 574 * {@code hbase.client.write.buffer}. 575 * @return The size of the write buffer in bytes. 576 */ 577 long getWriteBufferSize(); 578 579 /** 580 * Sets the size of the buffer in bytes. 581 * <p> 582 * If the new size is less than the current amount of data in the 583 * write buffer, the buffer gets flushed. 584 * @param writeBufferSize The new write buffer size, in bytes. 585 * @throws IOException if a remote or network exception occurs. 586 */ 587 void setWriteBufferSize(long writeBufferSize) throws IOException; 588 }