/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import com.google.protobuf.Service;
import com.google.protobuf.ServiceException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.coprocessor.Batch;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel;

import java.io.Closeable;
import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * Used to communicate with a single HBase table.
 *
 * @since 0.21.0
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public interface HTableInterface extends Closeable {

  /**
   * Gets the name of this table.
   *
   * @return the table name.
   */
  byte[] getTableName();

  /**
   * Returns the {@link Configuration} object used by this instance.
   * <p>
   * The reference returned is not a copy, so any change made to it will
   * affect this instance.
   *
   * @return the {@link Configuration} used by this instance (not a copy).
   */
  Configuration getConfiguration();

  /**
   * Gets the {@link HTableDescriptor table descriptor} for this table.
   *
   * @return the table descriptor for this table.
   * @throws IOException if a remote or network exception occurs.
   */
  HTableDescriptor getTableDescriptor() throws IOException;

  /**
   * Test for the existence of columns in the table, as specified by the Get.
   * <p>
   *
   * This will return true if the Get matches one or more keys, false if not.
   * <p>
   *
   * This is a server-side call so it prevents any data from being transferred to
   * the client.
   *
   * @param get the Get
   * @return true if the specified Get matches one or more keys, false if not
   * @throws IOException if a remote or network exception occurs.
   */
  boolean exists(Get get) throws IOException;

  /**
   * Test for the existence of columns in the table, as specified by the Gets.
   * <p>
   *
   * This will return an array of booleans. Each value will be true if the related Get matches
   * one or more keys, false if not.
   * <p>
   *
   * This is a server-side call so it prevents any data from being transferred to
   * the client.
   *
   * @param gets the Gets
   * @return Array of Boolean true if the specified Get matches one or more keys, false if not
   * @throws IOException if a remote or network exception occurs.
   */
  Boolean[] exists(List<Get> gets) throws IOException;

  /**
   * Method that does a batch call on Deletes, Gets, Puts, Increments, Appends and RowMutations.
   * The ordering of execution of the actions is not defined. Meaning if you do a Put and a
   * Get in the same {@link #batch} call, you will not necessarily be
   * guaranteed that the Get returns what the Put had put.
   *
   * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects
   * @param results Empty Object[], same size as actions. Provides access to partial
   *                results, in case an exception is thrown. A null in the result array means that
   *                the call for that action failed, even after retries
   * @throws IOException if a remote or network exception occurs.
   * @throws InterruptedException if the operation is interrupted.
   * @since 0.90.0
   */
  void batch(List<? extends Row> actions, Object[] results)
      throws IOException, InterruptedException;

  /**
   * Same as {@link #batch(List, Object[])}, but returns an array of
   * results instead of using a results parameter reference.
   *
   * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects
   * @return the results from the actions. A null in the return array means that
   *         the call for that action failed, even after retries
   * @throws IOException if a remote or network exception occurs.
   * @throws InterruptedException if the operation is interrupted.
   * @since 0.90.0
   */
  Object[] batch(List<? extends Row> actions) throws IOException, InterruptedException;

  /**
   * Same as {@link #batch(List, Object[])}, but with a callback.
   *
   * @param actions see {@link #batch(List, Object[])}
   * @param results see {@link #batch(List, Object[])}
   * @param callback the {@link Batch.Callback} to use with this batch call
   * @param <R> the result type handled by the {@code callback}
   * @throws IOException if a remote or network exception occurs.
   * @throws InterruptedException if the operation is interrupted.
   * @since 0.96.0
   */
  <R> void batchCallback(
      List<? extends Row> actions, Object[] results, Batch.Callback<R> callback)
      throws IOException, InterruptedException;

  /**
   * Same as {@link #batch(List)}, but with a callback.
   *
   * @param actions see {@link #batch(List)}
   * @param callback the {@link Batch.Callback} to use with this batch call
   * @param <R> the result type handled by the {@code callback}
   * @return see {@link #batch(List)}
   * @throws IOException if a remote or network exception occurs.
   * @throws InterruptedException if the operation is interrupted.
   * @since 0.96.0
   */
  <R> Object[] batchCallback(
      List<? extends Row> actions, Batch.Callback<R> callback)
      throws IOException, InterruptedException;

  /**
   * Extracts certain cells from a given row.
   *
   * @param get The object that specifies what data to fetch and from which row.
   * @return The data coming from the specified row, if it exists. If the row
   *         specified doesn't exist, the {@link Result} instance returned won't
   *         contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}.
   * @throws IOException if a remote or network exception occurs.
   * @since 0.20.0
   */
  Result get(Get get) throws IOException;

  /**
   * Extracts certain cells from the given rows, in batch.
   *
   * @param gets The objects that specify what data to fetch and from which rows.
   *
   * @return The data coming from the specified rows, if it exists. If the row
   *         specified doesn't exist, the {@link Result} instance returned won't
   *         contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}.
   *         If there are any failures even after retries, there will be a null in
   *         the results array for those Gets, AND an exception will be thrown.
   * @throws IOException if a remote or network exception occurs.
   *
   * @since 0.90.0
   */
  Result[] get(List<Get> gets) throws IOException;

  /**
   * Return the row that matches <i>row</i> exactly,
   * or the one that immediately precedes it.
   *
   * @param row A row key.
   * @param family Column family to include in the {@link Result}.
   * @return the row that matches <i>row</i> exactly, or the one that
   *         immediately precedes it.
   * @throws IOException if a remote or network exception occurs.
   * @since 0.20.0
   *
   * @deprecated As of version 0.92 this method is deprecated without
   * replacement.
   * getRowOrBefore is used internally to find entries in .META. and makes
   * various assumptions about the table (which are true for .META. but not
   * in general) to be efficient.
   */
  @Deprecated
  Result getRowOrBefore(byte[] row, byte[] family) throws IOException;

  /**
   * Returns a scanner on the current table as specified by the {@link Scan}
   * object.
   * Note that the passed {@link Scan}'s start row and caching properties
   * may be changed.
   *
   * @param scan A configured {@link Scan} object.
   * @return A scanner.
   * @throws IOException if a remote or network exception occurs.
   * @since 0.20.0
   */
  ResultScanner getScanner(Scan scan) throws IOException;

  /**
   * Gets a scanner on the current table for the given family.
   *
   * @param family The column family to scan.
   * @return A scanner.
   * @throws IOException if a remote or network exception occurs.
   * @since 0.20.0
   */
  ResultScanner getScanner(byte[] family) throws IOException;

  /**
   * Gets a scanner on the current table for the given family and qualifier.
   *
   * @param family The column family to scan.
   * @param qualifier The column qualifier to scan.
   * @return A scanner.
   * @throws IOException if a remote or network exception occurs.
   * @since 0.20.0
   */
  ResultScanner getScanner(byte[] family, byte[] qualifier) throws IOException;

  /**
   * Puts some data in the table.
   * <p>
   * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered
   * until the internal buffer is full.
   *
   * @param put The data to put.
   * @throws IOException if a remote or network exception occurs.
   * @since 0.20.0
   */
  void put(Put put) throws IOException;

  /**
   * Puts some data in the table, in batch.
   * <p>
   * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered
   * until the internal buffer is full.
   * <p>
   * This can be used for group commit, or for submitting user defined
   * batches. The writeBuffer will be periodically inspected while the List
   * is processed, so depending on the List size the writeBuffer may flush
   * not at all, or more than once.
   *
   * @param puts The list of mutations to apply. The batch put is done by
   *             aggregating the iteration of the Puts over the write buffer
   *             at the client-side for a single RPC call.
   * @throws IOException if a remote or network exception occurs.
   * @since 0.20.0
   */
  void put(List<Put> puts) throws IOException;

  /**
   * Atomically checks if a row/family/qualifier value matches the expected
   * value. If it does, it adds the put. If the passed value is null, the check
   * is for the lack of column (i.e. non-existence).
   *
   * @param row to check
   * @param family column family to check
   * @param qualifier column qualifier to check
   * @param value the expected value
   * @param put data to put if check succeeds
   * @throws IOException if a remote or network exception occurs.
   * @return true if the new put was executed, false otherwise
   */
  boolean checkAndPut(byte[] row, byte[] family, byte[] qualifier,
      byte[] value, Put put) throws IOException;

  /**
   * Deletes the specified cells/row.
   *
   * @param delete The object that specifies what to delete.
   * @throws IOException if a remote or network exception occurs.
   * @since 0.20.0
   */
  void delete(Delete delete) throws IOException;

  /**
   * Deletes the specified cells/rows in bulk.
   *
   * @param deletes List of things to delete. List gets modified by this
   *                method (in particular it gets re-ordered, so the order in which the elements
   *                are inserted in the list gives no guarantee as to the order in which the
   *                {@link Delete}s are executed).
   * @throws IOException if a remote or network exception occurs. In that case
   *                     the {@code deletes} argument will contain the {@link Delete} instances
   *                     that have not been successfully applied.
   * @since 0.20.1
   */
  void delete(List<Delete> deletes) throws IOException;

  /**
   * Atomically checks if a row/family/qualifier value matches the expected
   * value. If it does, it adds the delete. If the passed value is null, the
   * check is for the lack of column (i.e. non-existence).
   *
   * @param row to check
   * @param family column family to check
   * @param qualifier column qualifier to check
   * @param value the expected value
   * @param delete data to delete if check succeeds
   * @throws IOException if a remote or network exception occurs.
   * @return true if the new delete was executed, false otherwise
   */
  boolean checkAndDelete(byte[] row, byte[] family, byte[] qualifier,
      byte[] value, Delete delete) throws IOException;

  /**
   * Performs multiple mutations atomically on a single row. Currently
   * {@link Put} and {@link Delete} are supported.
   *
   * @param rm object that specifies the set of mutations to perform atomically
   * @throws IOException if a remote or network exception occurs.
   */
  void mutateRow(RowMutations rm) throws IOException;

  /**
   * Appends values to one or more columns within a single row.
   * <p>
   * This operation does not appear atomic to readers. Appends are done
   * under a single row lock, so write operations to a row are synchronized, but
   * readers do not take row locks so get and scan operations can see this
   * operation partially completed.
   *
   * @param append object that specifies the columns and values to be appended
   * @throws IOException if a remote or network exception occurs.
   * @return values of columns after the append operation (may be null)
   */
  Result append(Append append) throws IOException;

  /**
   * Increments one or more columns within a single row.
   * <p>
   * This operation does not appear atomic to readers. Increments are done
   * under a single row lock, so write operations to a row are synchronized, but
   * readers do not take row locks so get and scan operations can see this
   * operation partially completed.
   *
   * @param increment object that specifies the columns and amounts to be used
   *                  for the increment operations
   * @throws IOException if a remote or network exception occurs.
   * @return values of columns after the increment
   */
  Result increment(Increment increment) throws IOException;

  /**
   * Atomically increments a column value.
   * <p>
   * Equivalent to {@link #incrementColumnValue(byte[], byte[], byte[],
   * long, boolean) incrementColumnValue}(row, family, qualifier, amount,
   * <b>true</b>)
   *
   * @param row The row that contains the cell to increment.
   * @param family The column family of the cell to increment.
   * @param qualifier The column qualifier of the cell to increment.
   * @param amount The amount to increment the cell with (or decrement, if the
   *               amount is negative).
   * @return The new value, post increment.
   * @throws IOException if a remote or network exception occurs.
   */
  long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier,
      long amount) throws IOException;

  /**
   * Atomically increments a column value. If the column value already exists
   * and is not a big-endian long, this could throw an exception. If the column
   * value does not yet exist it is initialized to <code>amount</code> and
   * written to the specified column.
   *
   * <p>Setting writeToWAL to false means that in a fail scenario, you will lose
   * any increments that have not been flushed.
   *
   * @param row The row that contains the cell to increment.
   * @param family The column family of the cell to increment.
   * @param qualifier The column qualifier of the cell to increment.
   * @param amount The amount to increment the cell with (or decrement, if the
   *               amount is negative).
   * @param writeToWAL if {@code true}, the operation will be applied to the
   *                   Write Ahead Log (WAL). This makes the operation slower but safer, as if
   *                   the call returns successfully, it is guaranteed that the increment will
   *                   be safely persisted. When set to {@code false}, the call may return
   *                   successfully before the increment is safely persisted, so it's possible
   *                   that the increment be lost in the event of a failure happening before the
   *                   operation gets persisted.
   * @return The new value, post increment.
   * @throws IOException if a remote or network exception occurs.
   */
  long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier,
      long amount, boolean writeToWAL) throws IOException;

  /**
   * Tells whether or not 'auto-flush' is turned on.
   *
   * @return {@code true} if 'auto-flush' is enabled (default), meaning
   *         {@link Put} operations don't get buffered/delayed and are immediately
   *         executed.
   */
  boolean isAutoFlush();

  /**
   * Executes all the buffered {@link Put} operations.
   * <p>
   * This method gets called once automatically for every {@link Put} or batch
   * of {@link Put}s (when {@code put(List<Put>)} is used) when
   * {@link #isAutoFlush} is {@code true}.
   *
   * @throws IOException if a remote or network exception occurs.
   */
  void flushCommits() throws IOException;

  /**
   * Releases any resources held or pending changes in internal buffers.
   *
   * @throws IOException if a remote or network exception occurs.
   */
  void close() throws IOException;

  /**
   * Creates and returns a {@link com.google.protobuf.RpcChannel} instance connected to the
   * table region containing the specified row. The row given does not actually have
   * to exist. Whichever region would contain the row based on start and end keys will
   * be used. Note that the {@code row} parameter is also not passed to the
   * coprocessor handler registered for this protocol, unless the {@code row}
   * is separately passed as an argument in the service request. The parameter
   * here is only used to locate the region used to handle the call.
   *
   * <p>
   * The obtained {@link com.google.protobuf.RpcChannel} instance can be used to access a published
   * coprocessor {@link com.google.protobuf.Service} using standard protobuf service invocations:
   * </p>
   *
   * <div style="background-color: #cccccc; padding: 2px">
   * <blockquote><pre>
   * CoprocessorRpcChannel channel = myTable.coprocessorService(rowkey);
   * MyService.BlockingInterface service = MyService.newBlockingStub(channel);
   * MyCallRequest request = MyCallRequest.newBuilder()
   *     ...
   *     .build();
   * MyCallResponse response = service.myCall(null, request);
   * </pre></blockquote></div>
   *
   * @param row The row key used to identify the remote region location
   * @return A CoprocessorRpcChannel instance
   */
  CoprocessorRpcChannel coprocessorService(byte[] row);

  /**
   * Creates an instance of the given {@link com.google.protobuf.Service} subclass for each table
   * region spanning the range from the {@code startKey} row to {@code endKey} row (inclusive),
   * and invokes the passed {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call}
   * method with each {@link Service}
   * instance.
   *
   * @param service the protocol buffer {@code Service} implementation to call
   * @param startKey start region selection with region containing this row. If {@code null}, the
   *                 selection will start with the first table region.
   * @param endKey select regions up to and including the region containing this row.
   *               If {@code null}, selection will continue through the last table region.
   * @param callable this instance's
   *                 {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call}
   *                 method will be invoked once per table region, using the {@link Service}
   *                 instance connected to that region.
   * @param <T> the {@link Service} subclass to connect to
   * @param <R> Return type for the {@code callable} parameter's
   *            {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method
   * @return a map of result values keyed by region name
   */
  <T extends Service, R> Map<byte[],R> coprocessorService(Class<T> service,
      byte[] startKey, byte[] endKey, Batch.Call<T,R> callable)
      throws ServiceException, Throwable;

  /**
   * Creates an instance of the given {@link com.google.protobuf.Service} subclass for each table
   * region spanning the range from the {@code startKey} row to {@code endKey} row (inclusive),
   * and invokes the passed {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call}
   * method with each {@link Service} instance.
   *
   * <p>
   * The given
   * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Callback#update(byte[], byte[], Object)}
   * method will be called with the return value from each region's
   * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} invocation.
   * </p>
   *
   * @param service the protocol buffer {@code Service} implementation to call
   * @param startKey start region selection with region containing this row. If {@code null}, the
   *                 selection will start with the first table region.
   * @param endKey select regions up to and including the region containing this row.
   *               If {@code null}, selection will continue through the last table region.
   * @param callable this instance's
   *                 {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method
   *                 will be invoked once per table region, using the {@link Service} instance
   *                 connected to that region.
   * @param callback this instance's
   *                 {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Callback#update(byte[], byte[], Object)}
   *                 method will be called with the return value from each region's
   *                 {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} invocation.
   * @param <T> the {@link Service} subclass to connect to
   * @param <R> Return type for the {@code callable} parameter's
   *            {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method
   */
  <T extends Service, R> void coprocessorService(Class<T> service,
      byte[] startKey, byte[] endKey, Batch.Call<T,R> callable,
      Batch.Callback<R> callback) throws ServiceException, Throwable;

  /**
   * See {@link #setAutoFlush(boolean, boolean)}
   *
   * @param autoFlush
   *          Whether or not to enable 'auto-flush'.
   */
  void setAutoFlush(boolean autoFlush);

  /**
   * Turns 'auto-flush' on or off.
   * <p>
   * When enabled (default), {@link Put} operations don't get buffered/delayed
   * and are immediately executed. Failed operations are not retried. This is
   * slower but safer.
   * <p>
   * Turning off {@code autoFlush} means that multiple {@link Put}s will be
   * accepted before any RPC is actually sent to do the write operations. If the
   * application dies before pending writes get flushed to HBase, data will be
   * lost.
   * <p>
   * When you turn {@code autoFlush} off, you should also consider the
   * {@code clearBufferOnFail} option. By default, asynchronous {@link Put}
   * requests will be retried on failure until successful. However, this can
   * pollute the writeBuffer and slow down batching performance. Additionally,
   * you may want to issue a number of Put requests and call
   * {@link #flushCommits()} as a barrier. In both use cases, consider setting
   * clearBufferOnFail to true to erase the buffer after {@link #flushCommits()}
   * has been called, regardless of success.
   *
   * @param autoFlush
   *          Whether or not to enable 'auto-flush'.
   * @param clearBufferOnFail
   *          If {@code true}, the writeBuffer is erased after
   *          {@link #flushCommits()} has been called, regardless of success;
   *          if {@code false}, failed Puts are kept in the writeBuffer.
   * @see #flushCommits
   */
  void setAutoFlush(boolean autoFlush, boolean clearBufferOnFail);

  /**
   * Returns the maximum size in bytes of the write buffer for this HTable.
   * <p>
   * The default value comes from the configuration parameter
   * {@code hbase.client.write.buffer}.
   *
   * @return The size of the write buffer in bytes.
   */
  long getWriteBufferSize();

  /**
   * Sets the size of the buffer in bytes.
   * <p>
   * If the new size is less than the current amount of data in the
   * write buffer, the buffer gets flushed.
   *
   * @param writeBufferSize The new write buffer size, in bytes.
   * @throws IOException if a remote or network exception occurs.
   */
  void setWriteBufferSize(long writeBufferSize) throws IOException;
}