1 /** 2 * Copyright 2010 The Apache Software Foundation 3 * 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 package org.apache.hadoop.hbase.client; 21 22 import java.io.Closeable; 23 import java.io.IOException; 24 import java.util.List; 25 import java.util.Map; 26 27 import org.apache.hadoop.conf.Configuration; 28 import org.apache.hadoop.hbase.HTableDescriptor; 29 import org.apache.hadoop.hbase.KeyValue; 30 import org.apache.hadoop.hbase.client.coprocessor.Batch; 31 import org.apache.hadoop.hbase.ipc.CoprocessorProtocol; 32 33 /** 34 * Used to communicate with a single HBase table. 35 * 36 * @since 0.21.0 37 */ 38 public interface HTableInterface extends Closeable { 39 40 /** 41 * Gets the name of this table. 42 * 43 * @return the table name. 44 */ 45 byte[] getTableName(); 46 47 /** 48 * Returns the {@link Configuration} object used by this instance. 49 * <p> 50 * The reference returned is not a copy, so any change made to it will 51 * affect this instance. 52 */ 53 Configuration getConfiguration(); 54 55 /** 56 * Gets the {@link HTableDescriptor table descriptor} for this table. 57 * @throws IOException if a remote or network exception occurs. 58 */ 59 HTableDescriptor getTableDescriptor() throws IOException; 60 61 /** 62 * Test for the existence of columns in the table, as specified in the Get. 63 * <p> 64 * 65 * This will return true if the Get matches one or more keys, false if not. 66 * <p> 67 * 68 * This is a server-side call so it prevents any data from being transfered to 69 * the client. 70 * 71 * @param get the Get 72 * @return true if the specified Get matches one or more keys, false if not 73 * @throws IOException e 74 */ 75 boolean exists(Get get) throws IOException; 76 77 /** 78 * Method that does a batch call on Deletes, Gets, Puts, Increments, Appends and RowMutations. 79 * The execution ordering of the actions is not defined. Meaning if you do a Put and a 80 * Get in the same {@link #batch} call, you will not necessarily be 81 * guaranteed that the Get returns what the Put had put. 82 * 83 * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects 84 * @param results Empty Object[], same size as actions. Provides access to partial 85 * results, in case an exception is thrown. A null in the result array means that 86 * the call for that action failed, even after retries 87 * @throws IOException 88 * @since 0.90.0 89 */ 90 void batch(final List<?extends Row> actions, final Object[] results) throws IOException, InterruptedException; 91 92 /** 93 * Same as {@link #batch(List, Object[])}, but returns an array of 94 * results instead of using a results parameter reference. 95 * 96 * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects 97 * @return the results from the actions. A null in the return array means that 98 * the call for that action failed, even after retries 99 * @throws IOException 100 * @since 0.90.0 101 */ 102 Object[] batch(final List<? extends Row> actions) throws IOException, InterruptedException; 103 104 /** 105 * Extracts certain cells from a given row. 106 * @param get The object that specifies what data to fetch and from which row. 107 * @return The data coming from the specified row, if it exists. If the row 108 * specified doesn't exist, the {@link Result} instance returned won't 109 * contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}. 110 * @throws IOException if a remote or network exception occurs. 111 * @since 0.20.0 112 */ 113 Result get(Get get) throws IOException; 114 115 /** 116 * Extracts certain cells from the given rows, in batch. 117 * 118 * @param gets The objects that specify what data to fetch and from which rows. 119 * 120 * @return The data coming from the specified rows, if it exists. If the row 121 * specified doesn't exist, the {@link Result} instance returned won't 122 * contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}. 123 * If there are any failures even after retries, there will be a null in 124 * the results array for those Gets, AND an exception will be thrown. 125 * @throws IOException if a remote or network exception occurs. 126 * 127 * @since 0.90.0 128 */ 129 Result[] get(List<Get> gets) throws IOException; 130 131 /** 132 * Return the row that matches <i>row</i> exactly, 133 * or the one that immediately precedes it. 134 * 135 * @param row A row key. 136 * @param family Column family to include in the {@link Result}. 137 * @throws IOException if a remote or network exception occurs. 138 * @since 0.20.0 139 * 140 * @deprecated As of version 0.92 this method is deprecated without 141 * replacement. 142 * getRowOrBefore is used internally to find entries in .META. and makes 143 * various assumptions about the table (which are true for .META. but not 144 * in general) to be efficient. 145 */ 146 Result getRowOrBefore(byte[] row, byte[] family) throws IOException; 147 148 /** 149 * Returns a scanner on the current table as specified by the {@link Scan} 150 * object. 151 * Note that the passed {@link Scan}'s start row and caching properties 152 * maybe changed. 153 * 154 * @param scan A configured {@link Scan} object. 155 * @return A scanner. 156 * @throws IOException if a remote or network exception occurs. 157 * @since 0.20.0 158 */ 159 ResultScanner getScanner(Scan scan) throws IOException; 160 161 /** 162 * Gets a scanner on the current table for the given family. 163 * 164 * @param family The column family to scan. 165 * @return A scanner. 166 * @throws IOException if a remote or network exception occurs. 167 * @since 0.20.0 168 */ 169 ResultScanner getScanner(byte[] family) throws IOException; 170 171 /** 172 * Gets a scanner on the current table for the given family and qualifier. 173 * 174 * @param family The column family to scan. 175 * @param qualifier The column qualifier to scan. 176 * @return A scanner. 177 * @throws IOException if a remote or network exception occurs. 178 * @since 0.20.0 179 */ 180 ResultScanner getScanner(byte[] family, byte[] qualifier) throws IOException; 181 182 183 /** 184 * Puts some data in the table. 185 * <p> 186 * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered 187 * until the internal buffer is full. 188 * @param put The data to put. 189 * @throws IOException if a remote or network exception occurs. 190 * @since 0.20.0 191 */ 192 void put(Put put) throws IOException; 193 194 /** 195 * Puts some data in the table, in batch. 196 * <p> 197 * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered 198 * until the internal buffer is full. 199 * <p> 200 * This can be used for group commit, or for submitting user defined 201 * batches. The writeBuffer will be periodically inspected while the List 202 * is processed, so depending on the List size the writeBuffer may flush 203 * not at all, or more than once. 204 * @param puts The list of mutations to apply. The batch put is done by 205 * aggregating the iteration of the Puts over the write buffer 206 * at the client-side for a single RPC call. 207 * @throws IOException if a remote or network exception occurs. 208 * @since 0.20.0 209 */ 210 void put(List<Put> puts) throws IOException; 211 212 /** 213 * Atomically checks if a row/family/qualifier value matches the expected 214 * value. If it does, it adds the put. If the passed value is null, the check 215 * is for the lack of column (ie: non-existance) 216 * 217 * @param row to check 218 * @param family column family to check 219 * @param qualifier column qualifier to check 220 * @param value the expected value 221 * @param put data to put if check succeeds 222 * @throws IOException e 223 * @return true if the new put was executed, false otherwise 224 */ 225 boolean checkAndPut(byte[] row, byte[] family, byte[] qualifier, 226 byte[] value, Put put) throws IOException; 227 228 /** 229 * Deletes the specified cells/row. 230 * 231 * @param delete The object that specifies what to delete. 232 * @throws IOException if a remote or network exception occurs. 233 * @since 0.20.0 234 */ 235 void delete(Delete delete) throws IOException; 236 237 /** 238 * Deletes the specified cells/rows in bulk. 239 * @param deletes List of things to delete. List gets modified by this 240 * method (in particular it gets re-ordered, so the order in which the elements 241 * are inserted in the list gives no guarantee as to the order in which the 242 * {@link Delete}s are executed). 243 * @throws IOException if a remote or network exception occurs. In that case 244 * the {@code deletes} argument will contain the {@link Delete} instances 245 * that have not be successfully applied. 246 * @since 0.20.1 247 */ 248 void delete(List<Delete> deletes) throws IOException; 249 250 /** 251 * Atomically checks if a row/family/qualifier value matches the expected 252 * value. If it does, it adds the delete. If the passed value is null, the 253 * check is for the lack of column (ie: non-existance) 254 * 255 * @param row to check 256 * @param family column family to check 257 * @param qualifier column qualifier to check 258 * @param value the expected value 259 * @param delete data to delete if check succeeds 260 * @throws IOException e 261 * @return true if the new delete was executed, false otherwise 262 */ 263 boolean checkAndDelete(byte[] row, byte[] family, byte[] qualifier, 264 byte[] value, Delete delete) throws IOException; 265 266 /** 267 * Performs multiple mutations atomically on a single row. Currently 268 * {@link Put} and {@link Delete} are supported. 269 * 270 * @param arm object that specifies the set of mutations to perform 271 * atomically 272 * @throws IOException 273 */ 274 public void mutateRow(final RowMutations rm) throws IOException; 275 276 /** 277 * Appends values to one or more columns within a single row. 278 * <p> 279 * This operation does not appear atomic to readers. Appends are done 280 * under a single row lock, so write operations to a row are synchronized, but 281 * readers do not take row locks so get and scan operations can see this 282 * operation partially completed. 283 * 284 * @param append object that specifies the columns and amounts to be used 285 * for the increment operations 286 * @throws IOException e 287 * @return values of columns after the append operation (maybe null) 288 */ 289 public Result append(final Append append) throws IOException; 290 291 /** 292 * Increments one or more columns within a single row. 293 * <p> 294 * This operation does not appear atomic to readers. Increments are done 295 * under a single row lock, so write operations to a row are synchronized, but 296 * readers do not take row locks so get and scan operations can see this 297 * operation partially completed. 298 * 299 * @param increment object that specifies the columns and amounts to be used 300 * for the increment operations 301 * @throws IOException e 302 * @return values of columns after the increment 303 */ 304 public Result increment(final Increment increment) throws IOException; 305 306 /** 307 * Atomically increments a column value. 308 * <p> 309 * Equivalent to {@link #incrementColumnValue(byte[], byte[], byte[], 310 * long, boolean) incrementColumnValue}(row, family, qualifier, amount, 311 * <b>true</b>)} 312 * @param row The row that contains the cell to increment. 313 * @param family The column family of the cell to increment. 314 * @param qualifier The column qualifier of the cell to increment. 315 * @param amount The amount to increment the cell with (or decrement, if the 316 * amount is negative). 317 * @return The new value, post increment. 318 * @throws IOException if a remote or network exception occurs. 319 */ 320 long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier, 321 long amount) throws IOException; 322 323 /** 324 * Atomically increments a column value. If the column value already exists 325 * and is not a big-endian long, this could throw an exception. If the column 326 * value does not yet exist it is initialized to <code>amount</code> and 327 * written to the specified column. 328 * 329 * <p>Setting writeToWAL to false means that in a fail scenario, you will lose 330 * any increments that have not been flushed. 331 * @param row The row that contains the cell to increment. 332 * @param family The column family of the cell to increment. 333 * @param qualifier The column qualifier of the cell to increment. 334 * @param amount The amount to increment the cell with (or decrement, if the 335 * amount is negative). 336 * @param writeToWAL if {@code true}, the operation will be applied to the 337 * Write Ahead Log (WAL). This makes the operation slower but safer, as if 338 * the call returns successfully, it is guaranteed that the increment will 339 * be safely persisted. When set to {@code false}, the call may return 340 * successfully before the increment is safely persisted, so it's possible 341 * that the increment be lost in the event of a failure happening before the 342 * operation gets persisted. 343 * @return The new value, post increment. 344 * @throws IOException if a remote or network exception occurs. 345 */ 346 long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier, 347 long amount, boolean writeToWAL) throws IOException; 348 349 /** 350 * Tells whether or not 'auto-flush' is turned on. 351 * 352 * @return {@code true} if 'auto-flush' is enabled (default), meaning 353 * {@link Put} operations don't get buffered/delayed and are immediately 354 * executed. 355 */ 356 boolean isAutoFlush(); 357 358 /** 359 * Executes all the buffered {@link Put} operations. 360 * <p> 361 * This method gets called once automatically for every {@link Put} or batch 362 * of {@link Put}s (when <code>put(List<Put>)</code> is used) when 363 * {@link #isAutoFlush} is {@code true}. 364 * @throws IOException if a remote or network exception occurs. 365 */ 366 void flushCommits() throws IOException; 367 368 /** 369 * Releases any resources help or pending changes in internal buffers. 370 * 371 * @throws IOException if a remote or network exception occurs. 372 */ 373 void close() throws IOException; 374 375 /** 376 * Obtains a lock on a row. 377 * 378 * @param row The row to lock. 379 * @return A {@link RowLock} containing the row and lock id. 380 * @throws IOException if a remote or network exception occurs. 381 * @see RowLock 382 * @see #unlockRow 383 * @deprecated {@link RowLock} and associated operations are deprecated 384 */ 385 RowLock lockRow(byte[] row) throws IOException; 386 387 /** 388 * Releases a row lock. 389 * 390 * @param rl The row lock to release. 391 * @throws IOException if a remote or network exception occurs. 392 * @see RowLock 393 * @see #unlockRow 394 * @deprecated {@link RowLock} and associated operations are deprecated 395 */ 396 void unlockRow(RowLock rl) throws IOException; 397 398 /** 399 * Creates and returns a proxy to the CoprocessorProtocol instance running in the 400 * region containing the specified row. The row given does not actually have 401 * to exist. Whichever region would contain the row based on start and end keys will 402 * be used. Note that the {@code row} parameter is also not passed to the 403 * coprocessor handler registered for this protocol, unless the {@code row} 404 * is separately passed as an argument in a proxy method call. The parameter 405 * here is just used to locate the region used to handle the call. 406 * 407 * @param protocol The class or interface defining the remote protocol 408 * @param row The row key used to identify the remote region location 409 * @return A CoprocessorProtocol instance 410 */ 411 <T extends CoprocessorProtocol> T coprocessorProxy(Class<T> protocol, byte[] row); 412 413 /** 414 * Invoke the passed 415 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call} against 416 * the {@link CoprocessorProtocol} instances running in the selected regions. 417 * All regions beginning with the region containing the <code>startKey</code> 418 * row, through to the region containing the <code>endKey</code> row (inclusive) 419 * will be used. If <code>startKey</code> or <code>endKey</code> is 420 * <code>null</code>, the first and last regions in the table, respectively, 421 * will be used in the range selection. 422 * 423 * @param protocol the CoprocessorProtocol implementation to call 424 * @param startKey start region selection with region containing this row 425 * @param endKey select regions up to and including the region containing 426 * this row 427 * @param callable wraps the CoprocessorProtocol implementation method calls 428 * made per-region 429 * @param <T> CoprocessorProtocol subclass for the remote invocation 430 * @param <R> Return type for the 431 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call(Object)} 432 * method 433 * @return a <code>Map</code> of region names to 434 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call(Object)} return values 435 */ 436 <T extends CoprocessorProtocol, R> Map<byte[],R> coprocessorExec( 437 Class<T> protocol, byte[] startKey, byte[] endKey, Batch.Call<T,R> callable) 438 throws IOException, Throwable; 439 440 /** 441 * Invoke the passed 442 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call} against 443 * the {@link CoprocessorProtocol} instances running in the selected regions. 444 * All regions beginning with the region containing the <code>startKey</code> 445 * row, through to the region containing the <code>endKey</code> row 446 * (inclusive) 447 * will be used. If <code>startKey</code> or <code>endKey</code> is 448 * <code>null</code>, the first and last regions in the table, respectively, 449 * will be used in the range selection. 450 * 451 * <p> 452 * For each result, the given 453 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Callback#update(byte[], byte[], Object)} 454 * method will be called. 455 *</p> 456 * 457 * @param protocol the CoprocessorProtocol implementation to call 458 * @param startKey start region selection with region containing this row 459 * @param endKey select regions up to and including the region containing 460 * this row 461 * @param callable wraps the CoprocessorProtocol implementation method calls 462 * made per-region 463 * @param callback an instance upon which 464 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Callback#update(byte[], byte[], Object)} with the 465 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call(Object)} 466 * return value for each region 467 * @param <T> CoprocessorProtocol subclass for the remote invocation 468 * @param <R> Return type for the 469 * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call(Object)} 470 * method 471 */ 472 <T extends CoprocessorProtocol, R> void coprocessorExec( 473 Class<T> protocol, byte[] startKey, byte[] endKey, 474 Batch.Call<T,R> callable, Batch.Callback<R> callback) 475 throws IOException, Throwable; 476 477 /** 478 * See {@link #setAutoFlush(boolean, boolean)} 479 * 480 * @param autoFlush 481 * Whether or not to enable 'auto-flush'. 482 */ 483 public void setAutoFlush(boolean autoFlush); 484 485 /** 486 * Turns 'auto-flush' on or off. 487 * <p> 488 * When enabled (default), {@link Put} operations don't get buffered/delayed 489 * and are immediately executed. Failed operations are not retried. This is 490 * slower but safer. 491 * <p> 492 * Turning off {@link #autoFlush} means that multiple {@link Put}s will be 493 * accepted before any RPC is actually sent to do the write operations. If the 494 * application dies before pending writes get flushed to HBase, data will be 495 * lost. 496 * <p> 497 * When you turn {@link #autoFlush} off, you should also consider the 498 * {@link #clearBufferOnFail} option. By default, asynchronous {@link Put} 499 * requests will be retried on failure until successful. However, this can 500 * pollute the writeBuffer and slow down batching performance. Additionally, 501 * you may want to issue a number of Put requests and call 502 * {@link #flushCommits()} as a barrier. In both use cases, consider setting 503 * clearBufferOnFail to true to erase the buffer after {@link #flushCommits()} 504 * has been called, regardless of success. 505 * 506 * @param autoFlush 507 * Whether or not to enable 'auto-flush'. 508 * @param clearBufferOnFail 509 * Whether to keep Put failures in the writeBuffer 510 * @see #flushCommits 511 */ 512 public void setAutoFlush(boolean autoFlush, boolean clearBufferOnFail); 513 514 /** 515 * Returns the maximum size in bytes of the write buffer for this HTable. 516 * <p> 517 * The default value comes from the configuration parameter 518 * {@code hbase.client.write.buffer}. 519 * @return The size of the write buffer in bytes. 520 */ 521 public long getWriteBufferSize(); 522 523 /** 524 * Sets the size of the buffer in bytes. 525 * <p> 526 * If the new size is less than the current amount of data in the 527 * write buffer, the buffer gets flushed. 528 * @param writeBufferSize The new write buffer size, in bytes. 529 * @throws IOException if a remote or network exception occurs. 530 */ 531 public void setWriteBufferSize(long writeBufferSize) throws IOException; 532 }