/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.HRegionLocation;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Utility methods for writing Puts and Deletes to an HTable in
 * per-RegionServer buckets.
 */
@InterfaceAudience.Private
public class HTableUtil {

  private static final int INITIAL_LIST_SIZE = 250;

  /**
   * Processes a List of Puts and writes them to an HTable instance in RegionServer buckets via the htable.put method.
   * This utilizes the writeBuffer, so the flush frequency may be tuned via htable.setWriteBufferSize.
   * <br><br>
   * The benefit of submitting Puts in this manner is to minimize the number of RegionServer RPCs in each flush.
   * <br><br>
   * Assumption #1: Regions have been pre-created for the table. If they haven't, then all of the Puts will go to the same region,
   * defeating the purpose of this utility method. See the Apache HBase book for an explanation of how to do this.
   * <br>
   * Assumption #2: Row-keys are not monotonically increasing. See the Apache HBase book for an explanation of this problem.
   * <br>
   * Assumption #3: The input list of Puts is big enough to be useful (in the thousands or more). The intent of this
   * method is to process larger chunks of data.
   * <br>
   * Assumption #4: htable.setAutoFlush(false) has been set. This is a requirement to use the writeBuffer.
   * <br><br>
   * @param htable HTable instance for target HBase table
   * @param puts List of Put instances
   * @throws IOException if a remote or network exception occurs
   */
  public static void bucketRsPut(HTable htable, List<Put> puts) throws IOException {
    // Group the Puts by hosting RegionServer, then submit one buffered put() per server.
    Map<String, List<Put>> putMap = createRsPutMap(htable, puts);
    for (List<Put> rsPuts : putMap.values()) {
      htable.put(rsPuts);
    }
    htable.flushCommits();
  }
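
  /*
   * A minimal usage sketch for bucketRsPut, not part of the original class.
   * The table name "myTable", column family "cf", qualifier "q", and the
   * hex-prefixed row-key scheme are hypothetical; the prefix just keeps
   * row-keys from increasing monotonically (Assumption #2) on a pre-split
   * table (Assumption #1):
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   HTable htable = new HTable(conf, "myTable");
   *   htable.setAutoFlush(false);                  // Assumption #4: enables the writeBuffer
   *   htable.setWriteBufferSize(8 * 1024 * 1024);  // tune how much is buffered per flush
   *
   *   List<Put> puts = new ArrayList<Put>(10000);
   *   for (int i = 0; i < 10000; i++) {
   *     byte[] rowKey = Bytes.toBytes(Integer.toHexString(i % 16) + "-" + i);
   *     Put put = new Put(rowKey);
   *     put.add(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes(i));
   *     puts.add(put);
   *   }
   *   HTableUtil.bucketRsPut(htable, puts);        // one put() per RegionServer, then one flush
   */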

  /**
   * Processes a List of Rows (Put, Delete) and writes them to an HTable instance in RegionServer buckets via the htable.batch method.
   * <br><br>
   * The benefit of submitting Rows in this manner is to minimize the number of RegionServer RPCs: one batch RPC is produced
   * per RegionServer.
   * <br><br>
   * Assumption #1: Regions have been pre-created for the table. If they haven't, then all of the writes will go to the same region,
   * defeating the purpose of this utility method. See the Apache HBase book for an explanation of how to do this.
   * <br>
   * Assumption #2: Row-keys are not monotonically increasing. See the Apache HBase book for an explanation of this problem.
   * <br>
   * Assumption #3: The input list of Rows is big enough to be useful (in the thousands or more). The intent of this
   * method is to process larger chunks of data.
   * <br><br>
   * This method accepts a List of Row objects because that is what the underlying HTable.batch method accepts.
   * <br><br>
   * @param htable HTable instance for target HBase table
   * @param rows List of Row instances
   * @throws IOException if a remote or network exception occurs
   */
  public static void bucketRsBatch(HTable htable, List<Row> rows) throws IOException {
    try {
      // Group the Rows by hosting RegionServer, then submit one batch() RPC per server.
      Map<String, List<Row>> rowMap = createRsRowMap(htable, rows);
      for (List<Row> rsRows : rowMap.values()) {
        htable.batch(rsRows);
      }
    } catch (InterruptedException e) {
      throw new IOException(e);
    }
  }

  private static Map<String, List<Put>> createRsPutMap(HTable htable, List<Put> puts) throws IOException {
    Map<String, List<Put>> putMap = new HashMap<String, List<Put>>();
    for (Put put : puts) {
      // Locate the region hosting this row and bucket the Put by the server's hostname.
      HRegionLocation rl = htable.getRegionLocation(put.getRow());
      String hostname = rl.getHostname();
      List<Put> recs = putMap.get(hostname);
      if (recs == null) {
        recs = new ArrayList<Put>(INITIAL_LIST_SIZE);
        putMap.put(hostname, recs);
      }
      recs.add(put);
    }
    return putMap;
  }

  private static Map<String, List<Row>> createRsRowMap(HTable htable, List<Row> rows) throws IOException {
    Map<String, List<Row>> rowMap = new HashMap<String, List<Row>>();
    for (Row row : rows) {
      // Same bucketing as createRsPutMap, but for generic Rows (Puts and Deletes).
      HRegionLocation rl = htable.getRegionLocation(row.getRow());
      String hostname = rl.getHostname();
      List<Row> recs = rowMap.get(hostname);
      if (recs == null) {
        recs = new ArrayList<Row>(INITIAL_LIST_SIZE);
        rowMap.put(hostname, recs);
      }
      recs.add(row);
    }
    return rowMap;
  }
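
  /*
   * A minimal usage sketch for bucketRsBatch, not part of the original class.
   * The table name "myTable", column family "cf", qualifier "q", and the row
   * keys are hypothetical; it shows that Puts and Deletes can be mixed in one
   * call because both implement Row:
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   HTable htable = new HTable(conf, "myTable");
   *
   *   List<Row> rows = new ArrayList<Row>();
   *   Put put = new Put(Bytes.toBytes("row-42"));
   *   put.add(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes("value"));
   *   rows.add(put);                                 // Put implements Row
   *   rows.add(new Delete(Bytes.toBytes("row-17"))); // so does Delete
   *   HTableUtil.bucketRsBatch(htable, rows);        // one batch() RPC per RegionServer
   */

}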