1 /** 2 * Copyright 2008 The Apache Software Foundation 3 * 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 package org.apache.hadoop.hbase.mapreduce; 21 22 import java.io.IOException; 23 24 import org.apache.commons.logging.Log; 25 import org.apache.commons.logging.LogFactory; 26 import org.apache.hadoop.conf.Configurable; 27 import org.apache.hadoop.conf.Configuration; 28 import org.apache.hadoop.hbase.HBaseConfiguration; 29 import org.apache.hadoop.hbase.client.HTable; 30 import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 31 import org.apache.hadoop.hbase.util.Bytes; 32 import org.apache.hadoop.mapreduce.Partitioner; 33 34 /** 35 * This is used to partition the output keys into groups of keys. 36 * Keys are grouped according to the regions that currently exist 37 * so that each reducer fills a single region so load is distributed. 38 * 39 * <p>This class is not suitable as partitioner creating hfiles 40 * for incremental bulk loads as region spread will likely change between time of 41 * hfile creation and load time. See {@link LoadIncrementalHFiles} 42 * and <a href="http://hbase.apache.org/docs/current/bulk-loads.html">Bulk Load</a>. 43 * 44 * @param <KEY> The type of the key. 45 * @param <VALUE> The type of the value. 46 */ 47 public class HRegionPartitioner<KEY, VALUE> 48 extends Partitioner<ImmutableBytesWritable, VALUE> 49 implements Configurable { 50 51 private final Log LOG = LogFactory.getLog(TableInputFormat.class); 52 private Configuration conf = null; 53 private HTable table; 54 private byte[][] startKeys; 55 56 /** 57 * Gets the partition number for a given key (hence record) given the total 58 * number of partitions i.e. number of reduce-tasks for the job. 59 * 60 * <p>Typically a hash function on a all or a subset of the key.</p> 61 * 62 * @param key The key to be partitioned. 63 * @param value The entry value. 64 * @param numPartitions The total number of partitions. 65 * @return The partition number for the <code>key</code>. 66 * @see org.apache.hadoop.mapreduce.Partitioner#getPartition( 67 * java.lang.Object, java.lang.Object, int) 68 */ 69 @Override 70 public int getPartition(ImmutableBytesWritable key, 71 VALUE value, int numPartitions) { 72 byte[] region = null; 73 // Only one region return 0 74 if (this.startKeys.length == 1){ 75 return 0; 76 } 77 try { 78 // Not sure if this is cached after a split so we could have problems 79 // here if a region splits while mapping 80 region = table.getRegionLocation(key.get()).getRegionInfo().getStartKey(); 81 } catch (IOException e) { 82 LOG.error(e); 83 } 84 for (int i = 0; i < this.startKeys.length; i++){ 85 if (Bytes.compareTo(region, this.startKeys[i]) == 0 ){ 86 if (i >= numPartitions-1){ 87 // cover if we have less reduces then regions. 88 return (Integer.toString(i).hashCode() 89 & Integer.MAX_VALUE) % numPartitions; 90 } 91 return i; 92 } 93 } 94 // if above fails to find start key that match we need to return something 95 return 0; 96 } 97 98 /** 99 * Returns the current configuration. 100 * 101 * @return The current configuration. 102 * @see org.apache.hadoop.conf.Configurable#getConf() 103 */ 104 @Override 105 public Configuration getConf() { 106 return conf; 107 } 108 109 /** 110 * Sets the configuration. This is used to determine the start keys for the 111 * given table. 112 * 113 * @param configuration The configuration to set. 114 * @see org.apache.hadoop.conf.Configurable#setConf( 115 * org.apache.hadoop.conf.Configuration) 116 */ 117 @Override 118 public void setConf(Configuration configuration) { 119 this.conf = configuration; 120 try { 121 HBaseConfiguration.addHbaseResources(conf); 122 this.table = new HTable(this.conf, 123 configuration.get(TableOutputFormat.OUTPUT_TABLE)); 124 } catch (IOException e) { 125 LOG.error(e); 126 } 127 try { 128 this.startKeys = this.table.getStartKeys(); 129 } catch (IOException e) { 130 LOG.error(e); 131 } 132 } 133 }