/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;

import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 * Iterates over an HBase table's data, returning (ImmutableBytesWritable, Result)
 * pairs. All calls are delegated to the wrapped {@link TableRecordReaderImpl}.
 */
public class TableRecordReader
extends RecordReader<ImmutableBytesWritable, Result> {

  private TableRecordReaderImpl recordReaderImpl = new TableRecordReaderImpl();

  /**
   * Restart from survivable exceptions by creating a new scanner.
   *
   * @param firstRow  The first row to start at.
   * @throws IOException When restarting fails.
   */
  public void restart(byte[] firstRow) throws IOException {
    this.recordReaderImpl.restart(firstRow);
  }

  /**
   * Build the scanner. Not done in the constructor to allow for extension.
   *
   * @throws IOException When restarting the scan fails.
   */
  public void init() throws IOException {
    this.recordReaderImpl.init();
  }

  /**
   * Sets the HBase table.
   *
   * @param htable  The {@link HTable} to scan.
   */
  public void setHTable(HTable htable) {
    this.recordReaderImpl.setHTable(htable);
  }

  /**
   * Sets the scan that defines the details of the read, such as which columns
   * to retrieve.
   *
   * @param scan  The scan to set.
   */
  public void setScan(Scan scan) {
    this.recordReaderImpl.setScan(scan);
  }

  /**
   * Closes the split.
   *
   * @see org.apache.hadoop.mapreduce.RecordReader#close()
   */
  @Override
  public void close() {
    this.recordReaderImpl.close();
  }

  /**
   * Returns the current key.
   *
   * @return The current key.
   * @throws IOException When reading the key fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentKey()
   */
  @Override
  public ImmutableBytesWritable getCurrentKey() throws IOException,
      InterruptedException {
    return this.recordReaderImpl.getCurrentKey();
  }

  /**
   * Returns the current value.
   *
   * @return The current value.
   * @throws IOException When the value is faulty.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentValue()
   */
  @Override
  public Result getCurrentValue() throws IOException, InterruptedException {
    return this.recordReaderImpl.getCurrentValue();
  }

  /**
   * Initializes the reader.
   *
   * @param inputsplit  The split to work with.
   * @param context  The current task context.
   * @throws IOException When setting up the reader fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.RecordReader#initialize(
   *   org.apache.hadoop.mapreduce.InputSplit,
   *   org.apache.hadoop.mapreduce.TaskAttemptContext)
   */
  @Override
  public void initialize(InputSplit inputsplit,
      TaskAttemptContext context) throws IOException,
      InterruptedException {
    // Configuration is done through setHTable() and setScan(), and the
    // scanner is built in init(), so there is nothing to do here.
  }

  /**
   * Positions the record reader at the next record.
   *
   * @return <code>true</code> if there was another record.
   * @throws IOException When reading the record fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.RecordReader#nextKeyValue()
   */
  @Override
  public boolean nextKeyValue() throws IOException, InterruptedException {
    return this.recordReaderImpl.nextKeyValue();
  }

  /**
   * The current progress of the record reader through its data.
   *
   * @return A number between 0.0 and 1.0, the fraction of the data read.
   * @see org.apache.hadoop.mapreduce.RecordReader#getProgress()
   */
  @Override
  public float getProgress() {
    return this.recordReaderImpl.getProgress();
  }
}
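/*
 * Illustrative usage sketch (not part of the class above): this is roughly how
 * a caller such as TableInputFormatBase is expected to drive the reader:
 * configure it with setHTable()/setScan(), build the scanner with init(), then
 * iterate. The variables "table" and "scan" below are hypothetical.
 *
 *   TableRecordReader reader = new TableRecordReader();
 *   reader.setHTable(table);   // the HTable to read from
 *   reader.setScan(scan);      // columns, row range, filters, etc.
 *   reader.init();             // builds the underlying scanner
 *   while (reader.nextKeyValue()) {
 *     ImmutableBytesWritable key = reader.getCurrentKey();  // row key
 *     Result value = reader.getCurrentValue();              // row contents
 *     // ... process the row ...
 *   }
 *   reader.close();
 */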