/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 * Iterates over the rows of an HBase table, producing
 * (ImmutableBytesWritable, Result) pairs for the MapReduce framework.
 * All work is delegated to a {@link TableRecordReaderImpl} instance;
 * this class only adapts it to the {@link RecordReader} contract.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableRecordReader
extends RecordReader<ImmutableBytesWritable, Result> {

  /** Backing implementation that performs the actual scanning. */
  private TableRecordReaderImpl delegate = new TableRecordReaderImpl();

  /**
   * Restarts the scan from the given row, recovering from survivable
   * exceptions by creating a fresh scanner.
   *
   * @param firstRow The first row to start at.
   * @throws IOException When restarting fails.
   */
  public void restart(byte[] firstRow) throws IOException {
    this.delegate.restart(firstRow);
  }

  /**
   * Sets the HBase table to read from.
   *
   * @param htable The {@link HTable} to scan.
   */
  public void setHTable(HTable htable) {
    this.delegate.setHTable(htable);
  }

  /**
   * Sets the scan that defines the actual details, such as the columns
   * to retrieve.
   *
   * @param scan The scan to set.
   */
  public void setScan(Scan scan) {
    this.delegate.setScan(scan);
  }

  /**
   * Closes the split.
   *
   * @see org.apache.hadoop.mapreduce.RecordReader#close()
   */
  @Override
  public void close() {
    this.delegate.close();
  }

  /**
   * Returns the current key.
   *
   * @return The current key.
   * @throws IOException When reading the key fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentKey()
   */
  @Override
  public ImmutableBytesWritable getCurrentKey() throws IOException,
      InterruptedException {
    return this.delegate.getCurrentKey();
  }

  /**
   * Returns the current value.
   *
   * @return The current value.
   * @throws IOException When the value is faulty.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentValue()
   */
  @Override
  public Result getCurrentValue() throws IOException, InterruptedException {
    return this.delegate.getCurrentValue();
  }

  /**
   * Initializes the reader for the given split.
   *
   * @param inputsplit The split to work with.
   * @param context The current task context.
   * @throws IOException When setting up the reader fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.RecordReader#initialize(
   *   org.apache.hadoop.mapreduce.InputSplit,
   *   org.apache.hadoop.mapreduce.TaskAttemptContext)
   */
  @Override
  public void initialize(InputSplit inputsplit,
      TaskAttemptContext context) throws IOException,
      InterruptedException {
    this.delegate.initialize(inputsplit, context);
  }

  /**
   * Positions the record reader to the next record.
   *
   * @return <code>true</code> if there was another record.
   * @throws IOException When reading the record failed.
   * @throws InterruptedException When the job was aborted.
   * @see org.apache.hadoop.mapreduce.RecordReader#nextKeyValue()
   */
  @Override
  public boolean nextKeyValue() throws IOException, InterruptedException {
    return this.delegate.nextKeyValue();
  }

  /**
   * The current progress of the record reader through its data.
   *
   * @return A number between 0.0 and 1.0, the fraction of the data read.
   * @see org.apache.hadoop.mapreduce.RecordReader#getProgress()
   */
  @Override
  public float getProgress() {
    return this.delegate.getProgress();
  }
}