/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.io.encoding;

import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.io.hfile.HFileContext;

/**
 * Encoding of KeyValue. It aims to be fast and efficient using assumptions:
 * <ul>
 * <li>the KeyValues are stored sorted by key</li>
 * <li>we know the structure of KeyValue</li>
 * <li>the values are always iterated forward from beginning of block</li>
 * <li>knowledge of Key Value format</li>
 * </ul>
 * It is designed to work fast enough to be feasible as in-memory compression.
 *
 * After encoding, it also optionally compresses the encoded data if a
 * compression algorithm is specified in HFileBlockEncodingContext argument of
 * {@link #encodeKeyValues(ByteBuffer, HFileBlockEncodingContext)}.
 */
@InterfaceAudience.Private
public interface DataBlockEncoder {

  /**
   * Encodes KeyValues. It will first encode key value pairs, and then
   * optionally do the compression for the encoded data.
   *
   * @param in
   *          Source of KeyValue for compression.
   * @param encodingCtx
   *          the encoding context which will contain encoded uncompressed bytes
   *          as well as compressed encoded bytes if compression is enabled, and
   *          also it will reuse resources across multiple calls.
   * @throws IOException
   *           If there is an error writing to output stream.
   */
  void encodeKeyValues(ByteBuffer in, HFileBlockEncodingContext encodingCtx) throws IOException;

  /**
   * Decodes KeyValues.
   * @param source Compressed stream of KeyValues.
   * @param decodingCtx the decoding context to use
   * @return Uncompressed block of KeyValues.
   * @throws IOException If there is an error in source.
   */
  ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx)
      throws IOException;

  /**
   * Return first key in block. Useful for indexing. Typically does not make
   * a deep copy but returns a buffer wrapping a segment of the actual block's
   * byte array. This is because the first key in block is usually stored
   * unencoded.
   * @param block encoded block we want index, the position will not change
   * @return First key in block.
   */
  ByteBuffer getFirstKeyInBlock(ByteBuffer block);

  /**
   * Create a HFileBlock seeker which finds KeyValues within a block.
   * @param comparator what kind of comparison should be used
   * @param decodingCtx the decoding context to use
   * @return A newly created seeker.
   */
  EncodedSeeker createSeeker(KVComparator comparator,
      HFileBlockDecodingContext decodingCtx);

  /**
   * Creates an encoder specific encoding context
   *
   * @param encoding
   *          encoding strategy used
   * @param headerBytes
   *          header bytes to be written, put a dummy header here if the header
   *          is unknown
   * @param meta
   *          HFile meta data
   * @return a newly created encoding context
   */
  HFileBlockEncodingContext newDataBlockEncodingContext(
      DataBlockEncoding encoding, byte[] headerBytes, HFileContext meta);

  /**
   * Creates an encoder specific decoding context, which will prepare the data
   * before actual decoding
   *
   * @param meta
   *          HFile meta data
   * @return a newly created decoding context
   */
  HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta);

  /**
   * An interface which enables seeking while the underlying data is encoded.
   *
   * It works on one HFileBlock, but it is reusable. See
   * {@link #setCurrentBuffer(ByteBuffer)}.
   */
  interface EncodedSeeker {
    /**
     * Set on which buffer there will be done seeking.
     * @param buffer Used for seeking.
     */
    void setCurrentBuffer(ByteBuffer buffer);

    /**
     * Does a deep copy of the key at the current position. A deep copy is
     * necessary because buffers are reused in the decoder.
     * @return key at current position
     */
    ByteBuffer getKeyDeepCopy();

    /**
     * Does a shallow copy of the value at the current position. A shallow
     * copy is possible because the returned buffer refers to the backing array
     * of the original encoded buffer.
     * @return value at current position
     */
    ByteBuffer getValueShallowCopy();

    /** @return key value at current position with position set to limit */
    ByteBuffer getKeyValueBuffer();

    /**
     * @return the KeyValue object at the current position. Includes memstore
     *         timestamp.
     */
    KeyValue getKeyValue();

    /** Set position to beginning of given block */
    void rewind();

    /**
     * Move to next position
     * @return true on success, false if there are no more positions.
     */
    boolean next();

    /**
     * Moves the seeker position within the current block to:
     * <ul>
     * <li>the last key that is less than or equal to the given key if
     * <code>seekBefore</code> is false</li>
     * <li>the last key that is strictly less than the given key if <code>
     * seekBefore</code> is true. The caller is responsible for loading the
     * previous block if the requested key turns out to be the first key of the
     * current block.</li>
     * </ul>
     * @param key byte array containing the key
     * @param offset key position in the array
     * @param length key length in bytes
     * @param seekBefore find the key strictly less than the given key in case
     *          of an exact match. Does not matter in case of an inexact match.
     * @return 0 on exact match, 1 on inexact match.
     */
    int seekToKeyInBlock(byte[] key, int offset, int length, boolean seekBefore);

    /**
     * Compare the given key against the current key
     * @param comparator comparator used to perform the comparison
     * @param key byte array containing the key
     * @param offset key position in the array
     * @param length key length in bytes
     * @return -1 if the passed key is smaller than the current key, 0 if equal and 1 if greater
     */
    // Interface methods are implicitly public; the modifier is omitted to match the sibling methods.
    int compareKey(KVComparator comparator, byte[] key, int offset, int length);
  }
}