/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Tests writing a version 2 {@link HFile}. This is a low-level test written
 * during the development of {@link HFileWriterV2}.
 */
@Category(SmallTests.class)
public class TestHFileWriterV2 {

  private static final Log LOG = LogFactory.getLog(TestHFileWriterV2.class);

  private static final HBaseTestingUtility TEST_UTIL =
      new HBaseTestingUtility();

  private Configuration conf;
  private FileSystem fs;

  @Before
  public void setUp() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    fs = FileSystem.get(conf);
  }

  @Test
  public void testHFileFormatV2() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "testHFileFormatV2");
    final Compression.Algorithm compressAlgo = Compression.Algorithm.GZ;
    final int entryCount = 10000;
    writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, false);
  }

  @Test
  public void testMidKeyInHFile() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "testMidKeyInHFile");
    Compression.Algorithm compressAlgo = Compression.Algorithm.NONE;
    int entryCount = 50000;
    writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, true);
  }

  private void writeDataAndReadFromHFile(Path hfilePath,
      Algorithm compressAlgo, int entryCount, boolean findMidKey) throws IOException {

    HFileContext context = new HFileContextBuilder()
        .withBlockSize(4096)
        .withCompression(compressAlgo)
        .build();
    HFileWriterV2 writer = (HFileWriterV2)
        new HFileWriterV2.WriterFactoryV2(conf, new CacheConfig(conf))
            .withPath(fs, hfilePath)
            .withFileContext(context)
            .create();

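    // HFileWriterV2 requires cells to be appended in sorted key order;
    // randomOrderedKey produces strictly increasing keys by construction.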
    Random rand = new Random(9713312); // Just a fixed seed.
    List<KeyValue> keyValues = new ArrayList<KeyValue>(entryCount);

    for (int i = 0; i < entryCount; ++i) {
      byte[] keyBytes = randomOrderedKey(rand, i);

      // A random-length random value.
      byte[] valueBytes = randomValue(rand);
      KeyValue keyValue = new KeyValue(keyBytes, null, null, valueBytes);
      writer.append(keyValue);
      keyValues.add(keyValue);
    }

    // Add in an arbitrary order. They will be sorted lexicographically by
    // the key.
    writer.appendMetaBlock("CAPITAL_OF_USA", new Text("Washington, D.C."));
    writer.appendMetaBlock("CAPITAL_OF_RUSSIA", new Text("Moscow"));
    writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));

    writer.close();

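    // The file on disk now has the version 2 layout: data blocks, then meta
    // blocks, then the "load-on-open" section (root data index, meta index,
    // file info), then the fixed file trailer. The sequential scan below
    // relies on the block index fitting in a single root-level block, so no
    // leaf index blocks are interleaved with the data blocks.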
    FSDataInputStream fsdis = fs.open(hfilePath);

    // A "manual" version of a new-format HFile reader. This unit test was
    // written before the V2 reader was fully implemented.

    long fileSize = fs.getFileStatus(hfilePath).getLen();
    FixedFileTrailer trailer =
        FixedFileTrailer.readFromStream(fsdis, fileSize);

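    // The trailer is the entry point for reading the file: it records the
    // format version, the entry count, the comparator class name, the number
    // of data index levels, and the offsets of the load-on-open section and
    // of the last data block.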
    assertEquals(2, trailer.getMajorVersion());
    assertEquals(entryCount, trailer.getEntryCount());

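    // Build a read-side context that matches how the file was written:
    // HBase-style checksums and the same compression algorithm. Memstore
    // timestamps are handled by hand below, driven by the file info rather
    // than by this context.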
    HFileContext meta = new HFileContextBuilder()
        .withHBaseCheckSum(true)
        .withIncludesMvcc(false)
        .withIncludesTags(false)
        .withCompression(compressAlgo)
        .build();

    HFileBlock.FSReader blockReader = new HFileBlock.FSReaderV2(fsdis, fileSize, meta);
    // Comparator class name is stored in the trailer in version 2.
    KVComparator comparator = trailer.createComparator();
    HFileBlockIndex.BlockIndexReader dataBlockIndexReader =
        new HFileBlockIndex.BlockIndexReader(comparator,
            trailer.getNumDataIndexLevels());
    HFileBlockIndex.BlockIndexReader metaBlockIndexReader =
        new HFileBlockIndex.BlockIndexReader(
            KeyValue.RAW_COMPARATOR, 1);

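    // The load-on-open section starts at trailer.getLoadOnOpenDataOffset()
    // and holds, in order, the root data index block, the meta index block,
    // and the file info block; they are read back in that order below.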
    HFileBlock.BlockIterator blockIter = blockReader.blockRange(
        trailer.getLoadOnOpenDataOffset(),
        fileSize - trailer.getTrailerSize());
    // Data index. We also read statistics about the block index written after
    // the root level.
    dataBlockIndexReader.readMultiLevelIndexRoot(
        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
        trailer.getDataIndexCount());

    if (findMidKey) {
      byte[] midkey = dataBlockIndexReader.midkey();
      assertNotNull("Midkey should not be null", midkey);
    }

    // Meta index.
    metaBlockIndexReader.readRootIndex(
        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX).getByteStream(),
        trailer.getMetaIndexCount());
    // File info.
    FileInfo fileInfo = new FileInfo();
    fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
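    // A KEY_VALUE_VERSION greater than zero in the file info means that each
    // cell in a data block is followed by its memstore timestamp.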
    byte[] keyValueFormatVersion = fileInfo.get(
        HFileWriterV2.KEY_VALUE_VERSION);
    boolean includeMemstoreTS = keyValueFormatVersion != null &&
        Bytes.toInt(keyValueFormatVersion) > 0;

    // Counters for the number of key/value pairs and the number of blocks.
    int entriesRead = 0;
    int blocksRead = 0;
    long memstoreTS = 0;

    // Scan blocks the way the reader would scan them.
    fsdis.seek(0);
    long curBlockPos = 0;
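    // Walk the data blocks back to back: each block header records its
    // on-disk size, so the next block starts at curBlockPos plus
    // block.getOnDiskSizeWithHeader().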
    while (curBlockPos <= trailer.getLastDataBlockOffset()) {
      HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false);
      assertEquals(BlockType.DATA, block.getBlockType());
      ByteBuffer buf = block.getBufferWithoutHeader();
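      // Each cell in the block body is serialized as a 4-byte key length,
      // a 4-byte value length, the key bytes, the value bytes, and, when
      // includeMemstoreTS is set, a trailing vLong memstore timestamp.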
      while (buf.hasRemaining()) {
        int keyLen = buf.getInt();
        int valueLen = buf.getInt();

        byte[] key = new byte[keyLen];
        buf.get(key);

        byte[] value = new byte[valueLen];
        buf.get(value);

        if (includeMemstoreTS) {
          ByteArrayInputStream byteInput = new ByteArrayInputStream(buf.array(),
              buf.arrayOffset() + buf.position(), buf.remaining());
          DataInputStream dataInput = new DataInputStream(byteInput);

          memstoreTS = WritableUtils.readVLong(dataInput);
          buf.position(buf.position() + WritableUtils.getVIntSize(memstoreTS));
        }

        // A brute-force check to see that all keys and values are correct.
        assertTrue(Bytes.compareTo(key, keyValues.get(entriesRead).getKey()) == 0);
        assertTrue(Bytes.compareTo(value, keyValues.get(entriesRead).getValue()) == 0);

        ++entriesRead;
      }
      ++blocksRead;
      curBlockPos += block.getOnDiskSizeWithHeader();
    }
    LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead="
        + blocksRead);
    assertEquals(entryCount, entriesRead);

    // Meta blocks. We can scan until the load-on-open data offset (which is
    // the root block index offset in version 2) because we are not testing
    // intermediate-level index blocks here.

    int metaCounter = 0;
    while (fsdis.getPos() < trailer.getLoadOnOpenDataOffset()) {
      LOG.info("Current offset: " + fsdis.getPos() + ", scanning until " +
          trailer.getLoadOnOpenDataOffset());
      HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false);
      assertEquals(BlockType.META, block.getBlockType());
      Text t = new Text();
      ByteBuffer buf = block.getBufferWithoutHeader();
      if (Writables.getWritable(buf.array(), buf.arrayOffset(), buf.limit(), t) == null) {
        throw new IOException("Failed to deserialize block " + block + " into a "
            + t.getClass().getSimpleName());
      }
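      // The meta blocks were sorted by name at write time (CAPITAL_OF_FRANCE,
      // CAPITAL_OF_RUSSIA, CAPITAL_OF_USA), so their values come back in the
      // order Paris, Moscow, Washington, D.C.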
      Text expectedText =
          (metaCounter == 0 ? new Text("Paris") : metaCounter == 1 ? new Text(
              "Moscow") : new Text("Washington, D.C."));
      assertEquals(expectedText, t);
      LOG.info("Read meta block data: " + t);
      ++metaCounter;
      curBlockPos += block.getOnDiskSizeWithHeader();
    }

    fsdis.close();
  }

  // Static stuff used by various HFile v2 unit tests.

  private static final String COLUMN_FAMILY_NAME = "_-myColumnFamily-_";
  private static final int MIN_ROW_OR_QUALIFIER_LENGTH = 64;
  private static final int MAX_ROW_OR_QUALIFIER_LENGTH = 128;

  /**
   * Generates a random key that is guaranteed to increase as the given index i
   * increases. The result consists of a prefix, which is a deterministic
   * increasing function of i, and a random suffix.
   *
   * @param rand random number generator to use
   * @param i index used to derive the deterministic, lexicographically
   *          increasing prefix of the key
   * @return the generated key bytes
   */
  public static byte[] randomOrderedKey(Random rand, int i) {
    StringBuilder k = new StringBuilder();

    // The fixed-length lexicographically increasing part of the key.
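    // Each of the 32 bits of i, most significant bit first, becomes 'a' (0)
    // or 'b' (1), so the prefix alone orders the keys by i.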
    for (int bitIndex = 31; bitIndex >= 0; --bitIndex) {
      if ((i & (1 << bitIndex)) == 0)
        k.append("a");
      else
        k.append("b");
    }

    // A random-length random suffix of the key.
    for (int j = 0; j < rand.nextInt(50); ++j)
      k.append(randomReadableChar(rand));

    byte[] keyBytes = k.toString().getBytes();
    return keyBytes;
  }

  public static byte[] randomValue(Random rand) {
    StringBuilder v = new StringBuilder();
    for (int j = 0; j < 1 + rand.nextInt(2000); ++j) {
      v.append((char) (32 + rand.nextInt(95)));
    }

    byte[] valueBytes = v.toString().getBytes();
    return valueBytes;
  }

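  /** Returns a character drawn uniformly from the 63-character set [A-Za-z0-9_]. */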
  public static char randomReadableChar(Random rand) {
    int i = rand.nextInt(26 * 2 + 10 + 1);
    if (i < 26)
      return (char) ('A' + i);
    i -= 26;

    if (i < 26)
      return (char) ('a' + i);
    i -= 26;

    if (i < 10)
      return (char) ('0' + i);
    i -= 10;

    assert i == 0;
    return '_';
  }

  public static byte[] randomRowOrQualifier(Random rand) {
    StringBuilder field = new StringBuilder();
    int fieldLen = MIN_ROW_OR_QUALIFIER_LENGTH
        + rand.nextInt(MAX_ROW_OR_QUALIFIER_LENGTH
            - MIN_ROW_OR_QUALIFIER_LENGTH + 1);
    for (int i = 0; i < fieldLen; ++i)
      field.append(randomReadableChar(rand));
    return field.toString().getBytes();
  }

  public static KeyValue randomKeyValue(Random rand) {
    return new KeyValue(randomRowOrQualifier(rand),
        COLUMN_FAMILY_NAME.getBytes(), randomRowOrQualifier(rand),
        randomValue(rand));
  }

}