1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.io.hfile;
21
22 import static org.junit.Assert.assertEquals;
23 import static org.junit.Assert.assertFalse;
24 import static org.junit.Assert.assertTrue;
25
26 import java.io.ByteArrayOutputStream;
27 import java.io.DataOutputStream;
28 import java.io.IOException;
29 import java.nio.ByteBuffer;
30 import java.util.ArrayList;
31 import java.util.Arrays;
32 import java.util.Collection;
33 import java.util.HashSet;
34 import java.util.List;
35 import java.util.Random;
36 import java.util.Set;
37
38 import org.apache.commons.logging.Log;
39 import org.apache.commons.logging.LogFactory;
40 import org.apache.hadoop.conf.Configuration;
41 import org.apache.hadoop.fs.FSDataInputStream;
42 import org.apache.hadoop.fs.FSDataOutputStream;
43 import org.apache.hadoop.fs.FileSystem;
44 import org.apache.hadoop.fs.Path;
45 import org.apache.hadoop.hbase.HBaseTestingUtility;
46 import org.apache.hadoop.hbase.HConstants;
47 import org.apache.hadoop.hbase.KeyValue;
48 import org.apache.hadoop.hbase.MediumTests;
49 import org.apache.hadoop.hbase.fs.HFileSystem;
50 import org.apache.hadoop.hbase.io.compress.Compression;
51 import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
52 import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
53 import org.apache.hadoop.hbase.util.Bytes;
54 import org.apache.hadoop.hbase.util.ClassSize;
55 import org.junit.Before;
56 import org.junit.Test;
57 import org.junit.experimental.categories.Category;
58 import org.junit.runner.RunWith;
59 import org.junit.runners.Parameterized;
60 import org.junit.runners.Parameterized.Parameters;
61
62 @RunWith(Parameterized.class)
63 @Category(MediumTests.class)
64 public class TestHFileBlockIndex {
65
66 @Parameters
67 public static Collection<Object[]> compressionAlgorithms() {
68 return HBaseTestingUtility.COMPRESSION_ALGORITHMS_PARAMETERIZED;
69 }
70
71 public TestHFileBlockIndex(Compression.Algorithm compr) {
72 this.compr = compr;
73 }
74
75 private static final Log LOG = LogFactory.getLog(TestHFileBlockIndex.class);
76
77 private static final int NUM_DATA_BLOCKS = 1000;
78 private static final HBaseTestingUtility TEST_UTIL =
79 new HBaseTestingUtility();
80
81 private static final int SMALL_BLOCK_SIZE = 4096;
82 private static final int NUM_KV = 10000;
83
84 private static FileSystem fs;
85 private Path path;
86 private Random rand;
87 private long rootIndexOffset;
88 private int numRootEntries;
89 private int numLevels;
90 private static final List<byte[]> keys = new ArrayList<byte[]>();
91 private final Compression.Algorithm compr;
92 private byte[] firstKeyInFile;
93 private Configuration conf;
94
95 private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 };
96 private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 };
97 private static final int[] UNCOMPRESSED_INDEX_SIZES =
98 { 19187, 21813, 23086 };
99
100 private static final boolean includesMemstoreTS = true;
101
102 static {
103 assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length;
104 assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length;
105 }
106
107 @Before
108 public void setUp() throws IOException {
109 keys.clear();
110 rand = new Random(2389757);
111 firstKeyInFile = null;
112 conf = TEST_UTIL.getConfiguration();
113
114
115 conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
116
117 fs = HFileSystem.get(conf);
118 }
119
120 @Test
121 public void testBlockIndex() throws IOException {
122 testBlockIndexInternals(false);
123 clear();
124 testBlockIndexInternals(true);
125 }
126
127 private void clear() throws IOException {
128 keys.clear();
129 rand = new Random(2389757);
130 firstKeyInFile = null;
131 conf = TEST_UTIL.getConfiguration();
132
133
134 conf.setInt(HFile.FORMAT_VERSION_KEY, 3);
135
136 fs = HFileSystem.get(conf);
137 }
138
139 protected void testBlockIndexInternals(boolean useTags) throws IOException {
140 path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr + useTags);
141 writeWholeIndex(useTags);
142 readIndex(useTags);
143 }
144
145
146
147
148
149 private static class BlockReaderWrapper implements HFile.CachingBlockReader {
150
151 private HFileBlock.FSReader realReader;
152 private long prevOffset;
153 private long prevOnDiskSize;
154 private boolean prevPread;
155 private HFileBlock prevBlock;
156
157 public int hitCount = 0;
158 public int missCount = 0;
159
160 public BlockReaderWrapper(HFileBlock.FSReader realReader) {
161 this.realReader = realReader;
162 }
163
164 @Override
165 public HFileBlock readBlock(long offset, long onDiskSize,
166 boolean cacheBlock, boolean pread, boolean isCompaction,
167 boolean updateCacheMetrics, BlockType expectedBlockType)
168 throws IOException {
169 if (offset == prevOffset && onDiskSize == prevOnDiskSize &&
170 pread == prevPread) {
171 hitCount += 1;
172 return prevBlock;
173 }
174
175 missCount += 1;
176 prevBlock = realReader.readBlockData(offset, onDiskSize,
177 -1, pread);
178 prevOffset = offset;
179 prevOnDiskSize = onDiskSize;
180 prevPread = pread;
181
182 return prevBlock;
183 }
184 }
185
186 public void readIndex(boolean useTags) throws IOException {
187 long fileSize = fs.getFileStatus(path).getLen();
188 LOG.info("Size of " + path + ": " + fileSize);
189
190 FSDataInputStream istream = fs.open(path);
191 HFileContext meta = new HFileContextBuilder()
192 .withHBaseCheckSum(true)
193 .withIncludesMvcc(includesMemstoreTS)
194 .withIncludesTags(useTags)
195 .withCompression(compr)
196 .build();
197 HFileBlock.FSReader blockReader = new HFileBlock.FSReaderV2(istream, fs.getFileStatus(path)
198 .getLen(), meta);
199
200 BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
201 HFileBlockIndex.BlockIndexReader indexReader =
202 new HFileBlockIndex.BlockIndexReader(
203 KeyValue.RAW_COMPARATOR, numLevels, brw);
204
205 indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
206 fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);
207
208 long prevOffset = -1;
209 int i = 0;
210 int expectedHitCount = 0;
211 int expectedMissCount = 0;
212 LOG.info("Total number of keys: " + keys.size());
213 for (byte[] key : keys) {
214 assertTrue(key != null);
215 assertTrue(indexReader != null);
216 HFileBlock b = indexReader.seekToDataBlock(key, 0, key.length, null,
217 true, true, false);
218 if (Bytes.BYTES_RAWCOMPARATOR.compare(key, firstKeyInFile) < 0) {
219 assertTrue(b == null);
220 ++i;
221 continue;
222 }
223
224 String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key);
225
226 assertTrue("seekToDataBlock failed for " + keyStr, b != null);
227
228 if (prevOffset == b.getOffset()) {
229 assertEquals(++expectedHitCount, brw.hitCount);
230 } else {
231 LOG.info("First key in a new block: " + keyStr + ", block offset: "
232 + b.getOffset() + ")");
233 assertTrue(b.getOffset() > prevOffset);
234 assertEquals(++expectedMissCount, brw.missCount);
235 prevOffset = b.getOffset();
236 }
237 ++i;
238 }
239
240 istream.close();
241 }
242
243 private void writeWholeIndex(boolean useTags) throws IOException {
244 assertEquals(0, keys.size());
245 HFileContext meta = new HFileContextBuilder()
246 .withHBaseCheckSum(true)
247 .withIncludesMvcc(includesMemstoreTS)
248 .withIncludesTags(useTags)
249 .withCompression(compr)
250 .withChecksumType(HFile.DEFAULT_CHECKSUM_TYPE)
251 .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM)
252 .build();
253 HFileBlock.Writer hbw = new HFileBlock.Writer(null,
254 meta);
255 FSDataOutputStream outputStream = fs.create(path);
256 HFileBlockIndex.BlockIndexWriter biw =
257 new HFileBlockIndex.BlockIndexWriter(hbw, null, null);
258
259 for (int i = 0; i < NUM_DATA_BLOCKS; ++i) {
260 hbw.startWriting(BlockType.DATA).write(
261 String.valueOf(rand.nextInt(1000)).getBytes());
262 long blockOffset = outputStream.getPos();
263 hbw.writeHeaderAndData(outputStream);
264
265 byte[] firstKey = null;
266 for (int j = 0; j < 16; ++j) {
267 byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i * 16 + j);
268 keys.add(k);
269 if (j == 8)
270 firstKey = k;
271 }
272 assertTrue(firstKey != null);
273 if (firstKeyInFile == null)
274 firstKeyInFile = firstKey;
275 biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader());
276
277 writeInlineBlocks(hbw, outputStream, biw, false);
278 }
279 writeInlineBlocks(hbw, outputStream, biw, true);
280 rootIndexOffset = biw.writeIndexBlocks(outputStream);
281 outputStream.close();
282
283 numLevels = biw.getNumLevels();
284 numRootEntries = biw.getNumRootEntries();
285
286 LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" +
287 numRootEntries + ", rootIndexOffset=" + rootIndexOffset);
288 }
289
290 private void writeInlineBlocks(HFileBlock.Writer hbw,
291 FSDataOutputStream outputStream, HFileBlockIndex.BlockIndexWriter biw,
292 boolean isClosing) throws IOException {
293 while (biw.shouldWriteBlock(isClosing)) {
294 long offset = outputStream.getPos();
295 biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType()));
296 hbw.writeHeaderAndData(outputStream);
297 biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(),
298 hbw.getUncompressedSizeWithoutHeader());
299 LOG.info("Wrote an inline index block at " + offset + ", size " +
300 hbw.getOnDiskSizeWithHeader());
301 }
302 }
303
304 private static final long getDummyFileOffset(int i) {
305 return i * 185 + 379;
306 }
307
308 private static final int getDummyOnDiskSize(int i) {
309 return i * i * 37 + i * 19 + 13;
310 }
311
312 @Test
313 public void testSecondaryIndexBinarySearch() throws IOException {
314 int numTotalKeys = 99;
315 assertTrue(numTotalKeys % 2 == 1);
316
317
318 int numSearchedKeys = (numTotalKeys - 1) / 2;
319
320 ByteArrayOutputStream baos = new ByteArrayOutputStream();
321 DataOutputStream dos = new DataOutputStream(baos);
322
323 dos.writeInt(numSearchedKeys);
324 int curAllEntriesSize = 0;
325 int numEntriesAdded = 0;
326
327
328
329 int secondaryIndexEntries[] = new int[numTotalKeys];
330
331 for (int i = 0; i < numTotalKeys; ++i) {
332 byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i * 2);
333 keys.add(k);
334 String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): ";
335 StringBuilder padding = new StringBuilder();
336 while (msgPrefix.length() + padding.length() < 70)
337 padding.append(' ');
338 msgPrefix += padding;
339 if (i % 2 == 1) {
340 dos.writeInt(curAllEntriesSize);
341 secondaryIndexEntries[i] = curAllEntriesSize;
342 LOG.info(msgPrefix + "secondary index entry #" + ((i - 1) / 2) +
343 ", offset " + curAllEntriesSize);
344 curAllEntriesSize += k.length
345 + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
346 ++numEntriesAdded;
347 } else {
348 secondaryIndexEntries[i] = -1;
349 LOG.info(msgPrefix + "not in the searched array");
350 }
351 }
352
353
354 for (int i = 0; i < keys.size() - 1; ++i)
355 assertTrue(Bytes.BYTES_RAWCOMPARATOR.compare(keys.get(i),
356 keys.get(i + 1)) < 0);
357
358 dos.writeInt(curAllEntriesSize);
359 assertEquals(numSearchedKeys, numEntriesAdded);
360 int secondaryIndexOffset = dos.size();
361 assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2),
362 secondaryIndexOffset);
363
364 for (int i = 1; i <= numTotalKeys - 1; i += 2) {
365 assertEquals(dos.size(),
366 secondaryIndexOffset + secondaryIndexEntries[i]);
367 long dummyFileOffset = getDummyFileOffset(i);
368 int dummyOnDiskSize = getDummyOnDiskSize(i);
369 LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" +
370 dummyOnDiskSize + " at offset " + dos.size());
371 dos.writeLong(dummyFileOffset);
372 dos.writeInt(dummyOnDiskSize);
373 LOG.debug("Stored key " + ((i - 1) / 2) +" at offset " + dos.size());
374 dos.write(keys.get(i));
375 }
376
377 dos.writeInt(curAllEntriesSize);
378
379 ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray());
380 for (int i = 0; i < numTotalKeys; ++i) {
381 byte[] searchKey = keys.get(i);
382 byte[] arrayHoldingKey = new byte[searchKey.length +
383 searchKey.length / 2];
384
385
386
387 System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2,
388 searchKey.length);
389
390 int searchResult = BlockIndexReader.binarySearchNonRootIndex(
391 arrayHoldingKey, searchKey.length / 2, searchKey.length, nonRootIndex,
392 KeyValue.RAW_COMPARATOR);
393 String lookupFailureMsg = "Failed to look up key #" + i + " ("
394 + Bytes.toStringBinary(searchKey) + ")";
395
396 int expectedResult;
397 int referenceItem;
398
399 if (i % 2 == 1) {
400
401
402 expectedResult = (i - 1) / 2;
403 referenceItem = i;
404 } else {
405
406
407
408 expectedResult = i / 2 - 1;
409 referenceItem = i - 1;
410 }
411
412 assertEquals(lookupFailureMsg, expectedResult, searchResult);
413
414
415
416 boolean locateBlockResult =
417 (BlockIndexReader.locateNonRootIndexEntry(nonRootIndex, arrayHoldingKey,
418 searchKey.length / 2, searchKey.length, KeyValue.RAW_COMPARATOR) != -1);
419
420 if (i == 0) {
421 assertFalse(locateBlockResult);
422 } else {
423 assertTrue(locateBlockResult);
424 String errorMsg = "i=" + i + ", position=" + nonRootIndex.position();
425 assertEquals(errorMsg, getDummyFileOffset(referenceItem),
426 nonRootIndex.getLong());
427 assertEquals(errorMsg, getDummyOnDiskSize(referenceItem),
428 nonRootIndex.getInt());
429 }
430 }
431
432 }
433
434 @Test
435 public void testBlockIndexChunk() throws IOException {
436 BlockIndexChunk c = new BlockIndexChunk();
437 ByteArrayOutputStream baos = new ByteArrayOutputStream();
438 int N = 1000;
439 int[] numSubEntriesAt = new int[N];
440 int numSubEntries = 0;
441 for (int i = 0; i < N; ++i) {
442 baos.reset();
443 DataOutputStream dos = new DataOutputStream(baos);
444 c.writeNonRoot(dos);
445 assertEquals(c.getNonRootSize(), dos.size());
446
447 baos.reset();
448 dos = new DataOutputStream(baos);
449 c.writeRoot(dos);
450 assertEquals(c.getRootSize(), dos.size());
451
452 byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i);
453 numSubEntries += rand.nextInt(5) + 1;
454 keys.add(k);
455 c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries);
456 }
457
458
459
460
461 for (int i = 0; i < N; ++i) {
462 for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1];
463 j < numSubEntriesAt[i];
464 ++j) {
465 assertEquals(i, c.getEntryBySubEntry(j));
466 }
467 }
468 }
469
470
471 @Test
472 public void testHeapSizeForBlockIndex() throws IOException {
473 Class<HFileBlockIndex.BlockIndexReader> cl =
474 HFileBlockIndex.BlockIndexReader.class;
475 long expected = ClassSize.estimateBase(cl, false);
476
477 HFileBlockIndex.BlockIndexReader bi =
478 new HFileBlockIndex.BlockIndexReader(KeyValue.RAW_COMPARATOR, 1);
479 long actual = bi.heapSize();
480
481
482
483
484 expected -= ClassSize.align(3 * ClassSize.ARRAY);
485
486 if (expected != actual) {
487 ClassSize.estimateBase(cl, true);
488 assertEquals(expected, actual);
489 }
490 }
491
492
493
494
495
496
497
498
499 @Test
500 public void testHFileWriterAndReader() throws IOException {
501 Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
502 "hfile_for_block_index");
503 CacheConfig cacheConf = new CacheConfig(conf);
504 BlockCache blockCache = cacheConf.getBlockCache();
505
506 for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) {
507 int indexBlockSize = INDEX_CHUNK_SIZES[testI];
508 int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
509 LOG.info("Index block size: " + indexBlockSize + ", compression: "
510 + compr);
511
512 blockCache.evictBlocksByHfileName(hfilePath.getName());
513
514 conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
515 Set<String> keyStrSet = new HashSet<String>();
516 byte[][] keys = new byte[NUM_KV][];
517 byte[][] values = new byte[NUM_KV][];
518
519
520 {
521 HFileContext meta = new HFileContextBuilder()
522 .withBlockSize(SMALL_BLOCK_SIZE)
523 .withCompression(compr)
524 .build();
525 HFile.Writer writer =
526 HFile.getWriterFactory(conf, cacheConf)
527 .withPath(fs, hfilePath)
528 .withFileContext(meta)
529 .create();
530 Random rand = new Random(19231737);
531
532 for (int i = 0; i < NUM_KV; ++i) {
533 byte[] row = TestHFileWriterV2.randomOrderedKey(rand, i);
534
535
536 byte[] k = KeyValue.createFirstOnRow(row, 0, row.length, row, 0, 0,
537 row, 0, 0).getKey();
538
539 byte[] v = TestHFileWriterV2.randomValue(rand);
540 writer.append(k, v, HConstants.EMPTY_BYTE_ARRAY);
541 keys[i] = k;
542 values[i] = v;
543 keyStrSet.add(Bytes.toStringBinary(k));
544
545 if (i > 0) {
546 assertTrue(KeyValue.COMPARATOR.compareFlatKey(keys[i - 1],
547 keys[i]) < 0);
548 }
549 }
550
551 writer.close();
552 }
553
554
555 HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, conf);
556 assertEquals(expectedNumLevels,
557 reader.getTrailer().getNumDataIndexLevels());
558
559 assertTrue(Bytes.equals(keys[0], reader.getFirstKey()));
560 assertTrue(Bytes.equals(keys[NUM_KV - 1], reader.getLastKey()));
561 LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1]));
562
563 for (boolean pread : new boolean[] { false, true }) {
564 HFileScanner scanner = reader.getScanner(true, pread);
565 for (int i = 0; i < NUM_KV; ++i) {
566 checkSeekTo(keys, scanner, i);
567 checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(),
568 scanner.getValue());
569 }
570 assertTrue(scanner.seekTo());
571 for (int i = NUM_KV - 1; i >= 0; --i) {
572 checkSeekTo(keys, scanner, i);
573 checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(),
574 scanner.getValue());
575 }
576 }
577
578
579 HFileReaderV2 reader2 = (HFileReaderV2) reader;
580 HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader();
581
582 HFileBlock.BlockIterator iter = fsReader.blockRange(0,
583 reader.getTrailer().getLoadOnOpenDataOffset());
584 HFileBlock block;
585 List<byte[]> blockKeys = new ArrayList<byte[]>();
586 while ((block = iter.nextBlock()) != null) {
587 if (block.getBlockType() != BlockType.LEAF_INDEX)
588 return;
589 ByteBuffer b = block.getBufferReadOnly();
590 int n = b.getInt();
591
592 int entriesOffset = Bytes.SIZEOF_INT * (n + 2);
593
594
595 for (int i = 0; i < n; ++i) {
596 int keyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 1));
597 int nextKeyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 2));
598 int keyLen = nextKeyRelOffset - keyRelOffset;
599 int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset +
600 HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
601 byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset
602 + keyLen);
603 String blockKeyStr = Bytes.toString(blockKey);
604 blockKeys.add(blockKey);
605
606
607
608 assertTrue("Invalid block key from leaf-level block: " + blockKeyStr,
609 keyStrSet.contains(blockKeyStr));
610 }
611 }
612
613
614 assertEquals(
615 Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)),
616 Bytes.toStringBinary(reader.midkey()));
617
618 assertEquals(UNCOMPRESSED_INDEX_SIZES[testI],
619 reader.getTrailer().getUncompressedDataIndexSize());
620
621 reader.close();
622 reader2.close();
623 }
624 }
625
626 private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i)
627 throws IOException {
628 assertEquals("Failed to seek to key #" + i + " ("
629 + Bytes.toStringBinary(keys[i]) + ")", 0, scanner.seekTo(keys[i]));
630 }
631
632 private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr,
633 ByteBuffer buf) {
634 assertEquals(msgPrefix + ": expected " + Bytes.toStringBinary(arr)
635 + ", actual " + Bytes.toStringBinary(buf), 0, Bytes.compareTo(arr, 0,
636 arr.length, buf.array(), buf.arrayOffset(), buf.limit()));
637 }
638
639
640 private void checkKeyValue(String msgPrefix, byte[] expectedKey,
641 byte[] expectedValue, ByteBuffer keyRead, ByteBuffer valueRead) {
642 if (!msgPrefix.isEmpty())
643 msgPrefix += ". ";
644
645 assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead);
646 assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue,
647 valueRead);
648 }
649
650
651 }
652