View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.encoding;
18  
19  import static org.junit.Assert.assertEquals;
20  import static org.junit.Assert.fail;
21  
22  import java.io.ByteArrayInputStream;
23  import java.io.DataInputStream;
24  import java.io.IOException;
25  import java.nio.ByteBuffer;
26  import java.util.ArrayList;
27  import java.util.Collection;
28  import java.util.List;
29  import java.util.Random;
30  
31  import org.apache.hadoop.hbase.HBaseTestingUtility;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.KeyValue;
34  import org.apache.hadoop.hbase.KeyValue.Type;
35  import org.apache.hadoop.hbase.LargeTests;
36  import org.apache.hadoop.hbase.Tag;
37  import org.apache.hadoop.hbase.io.compress.Compression;
38  import org.apache.hadoop.hbase.io.hfile.HFileContext;
39  import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
40  import org.apache.hadoop.hbase.util.Bytes;
41  import org.apache.hadoop.hbase.util.test.RedundantKVGenerator;
42  import org.junit.Test;
43  import org.junit.experimental.categories.Category;
44  import org.junit.runner.RunWith;
45  import org.junit.runners.Parameterized;
46  import org.junit.runners.Parameterized.Parameters;
47  
48  /**
49   * Test all of the data block encoding algorithms for correctness. Most of the
50   * class generate data which will test different branches in code.
51   */
52  @Category(LargeTests.class)
53  @RunWith(Parameterized.class)
54  public class TestDataBlockEncoders {
55  
56    private static int NUMBER_OF_KV = 10000;
57    private static int NUM_RANDOM_SEEKS = 10000;
58  
59    private static int ENCODED_DATA_OFFSET = HConstants.HFILEBLOCK_HEADER_SIZE
60        + DataBlockEncoding.ID_SIZE;
61  
62    private RedundantKVGenerator generator = new RedundantKVGenerator();
63    private Random randomizer = new Random(42l);
64  
65    private final boolean includesMemstoreTS;
66    private final boolean includesTags;
67  
68    @Parameters
69    public static Collection<Object[]> parameters() {
70      return HBaseTestingUtility.MEMSTORETS_TAGS_PARAMETRIZED;
71    }
72    public TestDataBlockEncoders(boolean includesMemstoreTS, boolean includesTag) {
73      this.includesMemstoreTS = includesMemstoreTS;
74      this.includesTags = includesTag;
75    }
76    
77    private HFileBlockEncodingContext getEncodingContext(Compression.Algorithm algo,
78        DataBlockEncoding encoding) {
79      DataBlockEncoder encoder = encoding.getEncoder();
80      HFileContext meta = new HFileContextBuilder()
81                          .withHBaseCheckSum(false)
82                          .withIncludesMvcc(includesMemstoreTS)
83                          .withIncludesTags(includesTags)
84                          .withCompression(algo).build();
85      if (encoder != null) {
86        return encoder.newDataBlockEncodingContext(encoding,
87            HConstants.HFILEBLOCK_DUMMY_HEADER, meta);
88      } else {
89        return new HFileBlockDefaultEncodingContext(encoding,
90            HConstants.HFILEBLOCK_DUMMY_HEADER, meta);
91      }
92    }
93    
94    private byte[] encodeBytes(DataBlockEncoding encoding, ByteBuffer dataset)
95        throws IOException {
96      DataBlockEncoder encoder = encoding.getEncoder();
97      HFileBlockEncodingContext encodingCtx = getEncodingContext(Compression.Algorithm.NONE,
98          encoding);
99  
100     encoder.encodeKeyValues(dataset, encodingCtx);
101 
102     byte[] encodedBytesWithHeader = encodingCtx.getUncompressedBytesWithHeader();
103     byte[] encodedData = new byte[encodedBytesWithHeader.length - ENCODED_DATA_OFFSET];
104     System.arraycopy(encodedBytesWithHeader, ENCODED_DATA_OFFSET, encodedData, 0,
105         encodedData.length);
106     return encodedData;
107   }
108   
109   private void testAlgorithm(ByteBuffer dataset, DataBlockEncoding encoding,
110       List<KeyValue> kvList) throws IOException {
111     // encode
112     byte[] encodedBytes = encodeBytes(encoding, dataset);
113     // decode
114     ByteArrayInputStream bais = new ByteArrayInputStream(encodedBytes);
115     DataInputStream dis = new DataInputStream(bais);
116     ByteBuffer actualDataset;
117     DataBlockEncoder encoder = encoding.getEncoder();
118     HFileContext meta = new HFileContextBuilder()
119                         .withHBaseCheckSum(false)
120                         .withIncludesMvcc(includesMemstoreTS)
121                         .withIncludesTags(includesTags)
122                         .withCompression(Compression.Algorithm.NONE).build();
123     actualDataset = encoder.decodeKeyValues(dis, encoder.newDataBlockDecodingContext(meta));
124     dataset.rewind();
125     actualDataset.rewind();
126 
127     // this is because in case of prefix tree the decoded stream will not have
128     // the
129     // mvcc in it.
130     // if (encoding != DataBlockEncoding.PREFIX_TREE) {
131     assertEquals("Encoding -> decoding gives different results for " + encoder,
132         Bytes.toStringBinary(dataset), Bytes.toStringBinary(actualDataset));
133     // }
134   }
135 
136   /**
137    * Test data block encoding of empty KeyValue.
138    * 
139    * @throws IOException
140    *           On test failure.
141    */
142   @Test
143   public void testEmptyKeyValues() throws IOException {
144     List<KeyValue> kvList = new ArrayList<KeyValue>();
145     byte[] row = new byte[0];
146     byte[] family = new byte[0];
147     byte[] qualifier = new byte[0];
148     byte[] value = new byte[0];
149     if (!includesTags) {
150       kvList.add(new KeyValue(row, family, qualifier, 0l, value));
151       kvList.add(new KeyValue(row, family, qualifier, 0l, value));
152     } else {
153       byte[] metaValue1 = Bytes.toBytes("metaValue1");
154       byte[] metaValue2 = Bytes.toBytes("metaValue2");
155       kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
156           metaValue1) }));
157       kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
158           metaValue2) }));
159     }
160     testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS),
161         kvList);
162   }
163 
164   /**
165    * Test KeyValues with negative timestamp.
166    * 
167    * @throws IOException
168    *           On test failure.
169    */
170   @Test
171   public void testNegativeTimestamps() throws IOException {
172     List<KeyValue> kvList = new ArrayList<KeyValue>();
173     byte[] row = new byte[0];
174     byte[] family = new byte[0];
175     byte[] qualifier = new byte[0];
176     byte[] value = new byte[0];
177     if (includesTags) {
178       byte[] metaValue1 = Bytes.toBytes("metaValue1");
179       byte[] metaValue2 = Bytes.toBytes("metaValue2");
180       kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
181           metaValue1) }));
182       kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
183           metaValue2) }));
184     } else {
185       kvList.add(new KeyValue(row, family, qualifier, -1l, Type.Put, value));
186       kvList.add(new KeyValue(row, family, qualifier, -2l, Type.Put, value));
187     }
188     testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS),
189         kvList);
190   }
191 
192 
193   /**
194    * Test whether compression -> decompression gives the consistent results on
195    * pseudorandom sample.
196    * @throws IOException On test failure.
197    */
198   @Test
199   public void testExecutionOnSample() throws IOException {
200     List<KeyValue> kvList = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
201     testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS),
202         kvList);
203   }
204 
205   /**
206    * Test seeking while file is encoded.
207    */
208   @Test
209   public void testSeekingOnSample() throws IOException {
210     List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
211     ByteBuffer originalBuffer = RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
212         includesMemstoreTS);
213 
214     // create all seekers
215     List<DataBlockEncoder.EncodedSeeker> encodedSeekers = new ArrayList<DataBlockEncoder.EncodedSeeker>();
216     for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
217       if (encoding.getEncoder() == null) {
218         continue;
219       }
220 
221       ByteBuffer encodedBuffer = ByteBuffer.wrap(encodeBytes(encoding, originalBuffer));
222       DataBlockEncoder encoder = encoding.getEncoder();
223       HFileContext meta = new HFileContextBuilder()
224                           .withHBaseCheckSum(false)
225                           .withIncludesMvcc(includesMemstoreTS)
226                           .withIncludesTags(includesTags)
227                           .withCompression(Compression.Algorithm.NONE)
228                           .build();
229       DataBlockEncoder.EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR,
230           encoder.newDataBlockDecodingContext(meta));
231       seeker.setCurrentBuffer(encodedBuffer);
232       encodedSeekers.add(seeker);
233     }
234     // test it!
235     // try a few random seeks
236     for (boolean seekBefore : new boolean[] { false, true }) {
237       for (int i = 0; i < NUM_RANDOM_SEEKS; ++i) {
238         int keyValueId;
239         if (!seekBefore) {
240           keyValueId = randomizer.nextInt(sampleKv.size());
241         } else {
242           keyValueId = randomizer.nextInt(sampleKv.size() - 1) + 1;
243         }
244 
245         KeyValue keyValue = sampleKv.get(keyValueId);
246         checkSeekingConsistency(encodedSeekers, seekBefore, keyValue);
247       }
248     }
249 
250     // check edge cases
251     checkSeekingConsistency(encodedSeekers, false, sampleKv.get(0));
252     for (boolean seekBefore : new boolean[] { false, true }) {
253       checkSeekingConsistency(encodedSeekers, seekBefore, sampleKv.get(sampleKv.size() - 1));
254       KeyValue midKv = sampleKv.get(sampleKv.size() / 2);
255       KeyValue lastMidKv = midKv.createLastOnRowCol();
256       checkSeekingConsistency(encodedSeekers, seekBefore, lastMidKv);
257     }
258   }
259 
260   @Test
261   public void testNextOnSample() {
262     List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
263     ByteBuffer originalBuffer = RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
264         includesMemstoreTS);
265 
266     for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
267       if (encoding.getEncoder() == null) {
268         continue;
269       }
270 
271       DataBlockEncoder encoder = encoding.getEncoder();
272       ByteBuffer encodedBuffer = null;
273       try {
274         encodedBuffer = ByteBuffer.wrap(encodeBytes(encoding, originalBuffer));
275       } catch (IOException e) {
276         throw new RuntimeException(String.format("Bug while encoding using '%s'",
277             encoder.toString()), e);
278       }
279       HFileContext meta = new HFileContextBuilder()
280                           .withHBaseCheckSum(false)
281                           .withIncludesMvcc(includesMemstoreTS)
282                           .withIncludesTags(includesTags)
283                           .withCompression(Compression.Algorithm.NONE)
284                           .build();
285       DataBlockEncoder.EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR,
286           encoder.newDataBlockDecodingContext(meta));
287       seeker.setCurrentBuffer(encodedBuffer);
288       int i = 0;
289       do {
290         KeyValue expectedKeyValue = sampleKv.get(i);
291         ByteBuffer keyValue = seeker.getKeyValueBuffer();
292         if (0 != Bytes.compareTo(keyValue.array(), keyValue.arrayOffset(), keyValue.limit(),
293             expectedKeyValue.getBuffer(), expectedKeyValue.getOffset(),
294             expectedKeyValue.getLength())) {
295 
296           int commonPrefix = 0;
297           byte[] left = keyValue.array();
298           byte[] right = expectedKeyValue.getBuffer();
299           int leftOff = keyValue.arrayOffset();
300           int rightOff = expectedKeyValue.getOffset();
301           int length = Math.min(keyValue.limit(), expectedKeyValue.getLength());
302           while (commonPrefix < length
303               && left[commonPrefix + leftOff] == right[commonPrefix + rightOff]) {
304             commonPrefix++;
305           }
306 
307           fail(String.format("next() produces wrong results "
308               + "encoder: %s i: %d commonPrefix: %d" + "\n expected %s\n actual      %s", encoder
309               .toString(), i, commonPrefix, Bytes.toStringBinary(expectedKeyValue.getBuffer(),
310               expectedKeyValue.getOffset(), expectedKeyValue.getLength()), Bytes
311               .toStringBinary(keyValue)));
312         }
313         i++;
314       } while (seeker.next());
315     }
316   }
317 
318   /**
319    * Test whether the decompression of first key is implemented correctly.
320    */
321   @Test
322   public void testFirstKeyInBlockOnSample() {
323     List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
324     ByteBuffer originalBuffer = RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
325         includesMemstoreTS);
326 
327     for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
328       if (encoding.getEncoder() == null) {
329         continue;
330       }
331       DataBlockEncoder encoder = encoding.getEncoder();
332       ByteBuffer encodedBuffer = null;
333       try {
334         encodedBuffer = ByteBuffer.wrap(encodeBytes(encoding, originalBuffer));
335       } catch (IOException e) {
336         throw new RuntimeException(String.format("Bug while encoding using '%s'",
337             encoder.toString()), e);
338       }
339       ByteBuffer keyBuffer = encoder.getFirstKeyInBlock(encodedBuffer);
340       KeyValue firstKv = sampleKv.get(0);
341       if (0 != Bytes.compareTo(keyBuffer.array(), keyBuffer.arrayOffset(), keyBuffer.limit(),
342           firstKv.getBuffer(), firstKv.getKeyOffset(), firstKv.getKeyLength())) {
343 
344         int commonPrefix = 0;
345         int length = Math.min(keyBuffer.limit(), firstKv.getKeyLength());
346         while (commonPrefix < length
347             && keyBuffer.array()[keyBuffer.arrayOffset() + commonPrefix] == firstKv.getBuffer()[firstKv
348                 .getKeyOffset() + commonPrefix]) {
349           commonPrefix++;
350         }
351         fail(String.format("Bug in '%s' commonPrefix %d", encoder.toString(), commonPrefix));
352       }
353     }
354   }
355   
356   private void checkSeekingConsistency(List<DataBlockEncoder.EncodedSeeker> encodedSeekers,
357       boolean seekBefore, KeyValue keyValue) {
358     ByteBuffer expectedKeyValue = null;
359     ByteBuffer expectedKey = null;
360     ByteBuffer expectedValue = null;
361 
362     for (DataBlockEncoder.EncodedSeeker seeker : encodedSeekers) {
363       seeker.seekToKeyInBlock(keyValue.getBuffer(), keyValue.getKeyOffset(),
364           keyValue.getKeyLength(), seekBefore);
365       seeker.rewind();
366 
367       ByteBuffer actualKeyValue = seeker.getKeyValueBuffer();
368       ByteBuffer actualKey = seeker.getKeyDeepCopy();
369       ByteBuffer actualValue = seeker.getValueShallowCopy();
370 
371       if (expectedKeyValue != null) {
372         assertEquals(expectedKeyValue, actualKeyValue);
373       } else {
374         expectedKeyValue = actualKeyValue;
375       }
376 
377       if (expectedKey != null) {
378         assertEquals(expectedKey, actualKey);
379       } else {
380         expectedKey = actualKey;
381       }
382 
383       if (expectedValue != null) {
384         assertEquals(expectedValue, actualValue);
385       } else {
386         expectedValue = actualValue;
387       }
388     }
389   }
390   
391   private void testEncodersOnDataset(ByteBuffer onDataset, List<KeyValue> kvList) throws IOException {
392     ByteBuffer dataset = ByteBuffer.allocate(onDataset.capacity());
393     onDataset.rewind();
394     dataset.put(onDataset);
395     onDataset.rewind();
396     dataset.flip();
397 
398     for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
399       if (encoding.getEncoder() == null) {
400         continue;
401       }
402 
403       testAlgorithm(dataset, encoding, kvList);
404 
405       // ensure that dataset is unchanged
406       dataset.rewind();
407       assertEquals("Input of two methods is changed", onDataset, dataset);
408     }
409   }
410   
411   @Test
412   public void testZeroByte() throws IOException {
413     List<KeyValue> kvList = new ArrayList<KeyValue>();
414     byte[] row = Bytes.toBytes("abcd");
415     byte[] family = new byte[] { 'f' };
416     byte[] qualifier0 = new byte[] { 'b' };
417     byte[] qualifier1 = new byte[] { 'c' };
418     byte[] value0 = new byte[] { 'd' };
419     byte[] value1 = new byte[] { 0x00 };
420     if (includesTags) {
421       kvList.add(new KeyValue(row, family, qualifier0, 0, value0, new Tag[] { new Tag((byte) 1,
422           "value1") }));
423       kvList.add(new KeyValue(row, family, qualifier1, 0, value1, new Tag[] { new Tag((byte) 1,
424           "value1") }));
425     } else {
426       kvList.add(new KeyValue(row, family, qualifier0, 0, Type.Put, value0));
427       kvList.add(new KeyValue(row, family, qualifier1, 0, Type.Put, value1));
428     }
429     testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS),
430         kvList);
431   }
432 
433 }