1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.hadoop.hbase.io.encoding;
18
19 import static org.junit.Assert.assertEquals;
20 import static org.junit.Assert.fail;
21
22 import java.io.ByteArrayInputStream;
23 import java.io.DataInputStream;
24 import java.io.IOException;
25 import java.nio.ByteBuffer;
26 import java.util.ArrayList;
27 import java.util.Collection;
28 import java.util.List;
29 import java.util.Random;
30
31 import org.apache.hadoop.hbase.HBaseTestingUtility;
32 import org.apache.hadoop.hbase.HConstants;
33 import org.apache.hadoop.hbase.KeyValue;
34 import org.apache.hadoop.hbase.KeyValue.Type;
35 import org.apache.hadoop.hbase.LargeTests;
36 import org.apache.hadoop.hbase.Tag;
37 import org.apache.hadoop.hbase.io.compress.Compression;
38 import org.apache.hadoop.hbase.io.hfile.HFileContext;
39 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
40 import org.apache.hadoop.hbase.util.Bytes;
41 import org.apache.hadoop.hbase.util.test.RedundantKVGenerator;
42 import org.junit.Test;
43 import org.junit.experimental.categories.Category;
44 import org.junit.runner.RunWith;
45 import org.junit.runners.Parameterized;
46 import org.junit.runners.Parameterized.Parameters;
47
48
49
50
51
52 @Category(LargeTests.class)
53 @RunWith(Parameterized.class)
54 public class TestDataBlockEncoders {
55
56 private static int NUMBER_OF_KV = 10000;
57 private static int NUM_RANDOM_SEEKS = 10000;
58
59 private static int ENCODED_DATA_OFFSET = HConstants.HFILEBLOCK_HEADER_SIZE
60 + DataBlockEncoding.ID_SIZE;
61
62 private RedundantKVGenerator generator = new RedundantKVGenerator();
63 private Random randomizer = new Random(42l);
64
65 private final boolean includesMemstoreTS;
66 private final boolean includesTags;
67
68 @Parameters
69 public static Collection<Object[]> parameters() {
70 return HBaseTestingUtility.MEMSTORETS_TAGS_PARAMETRIZED;
71 }
72 public TestDataBlockEncoders(boolean includesMemstoreTS, boolean includesTag) {
73 this.includesMemstoreTS = includesMemstoreTS;
74 this.includesTags = includesTag;
75 }
76
77 private HFileBlockEncodingContext getEncodingContext(Compression.Algorithm algo,
78 DataBlockEncoding encoding) {
79 DataBlockEncoder encoder = encoding.getEncoder();
80 HFileContext meta = new HFileContextBuilder()
81 .withHBaseCheckSum(false)
82 .withIncludesMvcc(includesMemstoreTS)
83 .withIncludesTags(includesTags)
84 .withCompression(algo).build();
85 if (encoder != null) {
86 return encoder.newDataBlockEncodingContext(encoding,
87 HConstants.HFILEBLOCK_DUMMY_HEADER, meta);
88 } else {
89 return new HFileBlockDefaultEncodingContext(encoding,
90 HConstants.HFILEBLOCK_DUMMY_HEADER, meta);
91 }
92 }
93
94 private byte[] encodeBytes(DataBlockEncoding encoding, ByteBuffer dataset)
95 throws IOException {
96 DataBlockEncoder encoder = encoding.getEncoder();
97 HFileBlockEncodingContext encodingCtx = getEncodingContext(Compression.Algorithm.NONE,
98 encoding);
99
100 encoder.encodeKeyValues(dataset, encodingCtx);
101
102 byte[] encodedBytesWithHeader = encodingCtx.getUncompressedBytesWithHeader();
103 byte[] encodedData = new byte[encodedBytesWithHeader.length - ENCODED_DATA_OFFSET];
104 System.arraycopy(encodedBytesWithHeader, ENCODED_DATA_OFFSET, encodedData, 0,
105 encodedData.length);
106 return encodedData;
107 }
108
109 private void testAlgorithm(ByteBuffer dataset, DataBlockEncoding encoding,
110 List<KeyValue> kvList) throws IOException {
111
112 byte[] encodedBytes = encodeBytes(encoding, dataset);
113
114 ByteArrayInputStream bais = new ByteArrayInputStream(encodedBytes);
115 DataInputStream dis = new DataInputStream(bais);
116 ByteBuffer actualDataset;
117 DataBlockEncoder encoder = encoding.getEncoder();
118 HFileContext meta = new HFileContextBuilder()
119 .withHBaseCheckSum(false)
120 .withIncludesMvcc(includesMemstoreTS)
121 .withIncludesTags(includesTags)
122 .withCompression(Compression.Algorithm.NONE).build();
123 actualDataset = encoder.decodeKeyValues(dis, encoder.newDataBlockDecodingContext(meta));
124 dataset.rewind();
125 actualDataset.rewind();
126
127
128
129
130
131 assertEquals("Encoding -> decoding gives different results for " + encoder,
132 Bytes.toStringBinary(dataset), Bytes.toStringBinary(actualDataset));
133
134 }
135
136
137
138
139
140
141
142 @Test
143 public void testEmptyKeyValues() throws IOException {
144 List<KeyValue> kvList = new ArrayList<KeyValue>();
145 byte[] row = new byte[0];
146 byte[] family = new byte[0];
147 byte[] qualifier = new byte[0];
148 byte[] value = new byte[0];
149 if (!includesTags) {
150 kvList.add(new KeyValue(row, family, qualifier, 0l, value));
151 kvList.add(new KeyValue(row, family, qualifier, 0l, value));
152 } else {
153 byte[] metaValue1 = Bytes.toBytes("metaValue1");
154 byte[] metaValue2 = Bytes.toBytes("metaValue2");
155 kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
156 metaValue1) }));
157 kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
158 metaValue2) }));
159 }
160 testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS),
161 kvList);
162 }
163
164
165
166
167
168
169
170 @Test
171 public void testNegativeTimestamps() throws IOException {
172 List<KeyValue> kvList = new ArrayList<KeyValue>();
173 byte[] row = new byte[0];
174 byte[] family = new byte[0];
175 byte[] qualifier = new byte[0];
176 byte[] value = new byte[0];
177 if (includesTags) {
178 byte[] metaValue1 = Bytes.toBytes("metaValue1");
179 byte[] metaValue2 = Bytes.toBytes("metaValue2");
180 kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
181 metaValue1) }));
182 kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
183 metaValue2) }));
184 } else {
185 kvList.add(new KeyValue(row, family, qualifier, -1l, Type.Put, value));
186 kvList.add(new KeyValue(row, family, qualifier, -2l, Type.Put, value));
187 }
188 testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS),
189 kvList);
190 }
191
192
193
194
195
196
197
198 @Test
199 public void testExecutionOnSample() throws IOException {
200 List<KeyValue> kvList = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
201 testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS),
202 kvList);
203 }
204
205
206
207
208 @Test
209 public void testSeekingOnSample() throws IOException {
210 List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
211 ByteBuffer originalBuffer = RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
212 includesMemstoreTS);
213
214
215 List<DataBlockEncoder.EncodedSeeker> encodedSeekers = new ArrayList<DataBlockEncoder.EncodedSeeker>();
216 for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
217 if (encoding.getEncoder() == null) {
218 continue;
219 }
220
221 ByteBuffer encodedBuffer = ByteBuffer.wrap(encodeBytes(encoding, originalBuffer));
222 DataBlockEncoder encoder = encoding.getEncoder();
223 HFileContext meta = new HFileContextBuilder()
224 .withHBaseCheckSum(false)
225 .withIncludesMvcc(includesMemstoreTS)
226 .withIncludesTags(includesTags)
227 .withCompression(Compression.Algorithm.NONE)
228 .build();
229 DataBlockEncoder.EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR,
230 encoder.newDataBlockDecodingContext(meta));
231 seeker.setCurrentBuffer(encodedBuffer);
232 encodedSeekers.add(seeker);
233 }
234
235
236 for (boolean seekBefore : new boolean[] { false, true }) {
237 for (int i = 0; i < NUM_RANDOM_SEEKS; ++i) {
238 int keyValueId;
239 if (!seekBefore) {
240 keyValueId = randomizer.nextInt(sampleKv.size());
241 } else {
242 keyValueId = randomizer.nextInt(sampleKv.size() - 1) + 1;
243 }
244
245 KeyValue keyValue = sampleKv.get(keyValueId);
246 checkSeekingConsistency(encodedSeekers, seekBefore, keyValue);
247 }
248 }
249
250
251 checkSeekingConsistency(encodedSeekers, false, sampleKv.get(0));
252 for (boolean seekBefore : new boolean[] { false, true }) {
253 checkSeekingConsistency(encodedSeekers, seekBefore, sampleKv.get(sampleKv.size() - 1));
254 KeyValue midKv = sampleKv.get(sampleKv.size() / 2);
255 KeyValue lastMidKv = midKv.createLastOnRowCol();
256 checkSeekingConsistency(encodedSeekers, seekBefore, lastMidKv);
257 }
258 }
259
260 @Test
261 public void testNextOnSample() {
262 List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
263 ByteBuffer originalBuffer = RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
264 includesMemstoreTS);
265
266 for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
267 if (encoding.getEncoder() == null) {
268 continue;
269 }
270
271 DataBlockEncoder encoder = encoding.getEncoder();
272 ByteBuffer encodedBuffer = null;
273 try {
274 encodedBuffer = ByteBuffer.wrap(encodeBytes(encoding, originalBuffer));
275 } catch (IOException e) {
276 throw new RuntimeException(String.format("Bug while encoding using '%s'",
277 encoder.toString()), e);
278 }
279 HFileContext meta = new HFileContextBuilder()
280 .withHBaseCheckSum(false)
281 .withIncludesMvcc(includesMemstoreTS)
282 .withIncludesTags(includesTags)
283 .withCompression(Compression.Algorithm.NONE)
284 .build();
285 DataBlockEncoder.EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR,
286 encoder.newDataBlockDecodingContext(meta));
287 seeker.setCurrentBuffer(encodedBuffer);
288 int i = 0;
289 do {
290 KeyValue expectedKeyValue = sampleKv.get(i);
291 ByteBuffer keyValue = seeker.getKeyValueBuffer();
292 if (0 != Bytes.compareTo(keyValue.array(), keyValue.arrayOffset(), keyValue.limit(),
293 expectedKeyValue.getBuffer(), expectedKeyValue.getOffset(),
294 expectedKeyValue.getLength())) {
295
296 int commonPrefix = 0;
297 byte[] left = keyValue.array();
298 byte[] right = expectedKeyValue.getBuffer();
299 int leftOff = keyValue.arrayOffset();
300 int rightOff = expectedKeyValue.getOffset();
301 int length = Math.min(keyValue.limit(), expectedKeyValue.getLength());
302 while (commonPrefix < length
303 && left[commonPrefix + leftOff] == right[commonPrefix + rightOff]) {
304 commonPrefix++;
305 }
306
307 fail(String.format("next() produces wrong results "
308 + "encoder: %s i: %d commonPrefix: %d" + "\n expected %s\n actual %s", encoder
309 .toString(), i, commonPrefix, Bytes.toStringBinary(expectedKeyValue.getBuffer(),
310 expectedKeyValue.getOffset(), expectedKeyValue.getLength()), Bytes
311 .toStringBinary(keyValue)));
312 }
313 i++;
314 } while (seeker.next());
315 }
316 }
317
318
319
320
321 @Test
322 public void testFirstKeyInBlockOnSample() {
323 List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
324 ByteBuffer originalBuffer = RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
325 includesMemstoreTS);
326
327 for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
328 if (encoding.getEncoder() == null) {
329 continue;
330 }
331 DataBlockEncoder encoder = encoding.getEncoder();
332 ByteBuffer encodedBuffer = null;
333 try {
334 encodedBuffer = ByteBuffer.wrap(encodeBytes(encoding, originalBuffer));
335 } catch (IOException e) {
336 throw new RuntimeException(String.format("Bug while encoding using '%s'",
337 encoder.toString()), e);
338 }
339 ByteBuffer keyBuffer = encoder.getFirstKeyInBlock(encodedBuffer);
340 KeyValue firstKv = sampleKv.get(0);
341 if (0 != Bytes.compareTo(keyBuffer.array(), keyBuffer.arrayOffset(), keyBuffer.limit(),
342 firstKv.getBuffer(), firstKv.getKeyOffset(), firstKv.getKeyLength())) {
343
344 int commonPrefix = 0;
345 int length = Math.min(keyBuffer.limit(), firstKv.getKeyLength());
346 while (commonPrefix < length
347 && keyBuffer.array()[keyBuffer.arrayOffset() + commonPrefix] == firstKv.getBuffer()[firstKv
348 .getKeyOffset() + commonPrefix]) {
349 commonPrefix++;
350 }
351 fail(String.format("Bug in '%s' commonPrefix %d", encoder.toString(), commonPrefix));
352 }
353 }
354 }
355
356 private void checkSeekingConsistency(List<DataBlockEncoder.EncodedSeeker> encodedSeekers,
357 boolean seekBefore, KeyValue keyValue) {
358 ByteBuffer expectedKeyValue = null;
359 ByteBuffer expectedKey = null;
360 ByteBuffer expectedValue = null;
361
362 for (DataBlockEncoder.EncodedSeeker seeker : encodedSeekers) {
363 seeker.seekToKeyInBlock(keyValue.getBuffer(), keyValue.getKeyOffset(),
364 keyValue.getKeyLength(), seekBefore);
365 seeker.rewind();
366
367 ByteBuffer actualKeyValue = seeker.getKeyValueBuffer();
368 ByteBuffer actualKey = seeker.getKeyDeepCopy();
369 ByteBuffer actualValue = seeker.getValueShallowCopy();
370
371 if (expectedKeyValue != null) {
372 assertEquals(expectedKeyValue, actualKeyValue);
373 } else {
374 expectedKeyValue = actualKeyValue;
375 }
376
377 if (expectedKey != null) {
378 assertEquals(expectedKey, actualKey);
379 } else {
380 expectedKey = actualKey;
381 }
382
383 if (expectedValue != null) {
384 assertEquals(expectedValue, actualValue);
385 } else {
386 expectedValue = actualValue;
387 }
388 }
389 }
390
391 private void testEncodersOnDataset(ByteBuffer onDataset, List<KeyValue> kvList) throws IOException {
392 ByteBuffer dataset = ByteBuffer.allocate(onDataset.capacity());
393 onDataset.rewind();
394 dataset.put(onDataset);
395 onDataset.rewind();
396 dataset.flip();
397
398 for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
399 if (encoding.getEncoder() == null) {
400 continue;
401 }
402
403 testAlgorithm(dataset, encoding, kvList);
404
405
406 dataset.rewind();
407 assertEquals("Input of two methods is changed", onDataset, dataset);
408 }
409 }
410
411 @Test
412 public void testZeroByte() throws IOException {
413 List<KeyValue> kvList = new ArrayList<KeyValue>();
414 byte[] row = Bytes.toBytes("abcd");
415 byte[] family = new byte[] { 'f' };
416 byte[] qualifier0 = new byte[] { 'b' };
417 byte[] qualifier1 = new byte[] { 'c' };
418 byte[] value0 = new byte[] { 'd' };
419 byte[] value1 = new byte[] { 0x00 };
420 if (includesTags) {
421 kvList.add(new KeyValue(row, family, qualifier0, 0, value0, new Tag[] { new Tag((byte) 1,
422 "value1") }));
423 kvList.add(new KeyValue(row, family, qualifier1, 0, value1, new Tag[] { new Tag((byte) 1,
424 "value1") }));
425 } else {
426 kvList.add(new KeyValue(row, family, qualifier0, 0, Type.Put, value0));
427 kvList.add(new KeyValue(row, family, qualifier1, 0, Type.Put, value1));
428 }
429 testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS),
430 kvList);
431 }
432
433 }