1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.junit.Assert.assertTrue;
22
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.Collection;
27 import java.util.Collections;
28 import java.util.HashMap;
29 import java.util.HashSet;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.Random;
33 import java.util.Set;
34
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.hbase.Cell;
38 import org.apache.hadoop.hbase.CellUtil;
39 import org.apache.hadoop.hbase.HBaseTestingUtility;
40 import org.apache.hadoop.hbase.HColumnDescriptor;
41 import org.apache.hadoop.hbase.HConstants;
42 import org.apache.hadoop.hbase.KeyValue;
43 import org.apache.hadoop.hbase.client.Delete;
44 import org.apache.hadoop.hbase.client.Put;
45 import org.apache.hadoop.hbase.client.Scan;
46 import org.apache.hadoop.hbase.io.compress.Compression;
47 import org.apache.hadoop.hbase.testclassification.MediumTests;
48 import org.apache.hadoop.hbase.util.Bytes;
49 import org.junit.After;
50 import org.junit.Before;
51 import org.junit.Test;
52 import org.junit.experimental.categories.Category;
53 import org.junit.runner.RunWith;
54 import org.junit.runners.Parameterized;
55 import org.junit.runners.Parameterized.Parameters;
56
57
58
59
60
61 @RunWith(Parameterized.class)
62 @Category(MediumTests.class)
63 public class TestSeekOptimizations {
64
65 private static final Log LOG =
66 LogFactory.getLog(TestSeekOptimizations.class);
67
68
69 private static final String FAMILY = "myCF";
70 private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
71
72 private static final int PUTS_PER_ROW_COL = 50;
73 private static final int DELETES_PER_ROW_COL = 10;
74
75 private static final int NUM_ROWS = 3;
76 private static final int NUM_COLS = 3;
77
78 private static final boolean VERBOSE = false;
79
80
81
82
83
84 private static final boolean USE_MANY_STORE_FILES = true;
85
86 private static final int[][] COLUMN_SETS = new int[][] {
87 {},
88 {0},
89 {1},
90 {0, 2},
91 {1, 2},
92 {0, 1, 2},
93 };
94
95
96
97 private static final int[][] ROW_RANGES = new int[][] {
98 {-1, -1},
99 {0, 1},
100 {1, 1},
101 {1, 2},
102 {0, 2}
103 };
104
105 private static final int[] MAX_VERSIONS_VALUES = new int[] { 1, 2 };
106
107
108 private HRegion region;
109 private Put put;
110 private Delete del;
111 private Random rand;
112 private Set<Long> putTimestamps = new HashSet<Long>();
113 private Set<Long> delTimestamps = new HashSet<Long>();
114 private List<Cell> expectedKVs = new ArrayList<Cell>();
115
116 private Compression.Algorithm comprAlgo;
117 private BloomType bloomType;
118
119 private long totalSeekDiligent, totalSeekLazy;
120
121 private final static HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU();
122
123 @Parameters
124 public static final Collection<Object[]> parameters() {
125 return HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS;
126 }
127
128 public TestSeekOptimizations(Compression.Algorithm comprAlgo,
129 BloomType bloomType) {
130 this.comprAlgo = comprAlgo;
131 this.bloomType = bloomType;
132 }
133
134 @Before
135 public void setUp() {
136 rand = new Random(91238123L);
137 expectedKVs.clear();
138 }
139
140 @Test
141 public void testMultipleTimestampRanges() throws IOException {
142
143 StoreFileScanner.instrument();
144
145 region = TEST_UTIL.createTestRegion("testMultipleTimestampRanges",
146 new HColumnDescriptor(FAMILY)
147 .setCompressionType(comprAlgo)
148 .setBloomFilterType(bloomType)
149 .setMaxVersions(3)
150 );
151
152
153 final long latestDelTS = USE_MANY_STORE_FILES ? 1397 : -1;
154
155 createTimestampRange(1, 50, -1);
156 createTimestampRange(51, 100, -1);
157 if (USE_MANY_STORE_FILES) {
158 createTimestampRange(100, 500, 127);
159 createTimestampRange(900, 1300, -1);
160 createTimestampRange(1301, 2500, latestDelTS);
161 createTimestampRange(2502, 2598, -1);
162 createTimestampRange(2599, 2999, -1);
163 }
164
165 prepareExpectedKVs(latestDelTS);
166
167 for (int[] columnArr : COLUMN_SETS) {
168 for (int[] rowRange : ROW_RANGES) {
169 for (int maxVersions : MAX_VERSIONS_VALUES) {
170 for (boolean lazySeekEnabled : new boolean[] { false, true }) {
171 testScan(columnArr, lazySeekEnabled, rowRange[0], rowRange[1],
172 maxVersions);
173 }
174 }
175 }
176 }
177
178 final double seekSavings = 1 - totalSeekLazy * 1.0 / totalSeekDiligent;
179 System.err.println("For bloom=" + bloomType + ", compr=" + comprAlgo +
180 " total seeks without optimization: " + totalSeekDiligent
181 + ", with optimization: " + totalSeekLazy + " (" +
182 String.format("%.2f%%", totalSeekLazy * 100.0 / totalSeekDiligent) +
183 "), savings: " + String.format("%.2f%%",
184 100.0 * seekSavings) + "\n");
185
186
187
188 final double expectedSeekSavings = 0.0;
189 assertTrue("Lazy seek is only saving " +
190 String.format("%.2f%%", seekSavings * 100) + " seeks but should " +
191 "save at least " + String.format("%.2f%%", expectedSeekSavings * 100),
192 seekSavings >= expectedSeekSavings);
193 }
194
195 private void testScan(final int[] columnArr, final boolean lazySeekEnabled,
196 final int startRow, final int endRow, int maxVersions)
197 throws IOException {
198 StoreScanner.enableLazySeekGlobally(lazySeekEnabled);
199 final Scan scan = new Scan();
200 final Set<String> qualSet = new HashSet<String>();
201 for (int iColumn : columnArr) {
202 String qualStr = getQualStr(iColumn);
203 scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qualStr));
204 qualSet.add(qualStr);
205 }
206 scan.setMaxVersions(maxVersions);
207 scan.setStartRow(rowBytes(startRow));
208
209
210 {
211 final byte[] scannerStopRow =
212 rowBytes(endRow + (startRow != endRow ? 1 : 0));
213 scan.setStopRow(scannerStopRow);
214 }
215
216 final long initialSeekCount = StoreFileScanner.getSeekCount();
217 final InternalScanner scanner = region.getScanner(scan);
218 final List<Cell> results = new ArrayList<Cell>();
219 final List<Cell> actualKVs = new ArrayList<Cell>();
220
221
222
223
224 boolean hasNext;
225 do {
226 hasNext = scanner.next(results);
227 actualKVs.addAll(results);
228 results.clear();
229 } while (hasNext);
230
231 List<Cell> filteredKVs = filterExpectedResults(qualSet,
232 rowBytes(startRow), rowBytes(endRow), maxVersions);
233 final String rowRestrictionStr =
234 (startRow == -1 && endRow == -1) ? "all rows" : (
235 startRow == endRow ? ("row=" + startRow) : ("startRow="
236 + startRow + ", " + "endRow=" + endRow));
237 final String columnRestrictionStr =
238 columnArr.length == 0 ? "all columns"
239 : ("columns=" + Arrays.toString(columnArr));
240 final String testDesc =
241 "Bloom=" + bloomType + ", compr=" + comprAlgo + ", "
242 + (scan.isGetScan() ? "Get" : "Scan") + ": "
243 + columnRestrictionStr + ", " + rowRestrictionStr
244 + ", maxVersions=" + maxVersions + ", lazySeek=" + lazySeekEnabled;
245 long seekCount = StoreFileScanner.getSeekCount() - initialSeekCount;
246 if (VERBOSE) {
247 System.err.println("Seek count: " + seekCount + ", KVs returned: "
248 + actualKVs.size() + ". " + testDesc +
249 (lazySeekEnabled ? "\n" : ""));
250 }
251 if (lazySeekEnabled) {
252 totalSeekLazy += seekCount;
253 } else {
254 totalSeekDiligent += seekCount;
255 }
256 assertKVListsEqual(testDesc, filteredKVs, actualKVs);
257 }
258
259 private List<Cell> filterExpectedResults(Set<String> qualSet,
260 byte[] startRow, byte[] endRow, int maxVersions) {
261 final List<Cell> filteredKVs = new ArrayList<Cell>();
262 final Map<String, Integer> verCount = new HashMap<String, Integer>();
263 for (Cell kv : expectedKVs) {
264 if (startRow.length > 0 &&
265 Bytes.compareTo(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
266 startRow, 0, startRow.length) < 0) {
267 continue;
268 }
269
270
271 if (endRow.length > 0 &&
272 Bytes.compareTo(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
273 endRow, 0, endRow.length) > 0) {
274 continue;
275 }
276
277 if (!qualSet.isEmpty() && (!CellUtil.matchingFamily(kv, FAMILY_BYTES)
278 || !qualSet.contains(Bytes.toString(CellUtil.cloneQualifier(kv))))) {
279 continue;
280 }
281
282 final String rowColStr =
283 Bytes.toStringBinary(CellUtil.cloneRow(kv)) + "/"
284 + Bytes.toStringBinary(CellUtil.cloneFamily(kv)) + ":"
285 + Bytes.toStringBinary(CellUtil.cloneQualifier(kv));
286 final Integer curNumVer = verCount.get(rowColStr);
287 final int newNumVer = curNumVer != null ? (curNumVer + 1) : 1;
288 if (newNumVer <= maxVersions) {
289 filteredKVs.add(kv);
290 verCount.put(rowColStr, newNumVer);
291 }
292 }
293
294 return filteredKVs;
295 }
296
297 private void prepareExpectedKVs(long latestDelTS) {
298 final List<Cell> filteredKVs = new ArrayList<Cell>();
299 for (Cell kv : expectedKVs) {
300 if (kv.getTimestamp() > latestDelTS || latestDelTS == -1) {
301 filteredKVs.add(kv);
302 }
303 }
304 expectedKVs = filteredKVs;
305 Collections.sort(expectedKVs, KeyValue.COMPARATOR);
306 }
307
308 public void put(String qual, long ts) {
309 if (!putTimestamps.contains(ts)) {
310 put.add(FAMILY_BYTES, Bytes.toBytes(qual), ts, createValue(ts));
311 putTimestamps.add(ts);
312 }
313 if (VERBOSE) {
314 LOG.info("put: row " + Bytes.toStringBinary(put.getRow())
315 + ", cf " + FAMILY + ", qualifier " + qual + ", ts " + ts);
316 }
317 }
318
319 private byte[] createValue(long ts) {
320 return Bytes.toBytes("value" + ts);
321 }
322
323 public void delAtTimestamp(String qual, long ts) {
324 del.deleteColumn(FAMILY_BYTES, Bytes.toBytes(qual), ts);
325 logDelete(qual, ts, "at");
326 }
327
328 private void logDelete(String qual, long ts, String delType) {
329 if (VERBOSE) {
330 LOG.info("del " + delType + ": row "
331 + Bytes.toStringBinary(put.getRow()) + ", cf " + FAMILY
332 + ", qualifier " + qual + ", ts " + ts);
333 }
334 }
335
336 private void delUpToTimestamp(String qual, long upToTS) {
337 del.deleteColumns(FAMILY_BYTES, Bytes.toBytes(qual), upToTS);
338 logDelete(qual, upToTS, "up to and including");
339 }
340
341 private long randLong(long n) {
342 long l = rand.nextLong();
343 if (l == Long.MIN_VALUE)
344 l = Long.MAX_VALUE;
345 return Math.abs(l) % n;
346 }
347
348 private long randBetween(long a, long b) {
349 long x = a + randLong(b - a + 1);
350 assertTrue(a <= x && x <= b);
351 return x;
352 }
353
354 private final String rowStr(int i) {
355 return ("row" + i).intern();
356 }
357
358 private final byte[] rowBytes(int i) {
359 if (i == -1) {
360 return HConstants.EMPTY_BYTE_ARRAY;
361 }
362 return Bytes.toBytes(rowStr(i));
363 }
364
365 private final String getQualStr(int i) {
366 return ("qual" + i).intern();
367 }
368
369 public void createTimestampRange(long minTS, long maxTS,
370 long deleteUpToTS) throws IOException {
371 assertTrue(minTS < maxTS);
372 assertTrue(deleteUpToTS == -1
373 || (minTS <= deleteUpToTS && deleteUpToTS <= maxTS));
374
375 for (int iRow = 0; iRow < NUM_ROWS; ++iRow) {
376 final String row = rowStr(iRow);
377 final byte[] rowBytes = Bytes.toBytes(row);
378 for (int iCol = 0; iCol < NUM_COLS; ++iCol) {
379 final String qual = getQualStr(iCol);
380 final byte[] qualBytes = Bytes.toBytes(qual);
381 put = new Put(rowBytes);
382
383 putTimestamps.clear();
384 put(qual, minTS);
385 put(qual, maxTS);
386 for (int i = 0; i < PUTS_PER_ROW_COL; ++i) {
387 put(qual, randBetween(minTS, maxTS));
388 }
389
390 long[] putTimestampList = new long[putTimestamps.size()];
391 {
392 int i = 0;
393 for (long ts : putTimestamps) {
394 putTimestampList[i++] = ts;
395 }
396 }
397
398
399 delTimestamps.clear();
400 assertTrue(putTimestampList.length >= DELETES_PER_ROW_COL);
401 int numToDel = DELETES_PER_ROW_COL;
402 int tsRemaining = putTimestampList.length;
403 del = new Delete(rowBytes);
404 for (long ts : putTimestampList) {
405 if (rand.nextInt(tsRemaining) < numToDel) {
406 delAtTimestamp(qual, ts);
407 putTimestamps.remove(ts);
408 --numToDel;
409 }
410
411 if (--tsRemaining == 0) {
412 break;
413 }
414 }
415
416
417 if (deleteUpToTS != -1) {
418 delUpToTimestamp(qual, deleteUpToTS);
419 }
420
421 region.put(put);
422 if (!del.isEmpty()) {
423 region.delete(del);
424 }
425
426
427
428 for (long ts : putTimestamps) {
429 expectedKVs.add(new KeyValue(rowBytes, FAMILY_BYTES, qualBytes, ts,
430 KeyValue.Type.Put));
431 }
432 }
433 }
434
435 region.flush(true);
436 }
437
438 @After
439 public void tearDown() throws IOException {
440 if (region != null) {
441 HRegion.closeHRegion(region);
442 }
443
444
445
446 StoreScanner.enableLazySeekGlobally(
447 StoreScanner.LAZY_SEEK_ENABLED_BY_DEFAULT);
448 }
449
450
451 public void assertKVListsEqual(String additionalMsg,
452 final List<? extends Cell> expected,
453 final List<? extends Cell> actual) {
454 final int eLen = expected.size();
455 final int aLen = actual.size();
456 final int minLen = Math.min(eLen, aLen);
457
458 int i;
459 for (i = 0; i < minLen
460 && KeyValue.COMPARATOR.compareOnlyKeyPortion(expected.get(i), actual.get(i)) == 0;
461 ++i) {}
462
463 if (additionalMsg == null) {
464 additionalMsg = "";
465 }
466 if (!additionalMsg.isEmpty()) {
467 additionalMsg = ". " + additionalMsg;
468 }
469
470 if (eLen != aLen || i != minLen) {
471 throw new AssertionError(
472 "Expected and actual KV arrays differ at position " + i + ": " +
473 HBaseTestingUtility.safeGetAsStr(expected, i) + " (length " + eLen +") vs. " +
474 HBaseTestingUtility.safeGetAsStr(actual, i) + " (length " + aLen + ")" + additionalMsg);
475 }
476 }
477
478 }
479