View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.junit.Assert.assertTrue;
22  
23  import java.io.IOException;
24  import java.util.ArrayList;
25  import java.util.Arrays;
26  import java.util.Collection;
27  import java.util.Collections;
28  import java.util.HashMap;
29  import java.util.HashSet;
30  import java.util.List;
31  import java.util.Map;
32  import java.util.Random;
33  import java.util.Set;
34  
35  import org.apache.commons.logging.Log;
36  import org.apache.commons.logging.LogFactory;
37  import org.apache.hadoop.hbase.Cell;
38  import org.apache.hadoop.hbase.CellUtil;
39  import org.apache.hadoop.hbase.HBaseTestingUtility;
40  import org.apache.hadoop.hbase.HColumnDescriptor;
41  import org.apache.hadoop.hbase.HConstants;
42  import org.apache.hadoop.hbase.KeyValue;
43  import org.apache.hadoop.hbase.client.Delete;
44  import org.apache.hadoop.hbase.client.Put;
45  import org.apache.hadoop.hbase.client.Scan;
46  import org.apache.hadoop.hbase.io.compress.Compression;
47  import org.apache.hadoop.hbase.testclassification.MediumTests;
48  import org.apache.hadoop.hbase.util.Bytes;
49  import org.junit.After;
50  import org.junit.Before;
51  import org.junit.Test;
52  import org.junit.experimental.categories.Category;
53  import org.junit.runner.RunWith;
54  import org.junit.runners.Parameterized;
55  import org.junit.runners.Parameterized.Parameters;
56  
57  /**
58   * Test various seek optimizations for correctness and check if they are
59   * actually saving I/O operations.
60   */
61  @RunWith(Parameterized.class)
62  @Category(MediumTests.class)
63  public class TestSeekOptimizations {
64  
65    private static final Log LOG =
66        LogFactory.getLog(TestSeekOptimizations.class);
67  
68    // Constants
69    private static final String FAMILY = "myCF";
70    private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
71  
72    private static final int PUTS_PER_ROW_COL = 50;
73    private static final int DELETES_PER_ROW_COL = 10;
74  
75    private static final int NUM_ROWS = 3;
76    private static final int NUM_COLS = 3;
77  
78    private static final boolean VERBOSE = false;
79  
80    /**
81     * Disable this when this test fails hopelessly and you need to debug a
82     * simpler case.
83     */
84    private static final boolean USE_MANY_STORE_FILES = true;
85  
86    private static final int[][] COLUMN_SETS = new int[][] {
87      {},  // All columns
88      {0},
89      {1},
90      {0, 2},
91      {1, 2},
92      {0, 1, 2},
93    };
94  
95    // Both start row and end row are inclusive here for the purposes of this
96    // test.
97    private static final int[][] ROW_RANGES = new int[][] {
98      {-1, -1},
99      {0, 1},
100     {1, 1},
101     {1, 2},
102     {0, 2}
103   };
104 
105   private static final int[] MAX_VERSIONS_VALUES = new int[] { 1, 2 };
106 
107   // Instance variables
108   private HRegion region;
109   private Put put;
110   private Delete del;
111   private Random rand;
112   private Set<Long> putTimestamps = new HashSet<Long>();
113   private Set<Long> delTimestamps = new HashSet<Long>();
114   private List<Cell> expectedKVs = new ArrayList<Cell>();
115 
116   private Compression.Algorithm comprAlgo;
117   private BloomType bloomType;
118 
119   private long totalSeekDiligent, totalSeekLazy;
120   
121   private final static HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU();
122 
123   @Parameters
124   public static final Collection<Object[]> parameters() {
125     return HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS;
126   }
127 
128   public TestSeekOptimizations(Compression.Algorithm comprAlgo,
129       BloomType bloomType) {
130     this.comprAlgo = comprAlgo;
131     this.bloomType = bloomType;
132   }
133 
134   @Before
135   public void setUp() {
136     rand = new Random(91238123L);
137     expectedKVs.clear();
138   }
139 
140   @Test
141   public void testMultipleTimestampRanges() throws IOException {
142     // enable seek counting
143     StoreFileScanner.instrument();
144 
145     region = TEST_UTIL.createTestRegion("testMultipleTimestampRanges",
146         new HColumnDescriptor(FAMILY)
147             .setCompressionType(comprAlgo)
148             .setBloomFilterType(bloomType)
149             .setMaxVersions(3)
150     );
151 
152     // Delete the given timestamp and everything before.
153     final long latestDelTS = USE_MANY_STORE_FILES ? 1397 : -1;
154 
155     createTimestampRange(1, 50, -1);
156     createTimestampRange(51, 100, -1);
157     if (USE_MANY_STORE_FILES) {
158       createTimestampRange(100, 500, 127);
159       createTimestampRange(900, 1300, -1);
160       createTimestampRange(1301, 2500, latestDelTS);
161       createTimestampRange(2502, 2598, -1);
162       createTimestampRange(2599, 2999, -1);
163     }
164 
165     prepareExpectedKVs(latestDelTS);
166 
167     for (int[] columnArr : COLUMN_SETS) {
168       for (int[] rowRange : ROW_RANGES) {
169         for (int maxVersions : MAX_VERSIONS_VALUES) {
170           for (boolean lazySeekEnabled : new boolean[] { false, true }) {
171             testScan(columnArr, lazySeekEnabled, rowRange[0], rowRange[1],
172                 maxVersions);
173           }
174         }
175       }
176     }
177 
178     final double seekSavings = 1 - totalSeekLazy * 1.0 / totalSeekDiligent;
179     System.err.println("For bloom=" + bloomType + ", compr=" + comprAlgo +
180         " total seeks without optimization: " + totalSeekDiligent
181         + ", with optimization: " + totalSeekLazy + " (" +
182         String.format("%.2f%%", totalSeekLazy * 100.0 / totalSeekDiligent) +
183         "), savings: " + String.format("%.2f%%",
184             100.0 * seekSavings) + "\n");
185 
186     // Test that lazy seeks are buying us something. Without the actual
187     // implementation of the lazy seek optimization this will be 0.
188     final double expectedSeekSavings = 0.0;
189     assertTrue("Lazy seek is only saving " +
190         String.format("%.2f%%", seekSavings * 100) + " seeks but should " +
191         "save at least " + String.format("%.2f%%", expectedSeekSavings * 100),
192         seekSavings >= expectedSeekSavings);
193   }
194 
195   private void testScan(final int[] columnArr, final boolean lazySeekEnabled,
196       final int startRow, final int endRow, int maxVersions)
197       throws IOException {
198     StoreScanner.enableLazySeekGlobally(lazySeekEnabled);
199     final Scan scan = new Scan();
200     final Set<String> qualSet = new HashSet<String>();
201     for (int iColumn : columnArr) {
202       String qualStr = getQualStr(iColumn);
203       scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qualStr));
204       qualSet.add(qualStr);
205     }
206     scan.setMaxVersions(maxVersions);
207     scan.setStartRow(rowBytes(startRow));
208 
209     // Adjust for the fact that for multi-row queries the end row is exclusive.
210     {
211       final byte[] scannerStopRow =
212           rowBytes(endRow + (startRow != endRow ? 1 : 0));
213       scan.setStopRow(scannerStopRow);
214     }
215 
216     final long initialSeekCount = StoreFileScanner.getSeekCount();
217     final InternalScanner scanner = region.getScanner(scan);
218     final List<Cell> results = new ArrayList<Cell>();
219     final List<Cell> actualKVs = new ArrayList<Cell>();
220 
221     // Such a clumsy do-while loop appears to be the official way to use an
222     // internalScanner. scanner.next() return value refers to the _next_
223     // result, not to the one already returned in results.
224     boolean hasNext;
225     do {
226       hasNext = scanner.next(results);
227       actualKVs.addAll(results);
228       results.clear();
229     } while (hasNext);
230 
231     List<Cell> filteredKVs = filterExpectedResults(qualSet,
232         rowBytes(startRow), rowBytes(endRow), maxVersions);
233     final String rowRestrictionStr =
234         (startRow == -1 && endRow == -1) ? "all rows" : (
235             startRow == endRow ? ("row=" + startRow) : ("startRow="
236             + startRow + ", " + "endRow=" + endRow));
237     final String columnRestrictionStr =
238         columnArr.length == 0 ? "all columns"
239             : ("columns=" + Arrays.toString(columnArr));
240     final String testDesc =
241         "Bloom=" + bloomType + ", compr=" + comprAlgo + ", "
242             + (scan.isGetScan() ? "Get" : "Scan") + ": "
243             + columnRestrictionStr + ", " + rowRestrictionStr
244             + ", maxVersions=" + maxVersions + ", lazySeek=" + lazySeekEnabled;
245     long seekCount = StoreFileScanner.getSeekCount() - initialSeekCount;
246     if (VERBOSE) {
247       System.err.println("Seek count: " + seekCount + ", KVs returned: "
248         + actualKVs.size() + ". " + testDesc +
249         (lazySeekEnabled ? "\n" : ""));
250     }
251     if (lazySeekEnabled) {
252       totalSeekLazy += seekCount;
253     } else {
254       totalSeekDiligent += seekCount;
255     }
256     assertKVListsEqual(testDesc, filteredKVs, actualKVs);
257   }
258 
259   private List<Cell> filterExpectedResults(Set<String> qualSet,
260       byte[] startRow, byte[] endRow, int maxVersions) {
261     final List<Cell> filteredKVs = new ArrayList<Cell>();
262     final Map<String, Integer> verCount = new HashMap<String, Integer>();
263     for (Cell kv : expectedKVs) {
264       if (startRow.length > 0 &&
265           Bytes.compareTo(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
266               startRow, 0, startRow.length) < 0) {
267         continue;
268       }
269 
270       // In this unit test the end row is always inclusive.
271       if (endRow.length > 0 &&
272           Bytes.compareTo(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
273               endRow, 0, endRow.length) > 0) {
274         continue;
275       }
276 
277       if (!qualSet.isEmpty() && (!CellUtil.matchingFamily(kv, FAMILY_BYTES)
278           || !qualSet.contains(Bytes.toString(CellUtil.cloneQualifier(kv))))) {
279         continue;
280       }
281 
282       final String rowColStr =
283         Bytes.toStringBinary(CellUtil.cloneRow(kv)) + "/"
284             + Bytes.toStringBinary(CellUtil.cloneFamily(kv)) + ":"
285             + Bytes.toStringBinary(CellUtil.cloneQualifier(kv));
286       final Integer curNumVer = verCount.get(rowColStr);
287       final int newNumVer = curNumVer != null ? (curNumVer + 1) : 1;
288       if (newNumVer <= maxVersions) {
289         filteredKVs.add(kv);
290         verCount.put(rowColStr, newNumVer);
291       }
292     }
293 
294     return filteredKVs;
295   }
296 
297   private void prepareExpectedKVs(long latestDelTS) {
298     final List<Cell> filteredKVs = new ArrayList<Cell>();
299     for (Cell kv : expectedKVs) {
300       if (kv.getTimestamp() > latestDelTS || latestDelTS == -1) {
301         filteredKVs.add(kv);
302       }
303     }
304     expectedKVs = filteredKVs;
305     Collections.sort(expectedKVs, KeyValue.COMPARATOR);
306   }
307 
308   public void put(String qual, long ts) {
309     if (!putTimestamps.contains(ts)) {
310       put.add(FAMILY_BYTES, Bytes.toBytes(qual), ts, createValue(ts));
311       putTimestamps.add(ts);
312     }
313     if (VERBOSE) {
314       LOG.info("put: row " + Bytes.toStringBinary(put.getRow())
315           + ", cf " + FAMILY + ", qualifier " + qual + ", ts " + ts);
316     }
317   }
318 
319   private byte[] createValue(long ts) {
320     return Bytes.toBytes("value" + ts);
321   }
322 
323   public void delAtTimestamp(String qual, long ts) {
324     del.deleteColumn(FAMILY_BYTES, Bytes.toBytes(qual), ts);
325     logDelete(qual, ts, "at");
326   }
327 
328   private void logDelete(String qual, long ts, String delType) {
329     if (VERBOSE) {
330       LOG.info("del " + delType + ": row "
331           + Bytes.toStringBinary(put.getRow()) + ", cf " + FAMILY
332           + ", qualifier " + qual + ", ts " + ts);
333     }
334   }
335 
336   private void delUpToTimestamp(String qual, long upToTS) {
337     del.deleteColumns(FAMILY_BYTES, Bytes.toBytes(qual), upToTS);
338     logDelete(qual, upToTS, "up to and including");
339   }
340 
341   private long randLong(long n) {
342     long l = rand.nextLong();
343     if (l == Long.MIN_VALUE)
344       l = Long.MAX_VALUE;
345     return Math.abs(l) % n;
346   }
347 
348   private long randBetween(long a, long b) {
349     long x = a + randLong(b - a + 1);
350     assertTrue(a <= x && x <= b);
351     return x;
352   }
353 
354   private final String rowStr(int i) {
355     return ("row" + i).intern();
356   }
357 
358   private final byte[] rowBytes(int i) {
359     if (i == -1) {
360       return HConstants.EMPTY_BYTE_ARRAY;
361     }
362     return Bytes.toBytes(rowStr(i));
363   }
364 
365   private final String getQualStr(int i) {
366     return ("qual" + i).intern();
367   }
368 
369   public void createTimestampRange(long minTS, long maxTS,
370       long deleteUpToTS) throws IOException {
371     assertTrue(minTS < maxTS);
372     assertTrue(deleteUpToTS == -1
373         || (minTS <= deleteUpToTS && deleteUpToTS <= maxTS));
374 
375     for (int iRow = 0; iRow < NUM_ROWS; ++iRow) {
376       final String row = rowStr(iRow);
377       final byte[] rowBytes = Bytes.toBytes(row);
378       for (int iCol = 0; iCol < NUM_COLS; ++iCol) {
379         final String qual = getQualStr(iCol);
380         final byte[] qualBytes = Bytes.toBytes(qual);
381         put = new Put(rowBytes);
382 
383         putTimestamps.clear();
384         put(qual, minTS);
385         put(qual, maxTS);
386         for (int i = 0; i < PUTS_PER_ROW_COL; ++i) {
387           put(qual, randBetween(minTS, maxTS));
388         }
389 
390         long[] putTimestampList = new long[putTimestamps.size()];
391         {
392           int i = 0;
393           for (long ts : putTimestamps) {
394             putTimestampList[i++] = ts;
395           }
396         }
397 
398         // Delete a predetermined number of particular timestamps
399         delTimestamps.clear();
400         assertTrue(putTimestampList.length >= DELETES_PER_ROW_COL);
401         int numToDel = DELETES_PER_ROW_COL;
402         int tsRemaining = putTimestampList.length;
403         del = new Delete(rowBytes);
404         for (long ts : putTimestampList) {
405           if (rand.nextInt(tsRemaining) < numToDel) {
406             delAtTimestamp(qual, ts);
407             putTimestamps.remove(ts);
408             --numToDel;
409           }
410 
411           if (--tsRemaining == 0) {
412             break;
413           }
414         }
415 
416         // Another type of delete: everything up to the given timestamp.
417         if (deleteUpToTS != -1) {
418           delUpToTimestamp(qual, deleteUpToTS);
419         }
420 
421         region.put(put);
422         if (!del.isEmpty()) {
423           region.delete(del);
424         }
425 
426         // Add remaining timestamps (those we have not deleted) to expected
427         // results
428         for (long ts : putTimestamps) {
429           expectedKVs.add(new KeyValue(rowBytes, FAMILY_BYTES, qualBytes, ts,
430               KeyValue.Type.Put));
431         }
432       }
433     }
434 
435     region.flush(true);
436   }
437 
438   @After
439   public void tearDown() throws IOException {
440     if (region != null) {
441       HRegion.closeHRegion(region);
442     }
443 
444     // We have to re-set the lazy seek flag back to the default so that other
445     // unit tests are not affected.
446     StoreScanner.enableLazySeekGlobally(
447         StoreScanner.LAZY_SEEK_ENABLED_BY_DEFAULT);
448   }
449 
450 
451   public void assertKVListsEqual(String additionalMsg,
452       final List<? extends Cell> expected,
453       final List<? extends Cell> actual) {
454     final int eLen = expected.size();
455     final int aLen = actual.size();
456     final int minLen = Math.min(eLen, aLen);
457 
458     int i;
459     for (i = 0; i < minLen
460         && KeyValue.COMPARATOR.compareOnlyKeyPortion(expected.get(i), actual.get(i)) == 0;
461         ++i) {}
462 
463     if (additionalMsg == null) {
464       additionalMsg = "";
465     }
466     if (!additionalMsg.isEmpty()) {
467       additionalMsg = ". " + additionalMsg;
468     }
469 
470     if (eLen != aLen || i != minLen) {
471       throw new AssertionError(
472           "Expected and actual KV arrays differ at position " + i + ": " +
473           HBaseTestingUtility.safeGetAsStr(expected, i) + " (length " + eLen +") vs. " +
474           HBaseTestingUtility.safeGetAsStr(actual, i) + " (length " + aLen + ")" + additionalMsg);
475     }
476   }
477 
478 }
479