View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import static org.junit.Assert.assertArrayEquals;
22  import static org.junit.Assert.assertEquals;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.IOException;
27  import java.util.TreeMap;
28  
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FSDataOutputStream;
31  import org.apache.hadoop.fs.FileStatus;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.HBaseTestingUtility;
35  import org.apache.hadoop.hbase.HColumnDescriptor;
36  import org.apache.hadoop.hbase.HConstants;
37  import org.apache.hadoop.hbase.HTableDescriptor;
38  import org.apache.hadoop.hbase.client.Connection;
39  import org.apache.hadoop.hbase.client.ConnectionFactory;
40  import org.apache.hadoop.hbase.NamespaceDescriptor;
41  import org.apache.hadoop.hbase.TableName;
42  import org.apache.hadoop.hbase.TableNotFoundException;
43  import org.apache.hadoop.hbase.client.HTable;
44  import org.apache.hadoop.hbase.client.Table;
45  import org.apache.hadoop.hbase.codec.KeyValueCodecWithTags;
46  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
47  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
48  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
49  import org.apache.hadoop.hbase.io.hfile.HFile;
50  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
51  import org.apache.hadoop.hbase.regionserver.BloomType;
52  import org.apache.hadoop.hbase.security.SecureBulkLoadUtil;
53  import org.apache.hadoop.hbase.testclassification.LargeTests;
54  import org.apache.hadoop.hbase.util.Bytes;
55  import org.apache.hadoop.hbase.util.HFileTestUtil;
56  import org.junit.AfterClass;
57  import org.junit.BeforeClass;
58  import org.junit.Rule;
59  import org.junit.Test;
60  import org.junit.experimental.categories.Category;
61  import org.junit.rules.TestName;
62  
/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat
 */
@Category(LargeTests.class)
public class TestLoadIncrementalHFiles {
  @Rule
  public TestName tn = new TestName();

  // Column qualifier and family written into every generated test HFile.
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");
  // Namespace used by the "bulk load into an explicit namespace" runs.
  private static final String NAMESPACE = "bulkNS";

  // Message fragment emitted by LoadIncrementalHFiles when an HFile's family
  // does not exist in the target table (checked in testNonexistentColumnFamilyLoad).
  static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";
  // Per-region/per-family file limit configured on the mini cluster in setUpBeforeClass.
  static final int MAX_FILES_PER_REGION_PER_FAMILY = 4;

  // Split points for tests that pre-create a multi-region table.
  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  // Shared mini cluster, started once for the whole class.
  static HBaseTestingUtility util = new HBaseTestingUtility();

87    @BeforeClass
88    public static void setUpBeforeClass() throws Exception {
89      util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,"");
90      util.getConfiguration().setInt(
91        LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
92        MAX_FILES_PER_REGION_PER_FAMILY);
93      // change default behavior so that tag values are returned with normal rpcs
94      util.getConfiguration().set(HConstants.RPC_CODEC_CONF_KEY,
95          KeyValueCodecWithTags.class.getCanonicalName());
96      util.startMiniCluster();
97  
98      setupNamespace();
99    }
100 
  /** Creates the dedicated namespace used by the namespaced bulk-load runs. */
  protected static void setupNamespace() throws Exception {
    util.getHBaseAdmin().createNamespace(NamespaceDescriptor.create(NAMESPACE).build());
  }
104 
  /** Shuts down the shared mini cluster started in {@link #setUpBeforeClass()}. */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }
109 
110   /**
111    * Test case that creates some regions and loads
112    * HFiles that fit snugly inside those regions
113    */
114   @Test(timeout = 120000)
115   public void testSimpleLoad() throws Exception {
116     runTest("testSimpleLoad", BloomType.NONE,
117         new byte[][][] {
118           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
119           new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
120     });
121   }
122 
123   /**
124    * Test case that creates some regions and loads
125    * HFiles that cross the boundaries of those regions
126    */
127   @Test(timeout = 120000)
128   public void testRegionCrossingLoad() throws Exception {
129     runTest("testRegionCrossingLoad", BloomType.NONE,
130         new byte[][][] {
131           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
132           new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
133     });
134   }
135 
136   /**
137    * Test loading into a column family that has a ROW bloom filter.
138    */
139   @Test(timeout = 60000)
140   public void testRegionCrossingRowBloom() throws Exception {
141     runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
142         new byte[][][] {
143           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
144           new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
145     });
146   }
147 
148   /**
149    * Test loading into a column family that has a ROWCOL bloom filter.
150    */
151   @Test(timeout = 120000)
152   public void testRegionCrossingRowColBloom() throws Exception {
153     runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
154         new byte[][][] {
155           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
156           new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
157     });
158   }
159 
160   /**
161    * Test case that creates some regions and loads HFiles that have
162    * different region boundaries than the table pre-split.
163    */
164   @Test(timeout = 120000)
165   public void testSimpleHFileSplit() throws Exception {
166     runTest("testHFileSplit", BloomType.NONE,
167         new byte[][] {
168           Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
169           Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
170         },
171         new byte[][][] {
172           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("lll") },
173           new byte[][]{ Bytes.toBytes("mmm"), Bytes.toBytes("zzz") },
174         }
175     );
176   }
177 
  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries
   * and have different region boundaries than the table pre-split.
   */
  @Test(timeout = 60000)
  public void testRegionCrossingHFileSplit() throws Exception {
    testRegionCrossingHFileSplit(BloomType.NONE);
  }
186 
  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries
   * have a ROW bloom filter and a different region boundaries than the table pre-split.
   */
  @Test(timeout = 120000)
  public void testRegionCrossingHFileSplitRowBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROW);
  }
195 
  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries
   * have a ROWCOL bloom filter and a different region boundaries than the table pre-split.
   */
  @Test(timeout = 120000)
  public void testRegionCrossingHFileSplitRowColBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROWCOL);
  }
204 
205   @Test
206   public void testSplitALot() throws Exception {
207     runTest("testSplitALot", BloomType.NONE,
208       new byte[][] {
209         Bytes.toBytes("aaaa"), Bytes.toBytes("bbb"),
210         Bytes.toBytes("ccc"), Bytes.toBytes("ddd"),
211         Bytes.toBytes("eee"), Bytes.toBytes("fff"),
212         Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
213         Bytes.toBytes("iii"), Bytes.toBytes("lll"),
214         Bytes.toBytes("mmm"), Bytes.toBytes("nnn"),
215         Bytes.toBytes("ooo"), Bytes.toBytes("ppp"),
216         Bytes.toBytes("qqq"), Bytes.toBytes("rrr"),
217         Bytes.toBytes("sss"), Bytes.toBytes("ttt"),
218         Bytes.toBytes("uuu"), Bytes.toBytes("vvv"),
219         Bytes.toBytes("zzz"),
220       },
221       new byte[][][] {
222         new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("zzz") },
223       }
224     );
225   }
226 
227   private void testRegionCrossingHFileSplit(BloomType bloomType) throws Exception {
228     runTest("testHFileSplit" + bloomType + "Bloom", bloomType,
229         new byte[][] {
230           Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
231           Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
232         },
233         new byte[][][] {
234           new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
235           new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
236         }
237     );
238   }
239 
240   private HTableDescriptor buildHTD(TableName tableName, BloomType bloomType) {
241     HTableDescriptor htd = new HTableDescriptor(tableName);
242     HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
243     familyDesc.setBloomFilterType(bloomType);
244     htd.addFamily(familyDesc);
245     return htd;
246   }
247 
  /** Convenience overload: no table pre-split keys (table is auto-created). */
  private void runTest(String testName, BloomType bloomType,
      byte[][][] hfileRanges) throws Exception {
    runTest(testName, bloomType, null, hfileRanges);
  }
252 
253   private void runTest(String testName, BloomType bloomType,
254       byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
255     final byte[] TABLE_NAME = Bytes.toBytes("mytable_"+testName);
256     final boolean preCreateTable = tableSplitKeys != null;
257 
258     // Run the test bulkloading the table to the default namespace
259     final TableName TABLE_WITHOUT_NS = TableName.valueOf(TABLE_NAME);
260     runTest(testName, TABLE_WITHOUT_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
261 
262     // Run the test bulkloading the table to the specified namespace
263     final TableName TABLE_WITH_NS = TableName.valueOf(Bytes.toBytes(NAMESPACE), TABLE_NAME);
264     runTest(testName, TABLE_WITH_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
265   }
266 
267   private void runTest(String testName, TableName tableName, BloomType bloomType,
268       boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
269     HTableDescriptor htd = buildHTD(tableName, bloomType);
270     runTest(testName, htd, bloomType, preCreateTable, tableSplitKeys, hfileRanges);
271   }
272 
273   private void runTest(String testName, HTableDescriptor htd, BloomType bloomType,
274       boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
275 
276     for (boolean managed : new boolean[] { true, false }) {
277       Path dir = util.getDataTestDirOnTestFS(testName);
278       FileSystem fs = util.getTestFileSystem();
279       dir = dir.makeQualified(fs);
280       Path familyDir = new Path(dir, Bytes.toString(FAMILY));
281 
282       int hfileIdx = 0;
283       for (byte[][] range : hfileRanges) {
284         byte[] from = range[0];
285         byte[] to = range[1];
286         HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
287             + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
288       }
289       int expectedRows = hfileIdx * 1000;
290 
291       if (preCreateTable) {
292         util.getHBaseAdmin().createTable(htd, tableSplitKeys);
293       }
294 
295       final TableName tableName = htd.getTableName();
296       if (!util.getHBaseAdmin().tableExists(tableName)) {
297         util.getHBaseAdmin().createTable(htd);
298       }
299       LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
300 
301       if (managed) {
302         try (HTable table = new HTable(util.getConfiguration(), tableName)) {
303           loader.doBulkLoad(dir, table);
304           assertEquals(expectedRows, util.countRows(table));
305         }
306       } else {
307         try (Connection conn = ConnectionFactory.createConnection(util.getConfiguration());
308             HTable table = (HTable) conn.getTable(tableName)) {
309           loader.doBulkLoad(dir, table);
310         }
311       }
312 
313       // verify staging folder has been cleaned up
314       Path stagingBasePath = SecureBulkLoadUtil.getBaseStagingDir(util.getConfiguration());
315       if (fs.exists(stagingBasePath)) {
316         FileStatus[] files = fs.listStatus(stagingBasePath);
317         for (FileStatus file : files) {
318           assertTrue("Folder=" + file.getPath() + " is not cleaned up.",
319               file.getPath().getName() != "DONOTERASE");
320         }
321       }
322 
323       util.deleteTable(tableName);
324     }
325   }
326 
327   /**
328    * Test that tags survive through a bulk load that needs to split hfiles.
329    *
330    * This test depends on the "hbase.client.rpc.codec" =  KeyValueCodecWithTags so that the client
331    * can get tags in the responses.
332    */
333   @Test(timeout = 60000)
334   public void htestTagsSurviveBulkLoadSplit() throws Exception {
335     Path dir = util.getDataTestDirOnTestFS(tn.getMethodName());
336     FileSystem fs = util.getTestFileSystem();
337     dir = dir.makeQualified(fs);
338     Path familyDir = new Path(dir, Bytes.toString(FAMILY));
339     // table has these split points
340     byte [][] tableSplitKeys = new byte[][] {
341             Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
342             Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"),
343     };
344 
345     // creating an hfile that has values that span the split points.
346     byte[] from = Bytes.toBytes("ddd");
347     byte[] to = Bytes.toBytes("ooo");
348     HFileTestUtil.createHFileWithTags(util.getConfiguration(), fs,
349         new Path(familyDir, tn.getMethodName()+"_hfile"),
350         FAMILY, QUALIFIER, from, to, 1000);
351     int expectedRows = 1000;
352 
353     TableName tableName = TableName.valueOf(tn.getMethodName());
354     HTableDescriptor htd = buildHTD(tableName, BloomType.NONE);
355     util.getHBaseAdmin().createTable(htd, tableSplitKeys);
356 
357     LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
358     String [] args= {dir.toString(), tableName.toString()};
359     loader.run(args);
360 
361     Table table = util.getConnection().getTable(tableName);
362     try {
363       assertEquals(expectedRows, util.countRows(table));
364       HFileTestUtil.verifyTags(table);
365     } finally {
366       table.close();
367     }
368 
369     util.deleteTable(tableName);
370   }
371 
372   /**
373    * Test loading into a column family that does not exist.
374    */
375   @Test(timeout = 60000)
376   public void testNonexistentColumnFamilyLoad() throws Exception {
377     String testName = "testNonexistentColumnFamilyLoad";
378     byte[][][] hFileRanges = new byte[][][] {
379       new byte[][]{ Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
380       new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
381     };
382 
383     final byte[] TABLE = Bytes.toBytes("mytable_"+testName);
384     HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE));
385     // set real family name to upper case in purpose to simulate the case that
386     // family name in HFiles is invalid
387     HColumnDescriptor family =
388         new HColumnDescriptor(Bytes.toBytes(new String(FAMILY).toUpperCase()));
389     htd.addFamily(family);
390 
391     try {
392       runTest(testName, htd, BloomType.NONE, true, SPLIT_KEYS, hFileRanges);
393       assertTrue("Loading into table with non-existent family should have failed", false);
394     } catch (Exception e) {
395       assertTrue("IOException expected", e instanceof IOException);
396       // further check whether the exception message is correct
397       String errMsg = e.getMessage();
398       assertTrue("Incorrect exception message, expected message: ["
399           + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY + "], current message: [" + errMsg + "]",
400           errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
401     }
402   }
403 
  /** Non-HFile content in the load dir, with the target table pre-created. */
  @Test(timeout = 120000)
  public void testNonHfileFolderWithUnmatchedFamilyName() throws Exception {
    testNonHfileFolder("testNonHfileFolderWithUnmatchedFamilyName", true);
  }
408 
  /** Non-HFile content in the load dir, letting the loader create the table. */
  @Test(timeout = 120000)
  public void testNonHfileFolder() throws Exception {
    testNonHfileFolder("testNonHfileFolder", false);
  }
413 
414   /**
415    * Write a random data file and a non-file in a dir with a valid family name
416    * but not part of the table families. we should we able to bulkload without
417    * getting the unmatched family exception. HBASE-13037/HBASE-13227
418    */
419   private void testNonHfileFolder(String tableName, boolean preCreateTable) throws Exception {
420     Path dir = util.getDataTestDirOnTestFS(tableName);
421     FileSystem fs = util.getTestFileSystem();
422     dir = dir.makeQualified(fs);
423 
424     Path familyDir = new Path(dir, Bytes.toString(FAMILY));
425     HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_0"),
426         FAMILY, QUALIFIER, Bytes.toBytes("begin"), Bytes.toBytes("end"), 500);
427     createRandomDataFile(fs, new Path(familyDir, "012356789"), 16 * 1024);
428 
429     final String NON_FAMILY_FOLDER = "_logs";
430     Path nonFamilyDir = new Path(dir, NON_FAMILY_FOLDER);
431     fs.mkdirs(nonFamilyDir);
432     fs.mkdirs(new Path(nonFamilyDir, "non-file"));
433     createRandomDataFile(fs, new Path(nonFamilyDir, "012356789"), 16 * 1024);
434 
435     Table table = null;
436     try {
437       if (preCreateTable) {
438         table = util.createTable(TableName.valueOf(tableName), FAMILY);
439       } else {
440         table = util.getConnection().getTable(TableName.valueOf(tableName));
441       }
442 
443       final String[] args = {dir.toString(), tableName};
444       new LoadIncrementalHFiles(util.getConfiguration()).run(args);
445       assertEquals(500, util.countRows(table));
446     } finally {
447       if (table != null) {
448         table.close();
449       }
450       fs.delete(dir, true);
451     }
452   }
453 
454   private static void createRandomDataFile(FileSystem fs, Path path, int size)
455       throws IOException {
456     FSDataOutputStream stream = fs.create(path);
457     try {
458       byte[] data = new byte[1024];
459       for (int i = 0; i < data.length; ++i) {
460         data[i] = (byte)(i & 0xff);
461       }
462       while (size >= data.length) {
463         stream.write(data, 0, data.length);
464         size -= data.length;
465       }
466       if (size > 0) {
467         stream.write(data, 0, size);
468       }
469     } finally {
470       stream.close();
471     }
472   }
473 
474   @Test(timeout = 120000)
475   public void testSplitStoreFile() throws IOException {
476     Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
477     FileSystem fs = util.getTestFileSystem();
478     Path testIn = new Path(dir, "testhfile");
479     HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
480     HFileTestUtil.createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
481         Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);
482 
483     Path bottomOut = new Path(dir, "bottom.out");
484     Path topOut = new Path(dir, "top.out");
485 
486     LoadIncrementalHFiles.splitStoreFile(
487         util.getConfiguration(), testIn,
488         familyDesc, Bytes.toBytes("ggg"),
489         bottomOut,
490         topOut);
491 
492     int rowCount = verifyHFile(bottomOut);
493     rowCount += verifyHFile(topOut);
494     assertEquals(1000, rowCount);
495   }
496 
  /** Split an unencoded HFile into an unencoded family. */
  @Test
  public void testSplitStoreFileWithNoneToNone() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.NONE, DataBlockEncoding.NONE);
  }
501 
  /** Split a DIFF-encoded HFile into a DIFF-encoded family. */
  @Test
  public void testSplitStoreFileWithEncodedToEncoded() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.DIFF, DataBlockEncoding.DIFF);
  }
506 
  /** Split a DIFF-encoded HFile into an unencoded family. */
  @Test
  public void testSplitStoreFileWithEncodedToNone() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.DIFF, DataBlockEncoding.NONE);
  }
511 
  /** Split an unencoded HFile into a DIFF-encoded family. */
  @Test
  public void testSplitStoreFileWithNoneToEncoded() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.NONE, DataBlockEncoding.DIFF);
  }
516 
517   private void testSplitStoreFileWithDifferentEncoding(DataBlockEncoding bulkloadEncoding,
518       DataBlockEncoding cfEncoding) throws IOException {
519     Path dir = util.getDataTestDirOnTestFS("testSplitHFileWithDifferentEncoding");
520     FileSystem fs = util.getTestFileSystem();
521     Path testIn = new Path(dir, "testhfile");
522     HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
523     familyDesc.setDataBlockEncoding(cfEncoding);
524     HFileTestUtil.createHFileWithDataBlockEncoding(
525         util.getConfiguration(), fs, testIn, bulkloadEncoding,
526         FAMILY, QUALIFIER, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);
527 
528     Path bottomOut = new Path(dir, "bottom.out");
529     Path topOut = new Path(dir, "top.out");
530 
531     LoadIncrementalHFiles.splitStoreFile(
532         util.getConfiguration(), testIn,
533         familyDesc, Bytes.toBytes("ggg"),
534         bottomOut,
535         topOut);
536 
537     int rowCount = verifyHFile(bottomOut);
538     rowCount += verifyHFile(topOut);
539     assertEquals(1000, rowCount);
540   }
541 
542   private int verifyHFile(Path p) throws IOException {
543     Configuration conf = util.getConfiguration();
544     HFile.Reader reader = HFile.createReader(
545         p.getFileSystem(conf), p, new CacheConfig(conf), conf);
546     reader.loadFileInfo();
547     HFileScanner scanner = reader.getScanner(false, false);
548     scanner.seekTo();
549     int count = 0;
550     do {
551       count++;
552     } while (scanner.next());
553     assertTrue(count > 0);
554     reader.close();
555     return count;
556   }
557 
558   private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
559     Integer value = map.containsKey(first)?map.get(first):0;
560     map.put(first, value+1);
561 
562     value = map.containsKey(last)?map.get(last):0;
563     map.put(last, value-1);
564   }
565 
566   @Test(timeout = 120000)
567   public void testInferBoundaries() {
568     TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
569 
570     /* Toy example
571      *     c---------i            o------p          s---------t     v------x
572      * a------e    g-----k   m-------------q   r----s            u----w
573      *
574      * Should be inferred as:
575      * a-----------------k   m-------------q   r--------------t  u---------x
576      *
577      * The output should be (m,r,u)
578      */
579 
580     String first;
581     String last;
582 
583     first = "a"; last = "e";
584     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
585 
586     first = "r"; last = "s";
587     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
588 
589     first = "o"; last = "p";
590     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
591 
592     first = "g"; last = "k";
593     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
594 
595     first = "v"; last = "x";
596     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
597 
598     first = "c"; last = "i";
599     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
600 
601     first = "m"; last = "q";
602     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
603 
604     first = "s"; last = "t";
605     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
606 
607     first = "u"; last = "w";
608     addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
609 
610     byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
611     byte[][] compare = new byte[3][];
612     compare[0] = "m".getBytes();
613     compare[1] = "r".getBytes();
614     compare[2] = "u".getBytes();
615 
616     assertEquals(keysArray.length, 3);
617 
618     for (int row = 0; row<keysArray.length; row++){
619       assertArrayEquals(keysArray[row], compare[row]);
620     }
621   }
622 
623   @Test(timeout = 60000)
624   public void testLoadTooMayHFiles() throws Exception {
625     Path dir = util.getDataTestDirOnTestFS("testLoadTooMayHFiles");
626     FileSystem fs = util.getTestFileSystem();
627     dir = dir.makeQualified(fs);
628     Path familyDir = new Path(dir, Bytes.toString(FAMILY));
629 
630     byte[] from = Bytes.toBytes("begin");
631     byte[] to = Bytes.toBytes("end");
632     for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
633       HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
634           + i), FAMILY, QUALIFIER, from, to, 1000);
635     }
636 
637     LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
638     String [] args= {dir.toString(), "mytable_testLoadTooMayHFiles"};
639     try {
640       loader.run(args);
641       fail("Bulk loading too many files should fail");
642     } catch (IOException ie) {
643       assertTrue(ie.getMessage().contains("Trying to load more than "
644         + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
645     }
646   }
647 
648   @Test(expected = TableNotFoundException.class)
649   public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
650     Configuration conf = util.getConfiguration();
651     conf.set(LoadIncrementalHFiles.CREATE_TABLE_CONF_KEY, "no");
652     LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
653     String[] args = { "directory", "nonExistingTable" };
654     loader.run(args);
655   }
656 
657   @Test(timeout = 120000)
658   public void testTableWithCFNameStartWithUnderScore() throws Exception {
659     Path dir = util.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
660     FileSystem fs = util.getTestFileSystem();
661     dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
662     String family = "_cf";
663     Path familyDir = new Path(dir, family);
664 
665     byte[] from = Bytes.toBytes("begin");
666     byte[] to = Bytes.toBytes("end");
667     Configuration conf = util.getConfiguration();
668     String tableName = "mytable_cfNameStartWithUnderScore";
669     Table table = util.createTable(TableName.valueOf(tableName), family);
670     HFileTestUtil.createHFile(conf, fs, new Path(familyDir, "hfile"), Bytes.toBytes(family),
671       QUALIFIER, from, to, 1000);
672 
673     LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
674     String[] args = { dir.toString(), tableName };
675     try {
676       loader.run(args);
677       assertEquals(1000, util.countRows(table));
678     } finally {
679       if (null != table) {
680         table.close();
681       }
682     }
683   }
684 }
685