View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.snapshot;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertTrue;
22  import static org.junit.Assert.fail;
23  
24  import java.io.IOException;
25  import java.util.Collections;
26  import java.util.Comparator;
27  import java.util.HashMap;
28  import java.util.List;
29  import java.util.Map;
30  import java.util.concurrent.CountDownLatch;
31  
32  import org.apache.commons.logging.Log;
33  import org.apache.commons.logging.LogFactory;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.fs.FileSystem;
36  import org.apache.hadoop.fs.Path;
37  import org.apache.hadoop.hbase.HBaseTestingUtility;
38  import org.apache.hadoop.hbase.HConstants;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.TableName;
41  import org.apache.hadoop.hbase.TableNotFoundException;
42  import org.apache.hadoop.hbase.client.Admin;
43  import org.apache.hadoop.hbase.client.Table;
44  import org.apache.hadoop.hbase.master.HMaster;
45  import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
46  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
47  import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
48  import org.apache.hadoop.hbase.testclassification.LargeTests;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.junit.After;
51  import org.junit.AfterClass;
52  import org.junit.Before;
53  import org.junit.BeforeClass;
54  import org.junit.Test;
55  import org.junit.experimental.categories.Category;
56  
57  /**
58   * Test creating/using/deleting snapshots from the client
59   * <p>
60   * This is an end-to-end test for the snapshot utility
61   *
62   * TODO This is essentially a clone of TestSnapshotFromClient.  It is worth refactoring
63   * because there will be a few more flavors of snapshots that need to run these tests.
64   */
65  @Category(LargeTests.class)
66  public class TestFlushSnapshotFromClient {
67    private static final Log LOG = LogFactory.getLog(TestFlushSnapshotFromClient.class);
68    private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
69    private static final int NUM_RS = 2;
70    private static final byte[] TEST_FAM = Bytes.toBytes("fam");
71    private static final TableName TABLE_NAME = TableName.valueOf("test");
72    private final int DEFAULT_NUM_ROWS = 100;
73  
74    /**
75     * Setup the config for the cluster
76     * @throws Exception on failure
77     */
78    @BeforeClass
79    public static void setupCluster() throws Exception {
80      // Uncomment the following lines if more verbosity is needed for
81      // debugging (see HBASE-12285 for details).
82      //((Log4JLogger)RpcServer.LOG).getLogger().setLevel(Level.ALL);
83      //((Log4JLogger)AbstractRpcClient.LOG).getLogger().setLevel(Level.ALL);
84      //((Log4JLogger)ScannerCallable.LOG).getLogger().setLevel(Level.ALL);
85      setupConf(UTIL.getConfiguration());
86      UTIL.startMiniCluster(NUM_RS);
87    }
88  
89    private static void setupConf(Configuration conf) {
90      // disable the ui
91      conf.setInt("hbase.regionsever.info.port", -1);
92      // change the flush size to a small amount, regulating number of store files
93      conf.setInt("hbase.hregion.memstore.flush.size", 25000);
94      // so make sure we get a compaction when doing a load, but keep around some
95      // files in the store
96      conf.setInt("hbase.hstore.compaction.min", 10);
97      conf.setInt("hbase.hstore.compactionThreshold", 10);
98      // block writes if we get to 12 store files
99      conf.setInt("hbase.hstore.blockingStoreFiles", 12);
100     // Enable snapshot
101     conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
102     conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
103       ConstantSizeRegionSplitPolicy.class.getName());
104   }
105 
106   @Before
107   public void setup() throws Exception {
108     SnapshotTestingUtils.createTable(UTIL, TABLE_NAME, TEST_FAM);
109   }
110 
111   @After
112   public void tearDown() throws Exception {
113     UTIL.deleteTable(TABLE_NAME);
114 
115     SnapshotTestingUtils.deleteAllSnapshots(UTIL.getHBaseAdmin());
116     SnapshotTestingUtils.deleteArchiveDirectory(UTIL);
117   }
118 
119   @AfterClass
120   public static void cleanupTest() throws Exception {
121     try {
122       UTIL.shutdownMiniCluster();
123     } catch (Exception e) {
124       LOG.warn("failure shutting down cluster", e);
125     }
126   }
127 
128   /**
129    * Test simple flush snapshotting a table that is online
130    * @throws Exception
131    */
132   @Test (timeout=300000)
133   public void testFlushTableSnapshot() throws Exception {
134     Admin admin = UTIL.getHBaseAdmin();
135     // make sure we don't fail on listing snapshots
136     SnapshotTestingUtils.assertNoSnapshots(admin);
137 
138     // put some stuff in the table
139     SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
140 
141     LOG.debug("FS state before snapshot:");
142     UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
143 
144     // take a snapshot of the enabled table
145     String snapshotString = "offlineTableSnapshot";
146     byte[] snapshot = Bytes.toBytes(snapshotString);
147     admin.snapshot(snapshotString, TABLE_NAME, SnapshotDescription.Type.FLUSH);
148     LOG.debug("Snapshot completed.");
149 
150     // make sure we have the snapshot
151     List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin,
152       snapshot, TABLE_NAME);
153 
154     // make sure its a valid snapshot
155     LOG.debug("FS state after snapshot:");
156     UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
157 
158     SnapshotTestingUtils.confirmSnapshotValid(UTIL, snapshots.get(0), TABLE_NAME, TEST_FAM);
159   }
160 
161    /**
162    * Test snapshotting a table that is online without flushing
163    * @throws Exception
164    */
165   @Test(timeout=30000)
166   public void testSkipFlushTableSnapshot() throws Exception {
167     Admin admin = UTIL.getHBaseAdmin();
168     // make sure we don't fail on listing snapshots
169     SnapshotTestingUtils.assertNoSnapshots(admin);
170 
171     // put some stuff in the table
172     try (Table table = UTIL.getConnection().getTable(TABLE_NAME)) {
173       UTIL.loadTable(table, TEST_FAM);
174     }
175 
176     LOG.debug("FS state before snapshot:");
177     UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
178 
179     // take a snapshot of the enabled table
180     String snapshotString = "skipFlushTableSnapshot";
181     byte[] snapshot = Bytes.toBytes(snapshotString);
182     admin.snapshot(snapshotString, TABLE_NAME, SnapshotDescription.Type.SKIPFLUSH);
183     LOG.debug("Snapshot completed.");
184 
185     // make sure we have the snapshot
186     List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin,
187         snapshot, TABLE_NAME);
188 
189     // make sure its a valid snapshot
190     LOG.debug("FS state after snapshot:");
191     UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
192 
193     SnapshotTestingUtils.confirmSnapshotValid(UTIL, snapshots.get(0), TABLE_NAME, TEST_FAM);
194 
195     admin.deleteSnapshot(snapshot);
196     snapshots = admin.listSnapshots();
197     SnapshotTestingUtils.assertNoSnapshots(admin);
198   }
199 
200 
201   /**
202    * Test simple flush snapshotting a table that is online
203    * @throws Exception
204    */
205   @Test (timeout=300000)
206   public void testFlushTableSnapshotWithProcedure() throws Exception {
207     Admin admin = UTIL.getHBaseAdmin();
208     // make sure we don't fail on listing snapshots
209     SnapshotTestingUtils.assertNoSnapshots(admin);
210 
211     // put some stuff in the table
212     SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
213 
214     LOG.debug("FS state before snapshot:");
215     UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
216 
217     // take a snapshot of the enabled table
218     String snapshotString = "offlineTableSnapshot";
219     byte[] snapshot = Bytes.toBytes(snapshotString);
220     Map<String, String> props = new HashMap<String, String>();
221     props.put("table", TABLE_NAME.getNameAsString());
222     admin.execProcedure(SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION,
223         snapshotString, props);
224 
225 
226     LOG.debug("Snapshot completed.");
227 
228     // make sure we have the snapshot
229     List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin,
230       snapshot, TABLE_NAME);
231 
232     // make sure its a valid snapshot
233     LOG.debug("FS state after snapshot:");
234     UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
235 
236     SnapshotTestingUtils.confirmSnapshotValid(UTIL, snapshots.get(0), TABLE_NAME, TEST_FAM);
237   }
238 
239   @Test (timeout=300000)
240   public void testSnapshotFailsOnNonExistantTable() throws Exception {
241     Admin admin = UTIL.getHBaseAdmin();
242     // make sure we don't fail on listing snapshots
243     SnapshotTestingUtils.assertNoSnapshots(admin);
244     TableName tableName = TableName.valueOf("_not_a_table");
245 
246     // make sure the table doesn't exist
247     boolean fail = false;
248     do {
249     try {
250       admin.getTableDescriptor(tableName);
251       fail = true;
252       LOG.error("Table:" + tableName + " already exists, checking a new name");
253       tableName = TableName.valueOf(tableName+"!");
254     } catch (TableNotFoundException e) {
255       fail = false;
256       }
257     } while (fail);
258 
259     // snapshot the non-existant table
260     try {
261       admin.snapshot("fail", tableName, SnapshotDescription.Type.FLUSH);
262       fail("Snapshot succeeded even though there is not table.");
263     } catch (SnapshotCreationException e) {
264       LOG.info("Correctly failed to snapshot a non-existant table:" + e.getMessage());
265     }
266   }
267 
268   @Test(timeout = 300000)
269   public void testAsyncFlushSnapshot() throws Exception {
270     Admin admin = UTIL.getHBaseAdmin();
271     SnapshotDescription snapshot = SnapshotDescription.newBuilder().setName("asyncSnapshot")
272         .setTable(TABLE_NAME.getNameAsString())
273         .setType(SnapshotDescription.Type.FLUSH)
274         .build();
275 
276     // take the snapshot async
277     admin.takeSnapshotAsync(snapshot);
278 
279     // constantly loop, looking for the snapshot to complete
280     HMaster master = UTIL.getMiniHBaseCluster().getMaster();
281     SnapshotTestingUtils.waitForSnapshotToComplete(master, snapshot, 200);
282     LOG.info(" === Async Snapshot Completed ===");
283     UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
284 
285     // make sure we get the snapshot
286     SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot);
287   }
288 
289   @Test (timeout=300000)
290   public void testSnapshotStateAfterMerge() throws Exception {
291     int numRows = DEFAULT_NUM_ROWS;
292     Admin admin = UTIL.getHBaseAdmin();
293     // make sure we don't fail on listing snapshots
294     SnapshotTestingUtils.assertNoSnapshots(admin);
295     // load the table so we have some data
296     SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM);
297 
298     // Take a snapshot
299     String snapshotBeforeMergeName = "snapshotBeforeMerge";
300     admin.snapshot(snapshotBeforeMergeName, TABLE_NAME, SnapshotDescription.Type.FLUSH);
301 
302     // Clone the table
303     TableName cloneBeforeMergeName = TableName.valueOf("cloneBeforeMerge");
304     admin.cloneSnapshot(snapshotBeforeMergeName, cloneBeforeMergeName);
305     SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneBeforeMergeName);
306 
307     // Merge two regions
308     List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME);
309     Collections.sort(regions, new Comparator<HRegionInfo>() {
310       public int compare(HRegionInfo r1, HRegionInfo r2) {
311         return Bytes.compareTo(r1.getStartKey(), r2.getStartKey());
312       }
313     });
314 
315     int numRegions = admin.getTableRegions(TABLE_NAME).size();
316     int numRegionsAfterMerge = numRegions - 2;
317     admin.mergeRegions(regions.get(1).getEncodedNameAsBytes(),
318         regions.get(2).getEncodedNameAsBytes(), true);
319     admin.mergeRegions(regions.get(5).getEncodedNameAsBytes(),
320         regions.get(6).getEncodedNameAsBytes(), true);
321 
322     // Verify that there's one region less
323     waitRegionsAfterMerge(numRegionsAfterMerge);
324     assertEquals(numRegionsAfterMerge, admin.getTableRegions(TABLE_NAME).size());
325 
326     // Clone the table
327     TableName cloneAfterMergeName = TableName.valueOf("cloneAfterMerge");
328     admin.cloneSnapshot(snapshotBeforeMergeName, cloneAfterMergeName);
329     SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneAfterMergeName);
330 
331     SnapshotTestingUtils.verifyRowCount(UTIL, TABLE_NAME, numRows);
332     SnapshotTestingUtils.verifyRowCount(UTIL, cloneBeforeMergeName, numRows);
333     SnapshotTestingUtils.verifyRowCount(UTIL, cloneAfterMergeName, numRows);
334 
335     // test that we can delete the snapshot
336     UTIL.deleteTable(cloneAfterMergeName);
337     UTIL.deleteTable(cloneBeforeMergeName);
338   }
339 
340   @Test (timeout=300000)
341   public void testTakeSnapshotAfterMerge() throws Exception {
342     int numRows = DEFAULT_NUM_ROWS;
343     Admin admin = UTIL.getHBaseAdmin();
344     // make sure we don't fail on listing snapshots
345     SnapshotTestingUtils.assertNoSnapshots(admin);
346     // load the table so we have some data
347     SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM);
348 
349     // Merge two regions
350     List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME);
351     Collections.sort(regions, new Comparator<HRegionInfo>() {
352       public int compare(HRegionInfo r1, HRegionInfo r2) {
353         return Bytes.compareTo(r1.getStartKey(), r2.getStartKey());
354       }
355     });
356 
357     int numRegions = admin.getTableRegions(TABLE_NAME).size();
358     int numRegionsAfterMerge = numRegions - 2;
359     admin.mergeRegions(regions.get(1).getEncodedNameAsBytes(),
360         regions.get(2).getEncodedNameAsBytes(), true);
361     admin.mergeRegions(regions.get(5).getEncodedNameAsBytes(),
362         regions.get(6).getEncodedNameAsBytes(), true);
363 
364     waitRegionsAfterMerge(numRegionsAfterMerge);
365     assertEquals(numRegionsAfterMerge, admin.getTableRegions(TABLE_NAME).size());
366 
367     // Take a snapshot
368     String snapshotName = "snapshotAfterMerge";
369     SnapshotTestingUtils.snapshot(admin, snapshotName, TABLE_NAME.getNameAsString(),
370       SnapshotDescription.Type.FLUSH, 3);
371 
372     // Clone the table
373     TableName cloneName = TableName.valueOf("cloneMerge");
374     admin.cloneSnapshot(snapshotName, cloneName);
375     SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneName);
376 
377     SnapshotTestingUtils.verifyRowCount(UTIL, TABLE_NAME, numRows);
378     SnapshotTestingUtils.verifyRowCount(UTIL, cloneName, numRows);
379 
380     // test that we can delete the snapshot
381     UTIL.deleteTable(cloneName);
382   }
383 
384   /**
385    * Basic end-to-end test of simple-flush-based snapshots
386    */
387   @Test (timeout=300000)
388   public void testFlushCreateListDestroy() throws Exception {
389     LOG.debug("------- Starting Snapshot test -------------");
390     Admin admin = UTIL.getHBaseAdmin();
391     // make sure we don't fail on listing snapshots
392     SnapshotTestingUtils.assertNoSnapshots(admin);
393     // load the table so we have some data
394     SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
395 
396     String snapshotName = "flushSnapshotCreateListDestroy";
397     FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
398     Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
399     SnapshotTestingUtils.createSnapshotAndValidate(admin, TABLE_NAME, Bytes.toString(TEST_FAM),
400       snapshotName, rootDir, fs, true);
401   }
402 
403   /**
404    * Demonstrate that we reject snapshot requests if there is a snapshot already running on the
405    * same table currently running and that concurrent snapshots on different tables can both
406    * succeed concurretly.
407    */
408   @Test(timeout=300000)
409   public void testConcurrentSnapshottingAttempts() throws IOException, InterruptedException {
410     final TableName TABLE2_NAME = TableName.valueOf(TABLE_NAME + "2");
411 
412     int ssNum = 20;
413     Admin admin = UTIL.getHBaseAdmin();
414     // make sure we don't fail on listing snapshots
415     SnapshotTestingUtils.assertNoSnapshots(admin);
416     // create second testing table
417     SnapshotTestingUtils.createTable(UTIL, TABLE2_NAME, TEST_FAM);
418     // load the table so we have some data
419     SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
420     SnapshotTestingUtils.loadData(UTIL, TABLE2_NAME, DEFAULT_NUM_ROWS, TEST_FAM);
421 
422     final CountDownLatch toBeSubmitted = new CountDownLatch(ssNum);
423     // We'll have one of these per thread
424     class SSRunnable implements Runnable {
425       SnapshotDescription ss;
426       SSRunnable(SnapshotDescription ss) {
427         this.ss = ss;
428       }
429 
430       @Override
431       public void run() {
432         try {
433           Admin admin = UTIL.getHBaseAdmin();
434           LOG.info("Submitting snapshot request: " + ClientSnapshotDescriptionUtils.toString(ss));
435           admin.takeSnapshotAsync(ss);
436         } catch (Exception e) {
437           LOG.info("Exception during snapshot request: " + ClientSnapshotDescriptionUtils.toString(
438               ss)
439               + ".  This is ok, we expect some", e);
440         }
441         LOG.info("Submitted snapshot request: " + ClientSnapshotDescriptionUtils.toString(ss));
442         toBeSubmitted.countDown();
443       }
444     };
445 
446     // build descriptions
447     SnapshotDescription[] descs = new SnapshotDescription[ssNum];
448     for (int i = 0; i < ssNum; i++) {
449       SnapshotDescription.Builder builder = SnapshotDescription.newBuilder();
450       builder.setTable(((i % 2) == 0 ? TABLE_NAME : TABLE2_NAME).getNameAsString());
451       builder.setName("ss"+i);
452       builder.setType(SnapshotDescription.Type.FLUSH);
453       descs[i] = builder.build();
454     }
455 
456     // kick each off its own thread
457     for (int i=0 ; i < ssNum; i++) {
458       new Thread(new SSRunnable(descs[i])).start();
459     }
460 
461     // wait until all have been submitted
462     toBeSubmitted.await();
463 
464     // loop until all are done.
465     while (true) {
466       int doneCount = 0;
467       for (SnapshotDescription ss : descs) {
468         try {
469           if (admin.isSnapshotFinished(ss)) {
470             doneCount++;
471           }
472         } catch (Exception e) {
473           LOG.warn("Got an exception when checking for snapshot " + ss.getName(), e);
474           doneCount++;
475         }
476       }
477       if (doneCount == descs.length) {
478         break;
479       }
480       Thread.sleep(100);
481     }
482 
483     // dump for debugging
484     UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
485 
486     List<SnapshotDescription> taken = admin.listSnapshots();
487     int takenSize = taken.size();
488     LOG.info("Taken " + takenSize + " snapshots:  " + taken);
489     assertTrue("We expect at least 1 request to be rejected because of we concurrently" +
490         " issued many requests", takenSize < ssNum && takenSize > 0);
491 
492     // Verify that there's at least one snapshot per table
493     int t1SnapshotsCount = 0;
494     int t2SnapshotsCount = 0;
495     for (SnapshotDescription ss : taken) {
496       if (TableName.valueOf(ss.getTable()).equals(TABLE_NAME)) {
497         t1SnapshotsCount++;
498       } else if (TableName.valueOf(ss.getTable()).equals(TABLE2_NAME)) {
499         t2SnapshotsCount++;
500       }
501     }
502     assertTrue("We expect at least 1 snapshot of table1 ", t1SnapshotsCount > 0);
503     assertTrue("We expect at least 1 snapshot of table2 ", t2SnapshotsCount > 0);
504 
505     UTIL.deleteTable(TABLE2_NAME);
506   }
507 
508   private void waitRegionsAfterMerge(final long numRegionsAfterMerge)
509       throws IOException, InterruptedException {
510     Admin admin = UTIL.getHBaseAdmin();
511     // Verify that there's one region less
512     long startTime = System.currentTimeMillis();
513     while (admin.getTableRegions(TABLE_NAME).size() != numRegionsAfterMerge) {
514       // This may be flaky... if after 15sec the merge is not complete give up
515       // it will fail in the assertEquals(numRegionsAfterMerge).
516       if ((System.currentTimeMillis() - startTime) > 15000)
517         break;
518       Thread.sleep(100);
519     }
520     SnapshotTestingUtils.waitForTableToBeOnline(UTIL, TABLE_NAME);
521   }
522 }