1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master.snapshot;
19
20 import java.io.FileNotFoundException;
21 import java.io.IOException;
22 import java.util.HashSet;
23 import java.util.List;
24 import java.util.Set;
25 import java.util.concurrent.CancellationException;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.classification.InterfaceAudience;
30 import org.apache.hadoop.conf.Configuration;
31 import org.apache.hadoop.fs.FileSystem;
32 import org.apache.hadoop.fs.Path;
33 import org.apache.hadoop.hbase.TableName;
34 import org.apache.hadoop.hbase.HRegionInfo;
35 import org.apache.hadoop.hbase.HTableDescriptor;
36 import org.apache.hadoop.hbase.ServerName;
37 import org.apache.hadoop.hbase.catalog.MetaReader;
38 import org.apache.hadoop.hbase.errorhandling.ForeignException;
39 import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
40 import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
41 import org.apache.hadoop.hbase.executor.EventHandler;
42 import org.apache.hadoop.hbase.executor.EventType;
43 import org.apache.hadoop.hbase.master.MasterServices;
44 import org.apache.hadoop.hbase.master.MetricsSnapshot;
45 import org.apache.hadoop.hbase.master.SnapshotSentinel;
46 import org.apache.hadoop.hbase.master.TableLockManager;
47 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
48 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
49 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
50 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
51 import org.apache.hadoop.hbase.regionserver.HRegion;
52 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
53 import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
54 import org.apache.hadoop.hbase.snapshot.CopyRecoveredEditsTask;
55 import org.apache.hadoop.hbase.snapshot.ReferenceRegionHFilesTask;
56 import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
57 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
58 import org.apache.hadoop.hbase.snapshot.TableInfoCopyTask;
59 import org.apache.hadoop.hbase.util.Pair;
60 import org.apache.zookeeper.KeeperException;
61
62
63
64
65
66
67
68
69 @InterfaceAudience.Private
70 public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel,
71 ForeignExceptionSnare {
72 private static final Log LOG = LogFactory.getLog(TakeSnapshotHandler.class);
73
74 private volatile boolean finished;
75
76
77 protected final MasterServices master;
78 protected final MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
79 protected final SnapshotDescription snapshot;
80 protected final Configuration conf;
81 protected final FileSystem fs;
82 protected final Path rootDir;
83 private final Path snapshotDir;
84 protected final Path workingDir;
85 private final MasterSnapshotVerifier verifier;
86 protected final ForeignExceptionDispatcher monitor;
87 protected final TableLockManager tableLockManager;
88 protected final TableLock tableLock;
89 protected final MonitoredTask status;
90 protected final TableName snapshotTable;
91
92
93
94
95
96 public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices) {
97 super(masterServices, EventType.C_M_SNAPSHOT_TABLE);
98 assert snapshot != null : "SnapshotDescription must not be nul1";
99 assert masterServices != null : "MasterServices must not be nul1";
100
101 this.master = masterServices;
102 this.snapshot = snapshot;
103 this.snapshotTable = TableName.valueOf(snapshot.getTable());
104 this.conf = this.master.getConfiguration();
105 this.fs = this.master.getMasterFileSystem().getFileSystem();
106 this.rootDir = this.master.getMasterFileSystem().getRootDir();
107 this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
108 this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
109 this.monitor = new ForeignExceptionDispatcher(snapshot.getName());
110
111 this.tableLockManager = master.getTableLockManager();
112 this.tableLock = this.tableLockManager.writeLock(
113 snapshotTable,
114 EventType.C_M_SNAPSHOT_TABLE.toString());
115
116
117 this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, rootDir);
118
119 this.status = TaskMonitor.get().createStatus(
120 "Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable);
121 }
122
123 private HTableDescriptor loadTableDescriptor()
124 throws FileNotFoundException, IOException {
125 HTableDescriptor htd =
126 this.master.getTableDescriptors().get(snapshotTable);
127 if (htd == null) {
128 throw new IOException("HTableDescriptor missing for " + snapshotTable);
129 }
130 return htd;
131 }
132
133 public TakeSnapshotHandler prepare() throws Exception {
134 super.prepare();
135 this.tableLock.acquire();
136
137 boolean success = false;
138 try {
139 loadTableDescriptor();
140 success = true;
141 } finally {
142 if (!success) {
143 releaseTableLock();
144 }
145 }
146
147 return this;
148 }
149
150
151
152
153
154 @Override
155 public void process() {
156 String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " "
157 + eventType + " on table " + snapshotTable;
158 LOG.info(msg);
159 status.setStatus(msg);
160 try {
161
162
163
164
165 SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, this.fs);
166 new TableInfoCopyTask(monitor, snapshot, fs, rootDir).call();
167 monitor.rethrowException();
168
169 List<Pair<HRegionInfo, ServerName>> regionsAndLocations =
170 MetaReader.getTableRegionsAndLocations(this.server.getCatalogTracker(),
171 snapshotTable, false);
172
173
174 snapshotRegions(regionsAndLocations);
175 monitor.rethrowException();
176
177
178 Set<String> serverNames = new HashSet<String>();
179 for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
180 if (p != null && p.getFirst() != null && p.getSecond() != null) {
181 HRegionInfo hri = p.getFirst();
182 if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue;
183 serverNames.add(p.getSecond().toString());
184 }
185 }
186
187
188 status.setStatus("Verifying snapshot: " + snapshot.getName());
189 verifier.verifySnapshot(this.workingDir, serverNames);
190
191
192 completeSnapshot(this.snapshotDir, this.workingDir, this.fs);
193 status.markComplete("Snapshot " + snapshot.getName() + " of table " + snapshotTable
194 + " completed");
195 LOG.info("Snapshot " + snapshot.getName() + " of table " + snapshotTable
196 + " completed");
197 metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
198 } catch (Exception e) {
199 status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " +
200 snapshotTable + " because " + e.getMessage());
201 String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot)
202 + " due to exception:" + e.getMessage();
203 LOG.error(reason, e);
204 ForeignException ee = new ForeignException(reason, e);
205 monitor.receive(ee);
206
207 cancel("Failed to take snapshot '" + ClientSnapshotDescriptionUtils.toString(snapshot)
208 + "' due to exception");
209 } finally {
210 LOG.debug("Launching cleanup of working dir:" + workingDir);
211 try {
212
213
214 if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) {
215 LOG.error("Couldn't delete snapshot working directory:" + workingDir);
216 }
217 } catch (IOException e) {
218 LOG.error("Couldn't delete snapshot working directory:" + workingDir);
219 }
220 releaseTableLock();
221 }
222 }
223
224 protected void releaseTableLock() {
225 if (this.tableLock != null) {
226 try {
227 this.tableLock.release();
228 } catch (IOException ex) {
229 LOG.warn("Could not release the table lock", ex);
230 }
231 }
232 }
233
234
235
236
237
238
239
240
241
242
243 public void completeSnapshot(Path snapshotDir, Path workingDir, FileSystem fs)
244 throws SnapshotCreationException, IOException {
245 LOG.debug("Sentinel is done, just moving the snapshot from " + workingDir + " to "
246 + snapshotDir);
247 if (!fs.rename(workingDir, snapshotDir)) {
248 throw new SnapshotCreationException("Failed to move working directory(" + workingDir
249 + ") to completed directory(" + snapshotDir + ").");
250 }
251 finished = true;
252 }
253
254
255
256
257 protected abstract void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regions)
258 throws IOException, KeeperException;
259
260
261
262
263 protected void snapshotDisabledRegion(final HRegionInfo regionInfo)
264 throws IOException {
265
266 HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
267 workingDir, regionInfo);
268
269
270 monitor.rethrowException();
271
272
273 Path regionDir = HRegion.getRegionDir(rootDir, regionInfo);
274 Path snapshotRegionDir = regionFs.getRegionDir();
275 new CopyRecoveredEditsTask(snapshot, monitor, fs, regionDir, snapshotRegionDir).call();
276 monitor.rethrowException();
277 status.setStatus("Completed copying recovered edits for offline snapshot of table: "
278 + snapshotTable);
279
280
281 new ReferenceRegionHFilesTask(snapshot, monitor, regionDir, fs, snapshotRegionDir).call();
282 monitor.rethrowException();
283 status.setStatus("Completed referencing HFiles for offline snapshot of table: " +
284 snapshotTable);
285 }
286
287 @Override
288 public void cancel(String why) {
289 if (finished) return;
290
291 this.finished = true;
292 LOG.info("Stop taking snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
293 " because: " + why);
294 CancellationException ce = new CancellationException(why);
295 monitor.receive(new ForeignException(master.getServerName().toString(), ce));
296 }
297
298 @Override
299 public boolean isFinished() {
300 return finished;
301 }
302
303 @Override
304 public long getCompletionTimestamp() {
305 return this.status.getCompletionTimestamp();
306 }
307
308 @Override
309 public SnapshotDescription getSnapshot() {
310 return snapshot;
311 }
312
313 @Override
314 public ForeignException getExceptionIfFailed() {
315 return monitor.getException();
316 }
317
318 @Override
319 public void rethrowExceptionIfFailed() throws ForeignException {
320 monitor.rethrowException();
321 }
322
323 @Override
324 public void rethrowException() throws ForeignException {
325 monitor.rethrowException();
326 }
327
328 @Override
329 public boolean hasException() {
330 return monitor.hasException();
331 }
332
333 @Override
334 public ForeignException getException() {
335 return monitor.getException();
336 }
337
338 }