
1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.procedure;
19  
20  import java.io.IOException;
21  import java.io.InputStream;
22  import java.io.InterruptedIOException;
23  import java.io.OutputStream;
24  import java.util.ArrayList;
25  import java.util.Collection;
26  import java.util.HashSet;
27  import java.util.List;
28  import java.util.Set;
29  import java.util.concurrent.locks.Lock;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.HConstants;
34  import org.apache.hadoop.hbase.HRegionInfo;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.client.ClusterConnection;
37  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
38  import org.apache.hadoop.hbase.master.AssignmentManager;
39  import org.apache.hadoop.hbase.master.MasterFileSystem;
40  import org.apache.hadoop.hbase.master.MasterServices;
41  import org.apache.hadoop.hbase.master.RegionState;
42  import org.apache.hadoop.hbase.master.RegionStates;
43  import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
44  import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
45  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
46  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo;
47  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
48  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashState;
49  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
50  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
51  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
52  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
53  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
54  import org.apache.hadoop.util.StringUtils;
55  import org.apache.zookeeper.KeeperException;
56  
57  /**
58   * Handle crashed server. This is a port to ProcedureV2 of what used to be euphemistically called
59   * ServerShutdownHandler.
60   *
61   * <p>The procedure flow varies depending on whether meta is assigned, whether we are doing
62   * distributed log replay versus distributed log splitting, and whether we are to split logs
63   * at all.
64   *
65   * <p>This procedure asks that all crashed servers get processed equally; we yield after the
66   * completion of each successful flow step. We do this so that we do not 'deadlock' waiting on
67   * a region assignment needed to replay edits, which could happen if a region has moved and
68   * there are edits to replay on two servers.
69   *
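 * <p>Rough flow, as implemented in {@code executeFromState} below: START, then PROCESS_META if
 * the crashed server was carrying hbase:meta, then GET_REGIONS. From GET_REGIONS we go straight
 * to ASSIGN when not splitting WALs, to PREPARE_LOG_REPLAY then ASSIGN under distributed log
 * replay, or to SPLIT_LOGS then ASSIGN under distributed log splitting. Under distributed log
 * replay, ASSIGN is followed by WAIT_ON_ASSIGN and a final SPLIT_LOGS; otherwise ASSIGN goes
 * to FINISH.
 *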
70   * <p>TODO: ASSIGN and WAIT_ON_ASSIGN (at least) are not idempotent. Revisit when assign is pv2.
71   * TODO: We do not have special handling for system tables.
72   */
73  public class ServerCrashProcedure
74  extends StateMachineProcedure<MasterProcedureEnv, ServerCrashState>
75  implements ServerProcedureInterface {
76    private static final Log LOG = LogFactory.getLog(ServerCrashProcedure.class);
77  
78    /**
79     * Configuration key to set how long to wait in ms doing a quick check on meta state.
80     */
81    public static final String KEY_SHORT_WAIT_ON_META =
82        "hbase.master.servercrash.short.wait.on.meta.ms";
83  
84    public static final int DEFAULT_SHORT_WAIT_ON_META = 1000;
85  
86    /**
87    * Configuration key to set how many times we retry before giving up on assigning meta.
88     * Each attempt will wait at least {@link #KEY_SHORT_WAIT_ON_META} milliseconds.
89     */
90    public static final String KEY_RETRIES_ON_META =
91        "hbase.master.servercrash.meta.retries";
92  
93    public static final int DEFAULT_RETRIES_ON_META = 10;
94  
95    /**
96     * Configuration key to set how long to wait in ms on regions in transition.
97     */
98    public static final String KEY_WAIT_ON_RIT =
99        "hbase.master.servercrash.wait.on.rit.ms";
100 
101   public static final int DEFAULT_WAIT_ON_RIT = 30000;
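
  // These keys are read from the master's Configuration, so they can be tuned in hbase-site.xml
  // or, in tests, set programmatically. A minimal sketch (values are illustrative only):
  //
  //   Configuration conf = master.getConfiguration();
  //   conf.setInt(ServerCrashProcedure.KEY_RETRIES_ON_META, 20);    // retry meta assign longer
  //   conf.setInt(ServerCrashProcedure.KEY_WAIT_ON_RIT, 60 * 1000); // wait up to a minute on RIT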
102 
103   private static final Set<HRegionInfo> META_REGION_SET = new HashSet<HRegionInfo>();
104   static {
105     META_REGION_SET.add(HRegionInfo.FIRST_META_REGIONINFO);
106   }
107 
108   /**
109    * Name of the crashed server to process.
110    */
111   private ServerName serverName;
112 
113   /**
114    * Whether DeadServer knows that we are processing it.
115    */
116   private boolean notifiedDeadServer = false;
117 
118   /**
119    * Regions that were on the crashed server.
120    */
121   private Set<HRegionInfo> regionsOnCrashedServer;
122 
123   /**
124    * Regions assigned. Usually some subset of {@link #regionsOnCrashedServer}.
125    */
126   private List<HRegionInfo> regionsAssigned;
127 
128   private boolean distributedLogReplay = false;
129   private boolean carryingMeta = false;
130   private boolean shouldSplitWal;
131 
132   /**
133    * Cycles on same state. Good for figuring if we are stuck.
134    */
135   private int cycles = 0;
136 
137   /**
138    * Ordinal of the previous state. So we can tell if we are progressing or not. TODO: if useful,
139    * move this back up into StateMachineProcedure
140    */
141   private int previousState;
142 
143   /**
144    * Call this constructor to queue up a Procedure.
145    * @param serverName Name of the crashed server.
146    * @param shouldSplitWal True if we should split WALs as part of crashed server processing.
147    * @param carryingMeta True if carrying hbase:meta table region.
148    */
149   public ServerCrashProcedure(final ServerName serverName,
150       final boolean shouldSplitWal, final boolean carryingMeta) {
151     this.serverName = serverName;
152     this.shouldSplitWal = shouldSplitWal;
153     this.carryingMeta = carryingMeta;
154     // Currently not used.
155   }
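
  // A minimal sketch of how this procedure is typically queued, assuming the master exposes its
  // procedure executor via getMasterProcedureExecutor() (the exact call site may differ):
  //
  //   long procId = master.getMasterProcedureExecutor().submitProcedure(
  //       new ServerCrashProcedure(serverName, shouldSplitWal, carryingMeta));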
156 
157   /**
158    * Used when deserializing from a procedure store; we'll construct one of these then call
159    * {@link #deserializeStateData(InputStream)}. Do not use directly.
160    */
161   public ServerCrashProcedure() {
162     super();
163   }
164 
165   private void throwProcedureYieldException(final String msg) throws ProcedureYieldException {
166     String logMsg = msg + "; cycle=" + this.cycles + ", running for " +
167         StringUtils.formatTimeDiff(System.currentTimeMillis(), getStartTime());
168     // The procedure executor logs ProcedureYieldException at trace level. For now, log these
169     // yields for server crash processing at DEBUG. Revisit when stable.
170     if (LOG.isDebugEnabled()) LOG.debug(logMsg);
171     throw new ProcedureYieldException(logMsg);
172   }
173 
174   @Override
175   protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state)
176   throws ProcedureYieldException {
177     if (LOG.isTraceEnabled()) {
178       LOG.trace(state);
179     }
180     // Keep a running count of how many times we have cycled on the current state.
181     if (state.ordinal() != this.previousState) {
182       this.previousState = state.ordinal();
183       this.cycles = 0;
184     } else {
185       this.cycles++;
186     }
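    // The cycle count feeds the diagnostic message built in throwProcedureYieldException, so
    // repeated yields on the same state show up in the log with an increasing cycle count.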
187     MasterServices services = env.getMasterServices();
188     // Is master fully online? If not, yield. No processing of servers unless master is up
189     if (!services.getAssignmentManager().isFailoverCleanupDone()) {
190       throwProcedureYieldException("Waiting on master failover to complete");
191     }
192     // HBASE-14802
193     // If we have not yet notified that we are processing a dead server, do so now.
194     if (!notifiedDeadServer) {
195       services.getServerManager().getDeadServers().notifyServer(serverName);
196       notifiedDeadServer = true;
197     }
198 
199     try {
200       switch (state) {
201       case SERVER_CRASH_START:
202         LOG.info("Start processing crashed " + this.serverName);
203         start(env);
204         // If carrying meta, process it first. Else, get list of regions on crashed server.
205         if (this.carryingMeta) setNextState(ServerCrashState.SERVER_CRASH_PROCESS_META);
206         else setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
207         break;
208 
209       case SERVER_CRASH_GET_REGIONS:
210         // If hbase:meta is not assigned, yield.
211         if (!isMetaAssignedQuickTest(env)) {
212           throwProcedureYieldException("Waiting on hbase:meta assignment");
213         }
214         this.regionsOnCrashedServer =
215           services.getAssignmentManager().getRegionStates().getServerRegions(this.serverName);
216         // Where to go next? Depends on whether we should split logs at all or if we should do
217         // distributed log splitting (DLS) vs distributed log replay (DLR).
218         if (!this.shouldSplitWal) {
219           setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
220         } else if (this.distributedLogReplay) {
221           setNextState(ServerCrashState.SERVER_CRASH_PREPARE_LOG_REPLAY);
222         } else {
223           setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
224         }
225         break;
226 
227       case SERVER_CRASH_PROCESS_META:
228         // If we fail processing hbase:meta, yield.
229         if (!processMeta(env)) {
230           throwProcedureYieldException("Waiting on regions-in-transition to clear");
231         }
232         setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
233         break;
234 
235       case SERVER_CRASH_PREPARE_LOG_REPLAY:
236         prepareLogReplay(env, this.regionsOnCrashedServer);
237         setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
238         break;
239 
240       case SERVER_CRASH_SPLIT_LOGS:
241         splitLogs(env);
242         // If DLR, go to FINISH. Otherwise, if DLS, go to SERVER_CRASH_ASSIGN
243         if (this.distributedLogReplay) setNextState(ServerCrashState.SERVER_CRASH_FINISH);
244         else setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
245         break;
246 
247       case SERVER_CRASH_ASSIGN:
248         List<HRegionInfo> regionsToAssign = calcRegionsToAssign(env);
249 
250         // Assign may not be idempotent. SSH used to requeue itself if we got an IOE while
251         // assigning, which is what we are mimicking here, but it looks prone to double
252         // assignment if assign fails midway. TODO: Test.
253 
254         // If no regions to assign, skip assign and skip to the finish.
255         boolean regions = regionsToAssign != null && !regionsToAssign.isEmpty();
256         if (regions) {
257           this.regionsAssigned = regionsToAssign;
258           if (!assign(env, regionsToAssign)) {
259             throwProcedureYieldException("Failed assign; will retry");
260           }
261         }
262         if (this.shouldSplitWal && distributedLogReplay) {
263           // Take this route even if there are apparently no regions assigned. This may be our
264           // second time through here; i.e. we assigned and crashed just about here. On second
265           // time through, there will be no regions because we assigned them in the previous step.
266           // Even though no regions, we need to go through here to clean up the DLR zk markers.
267           setNextState(ServerCrashState.SERVER_CRASH_WAIT_ON_ASSIGN);
268         } else {
269           setNextState(ServerCrashState.SERVER_CRASH_FINISH);
270         }
271         break;
272 
273       case SERVER_CRASH_WAIT_ON_ASSIGN:
274         // TODO: The list of regionsAssigned may be more than we actually assigned. See down in
275         // AM #1629 around 'if (regionStates.wasRegionOnDeadServer(encodedName)) {' where we
276         // will skip assigning a region because it is/was on a dead server. Should never happen!
277         // It was on this server. Worst comes to worst, we'll still wait here until the other
278         // server is processed.
279 
280         // If the wait on assign failed, yield -- if we have regions to assign.
281         if (this.regionsAssigned != null && !this.regionsAssigned.isEmpty()) {
282           if (!waitOnAssign(env, this.regionsAssigned)) {
283             throwProcedureYieldException("Waiting on region assign");
284           }
285         }
286         setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
287         break;
288 
289       case SERVER_CRASH_FINISH:
290         LOG.info("Finished processing of crashed " + serverName);
291         services.getServerManager().getDeadServers().finish(serverName);
292         return Flow.NO_MORE_STATE;
293 
294       default:
295         throw new UnsupportedOperationException("unhandled state=" + state);
296       }
297     } catch (IOException e) {
298       LOG.warn("Failed serverName=" + this.serverName + ", state=" + state + "; retry", e);
299     } catch (InterruptedException e) {
300       // TODO: Make executor allow IEs coming up out of execute.
301       LOG.warn("Interrupted serverName=" + this.serverName + ", state=" + state + "; retry", e);
302       Thread.currentThread().interrupt();
303     }
304     return Flow.HAS_MORE_STATE;
305   }
306 
307   /**
308    * Start processing of crashed server. In here we'll just set configs and return.
309    * @param env
310    * @throws IOException
311    */
312   private void start(final MasterProcedureEnv env) throws IOException {
313     MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
314     // Set recovery mode late. This is what the old ServerShutdownHandler used to do.
315     mfs.setLogRecoveryMode();
316     this.distributedLogReplay = mfs.getLogRecoveryMode() == RecoveryMode.LOG_REPLAY;
317   }
318 
319   /**
320    * @param env
321    * @return False if we fail to assign and split logs on meta ('process').
322    * @throws IOException
323    * @throws InterruptedException
324    */
325   private boolean processMeta(final MasterProcedureEnv env)
326   throws IOException {
327     if (LOG.isDebugEnabled()) LOG.debug("Processing hbase:meta that was on " + this.serverName);
328     MasterServices services = env.getMasterServices();
329     MasterFileSystem mfs = services.getMasterFileSystem();
330     AssignmentManager am = services.getAssignmentManager();
331     HRegionInfo metaHRI = HRegionInfo.FIRST_META_REGIONINFO;
332     if (this.shouldSplitWal) {
333       if (this.distributedLogReplay) {
334         prepareLogReplay(env, META_REGION_SET);
335       } else {
336         // TODO: Matteo. We BLOCK here, but it is the most important thing to be doing at this moment.
337         mfs.splitMetaLog(serverName);
338         am.getRegionStates().logSplit(metaHRI);
339       }
340     }
341 
342     // Assign meta if still carrying it. Check again: the region may have been assigned because of an RIT timeout.
343     boolean processed = true;
344     boolean shouldAssignMeta = false;
345     AssignmentManager.ServerHostRegion rsCarryingMetaRegion = am.isCarryingMeta(serverName);
346     switch (rsCarryingMetaRegion) {
347     case HOSTING_REGION:
348       LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
349       am.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
350       shouldAssignMeta = true;
351       break;
352     case UNKNOWN:
353       if (!services.getMetaTableLocator().isLocationAvailable(services.getZooKeeper())) {
354         // The meta location known to the master is null. This can happen if meta assignment
355         // in a previous run failed while the meta znode had already been updated to null.
356         // We should try to assign meta again.
357         shouldAssignMeta = true;
358         break;
359       }
360       // fall through
361     case NOT_HOSTING_REGION:
362       LOG.info("META has been assigned elsewhere, skip assigning.");
363       break;
364     default:
365       throw new IOException("Unsupported action in MetaServerShutdownHandler");
366     }
367     if (shouldAssignMeta) {
368       // TODO: May block here if we have a hard time figuring out the state of meta.
369       verifyAndAssignMetaWithRetries(env);
370       if (this.shouldSplitWal && distributedLogReplay) {
371         int timeout = env.getMasterConfiguration().getInt(KEY_WAIT_ON_RIT, DEFAULT_WAIT_ON_RIT);
372         if (!waitOnRegionToClearRegionsInTransition(am, metaHRI, timeout)) {
373           processed = false;
374         } else {
375           // TODO: Matteo. We BLOCK here, but it is the most important thing to be doing at this moment.
376           mfs.splitMetaLog(serverName);
377         }
378       }
379     }
380     return processed;
381   }
382 
383   /**
384    * @return True if region cleared RIT, else false if we timed out waiting.
385    * @throws InterruptedIOException
386    */
387   private boolean waitOnRegionToClearRegionsInTransition(AssignmentManager am,
388       final HRegionInfo hri, final int timeout)
389   throws InterruptedIOException {
390     try {
391       if (!am.waitOnRegionToClearRegionsInTransition(hri, timeout)) {
392         // We wait here to avoid log replay hitting the current dead server and incurring an RPC timeout
393         // when replay happens before region assignment completes.
394         LOG.warn("Region " + hri.getEncodedName() + " didn't complete assignment in time");
395         return false;
396       }
397     } catch (InterruptedException ie) {
398       throw new InterruptedIOException("Caught " + ie +
399         " during waitOnRegionToClearRegionsInTransition for " + hri);
400     }
401     return true;
402   }
403 
404   private void prepareLogReplay(final MasterProcedureEnv env, final Set<HRegionInfo> regions)
405   throws IOException {
406     if (LOG.isDebugEnabled()) {
407       LOG.debug("Mark " + size(this.regionsOnCrashedServer) + " regions-in-recovery from " +
408         this.serverName);
409     }
410     MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
411     AssignmentManager am = env.getMasterServices().getAssignmentManager();
412     mfs.prepareLogReplay(this.serverName, regions);
413     am.getRegionStates().logSplit(this.serverName);
414   }
415 
416   private void splitLogs(final MasterProcedureEnv env) throws IOException {
417     if (LOG.isDebugEnabled()) {
418       LOG.debug("Splitting logs from " + serverName + "; region count=" +
419         size(this.regionsOnCrashedServer));
420     }
421     MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
422     AssignmentManager am = env.getMasterServices().getAssignmentManager();
423     // TODO: For Matteo. Below BLOCKs!!!! Redo so we can relinquish the executor while it is running.
424     mfs.splitLog(this.serverName);
425     am.getRegionStates().logSplit(this.serverName);
426   }
427 
428   static int size(final Collection<HRegionInfo> hris) {
429     return hris == null? 0: hris.size();
430   }
431 
432   /**
433    * Figure out what we need to assign. Should be idempotent.
434    * @param env
435    * @return List of calculated regions to assign; may be empty or null.
436    * @throws IOException
437    */
438   private List<HRegionInfo> calcRegionsToAssign(final MasterProcedureEnv env)
439   throws IOException {
440     AssignmentManager am = env.getMasterServices().getAssignmentManager();
441     List<HRegionInfo> regionsToAssignAggregator = new ArrayList<HRegionInfo>();
442     int replicaCount = env.getMasterConfiguration().getInt(HConstants.META_REPLICAS_NUM,
443       HConstants.DEFAULT_META_REPLICA_NUM);
444     for (int i = 1; i < replicaCount; i++) {
445       HRegionInfo metaHri =
446           RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, i);
447       if (am.isCarryingMetaReplica(this.serverName, metaHri) ==
448           AssignmentManager.ServerHostRegion.HOSTING_REGION) {
449         if (LOG.isDebugEnabled()) {
450           LOG.debug("Reassigning meta replica " + metaHri + " that was on " + this.serverName);
451         }
452         regionsToAssignAggregator.add(metaHri);
453       }
454     }
455     // Clean out anything in regions in transition.
456     List<HRegionInfo> regionsInTransition = am.cleanOutCrashedServerReferences(serverName);
457     if (LOG.isDebugEnabled()) {
458       LOG.debug("Reassigning " + size(this.regionsOnCrashedServer) +
459         " region(s) that " + (serverName == null? "null": serverName) +
460         " was carrying (and " + regionsInTransition.size() +
461         " region(s) that were opening on this server)");
462     }
463     regionsToAssignAggregator.addAll(regionsInTransition);
464 
465     // Iterate regions that were on this server and figure out which of these we need to reassign.
466     if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
467       RegionStates regionStates = am.getRegionStates();
468       for (HRegionInfo hri: this.regionsOnCrashedServer) {
469         if (regionsInTransition.contains(hri)) continue;
470         String encodedName = hri.getEncodedName();
471         Lock lock = am.acquireRegionLock(encodedName);
472         try {
473           RegionState rit = regionStates.getRegionTransitionState(hri);
474           if (processDeadRegion(hri, am)) {
475             ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
476             if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
477               // If this region is in transition on the dead server, it must be
478               // opening or pending_open, which should have been covered by
479               // AM#cleanOutCrashedServerReferences
480               LOG.info("Skip assigning " + hri.getRegionNameAsString()
481                 + " because it has been opened on " + addressFromAM.getServerName());
482               continue;
483             }
484             if (rit != null) {
485               if (rit.getServerName() != null && !rit.isOnServer(this.serverName)) {
486                 // Skip regions that are in transition on another server.
487                 LOG.info("Skip assigning region in transition on another server " + rit);
488                 continue;
489               }
490               LOG.info("Reassigning region " + rit + " and clearing its znode if it exists");
491               try {
492                 // This clears out any RIT that might be sticking around.
493                 ZKAssign.deleteNodeFailSilent(env.getMasterServices().getZooKeeper(), hri);
494               } catch (KeeperException e) {
495                 // TODO: FIX!!!! ABORTING SERVER BECAUSE COULDN'T PURGE ZNODE. This is what we
496                 // used to do but that doesn't make it right!!!
497                 env.getMasterServices().abort("Unexpected error deleting RIT " + hri, e);
498                 throw new IOException(e);
499               }
500               regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
501             } else if (regionStates.isRegionInState(
502                 hri, RegionState.State.SPLITTING_NEW, RegionState.State.MERGING_NEW)) {
503               regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
504             }
505             regionsToAssignAggregator.add(hri);
506           // TODO: The below else-if differs between branch-1 and the master branch.
507           } else if (rit != null) {
508             if ((rit.isPendingCloseOrClosing() || rit.isOffline())
509                 && am.getTableStateManager().isTableState(hri.getTable(),
510                 ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING) ||
511                 am.getReplicasToClose().contains(hri)) {
512               // If the table was partially disabled and the RS went down, we should clear the
513               // RIT and remove the node for the region.
514               // The rit we use may be stale if the table was in DISABLING state: even though
515               // we did assign, we will not have cleared the znode left in CLOSING state.
516               // Doing this is harmless. See HBASE-5927.
517               regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
518               am.deleteClosingOrClosedNode(hri, rit.getServerName());
519               am.offlineDisabledRegion(hri);
520             } else {
521               LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition "
522                 + rit + " not to be assigned by SSH of server " + serverName);
523             }
524           }
525         } finally {
526           lock.unlock();
527         }
528       }
529     }
530     return regionsToAssignAggregator;
531   }
532 
533   private boolean assign(final MasterProcedureEnv env, final List<HRegionInfo> hris)
534   throws InterruptedIOException {
535     AssignmentManager am = env.getMasterServices().getAssignmentManager();
536     try {
537       am.assign(hris);
538     } catch (InterruptedException ie) {
539       LOG.error("Caught " + ie + " during round-robin assignment");
540       throw (InterruptedIOException)new InterruptedIOException().initCause(ie);
541     } catch (IOException ioe) {
542       LOG.info("Caught " + ioe + " during region assignment, will retry");
543       return false;
544     }
545     return true;
546   }
547 
548   private boolean waitOnAssign(final MasterProcedureEnv env, final List<HRegionInfo> hris)
549   throws InterruptedIOException {
550     int timeout = env.getMasterConfiguration().getInt(KEY_WAIT_ON_RIT, DEFAULT_WAIT_ON_RIT);
551     for (HRegionInfo hri: hris) {
552       // TODO: Blocks here.
553       if (!waitOnRegionToClearRegionsInTransition(env.getMasterServices().getAssignmentManager(),
554           hri, timeout)) {
555         return false;
556       }
557     }
558     return true;
559   }
560 
561   @Override
562   protected void rollbackState(MasterProcedureEnv env, ServerCrashState state)
563   throws IOException {
564     // Can't rollback.
565     throw new UnsupportedOperationException("unhandled state=" + state);
566   }
567 
568   @Override
569   protected ServerCrashState getState(int stateId) {
570     return ServerCrashState.valueOf(stateId);
571   }
572 
573   @Override
574   protected int getStateId(ServerCrashState state) {
575     return state.getNumber();
576   }
577 
578   @Override
579   protected ServerCrashState getInitialState() {
580     return ServerCrashState.SERVER_CRASH_START;
581   }
582 
583   @Override
584   protected boolean abort(MasterProcedureEnv env) {
585     // TODO
586     return false;
587   }
588 
589   @Override
590   protected boolean acquireLock(final MasterProcedureEnv env) {
591     if (env.waitServerCrashProcessingEnabled(this)) return false;
592     return env.getProcedureQueue().tryAcquireServerExclusiveLock(this, getServerName());
593   }
594 
595   @Override
596   protected void releaseLock(final MasterProcedureEnv env) {
597     env.getProcedureQueue().releaseServerExclusiveLock(this, getServerName());
598   }
599 
600   @Override
601   public void toStringClassDetails(StringBuilder sb) {
602     sb.append(getClass().getSimpleName());
603     sb.append(" serverName=");
604     sb.append(this.serverName);
605     sb.append(", shouldSplitWal=");
606     sb.append(shouldSplitWal);
607     sb.append(", carryingMeta=");
608     sb.append(carryingMeta);
609   }
610 
611   @Override
612   public void serializeStateData(final OutputStream stream) throws IOException {
613     super.serializeStateData(stream);
614 
615     MasterProcedureProtos.ServerCrashStateData.Builder state =
616       MasterProcedureProtos.ServerCrashStateData.newBuilder().
617       setServerName(ProtobufUtil.toServerName(this.serverName)).
618       setDistributedLogReplay(this.distributedLogReplay).
619       setCarryingMeta(this.carryingMeta).
620       setShouldSplitWal(this.shouldSplitWal);
621     if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
622       for (HRegionInfo hri: this.regionsOnCrashedServer) {
623         state.addRegionsOnCrashedServer(HRegionInfo.convert(hri));
624       }
625     }
626     if (this.regionsAssigned != null && !this.regionsAssigned.isEmpty()) {
627       for (HRegionInfo hri: this.regionsAssigned) {
628         state.addRegionsAssigned(HRegionInfo.convert(hri));
629       }
630     }
631     state.build().writeDelimitedTo(stream);
632   }
633 
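  // The state message was written length-delimited by serializeStateData above, so it is read
  // back here with parseDelimitedFrom on the same stream.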
634   @Override
635   public void deserializeStateData(final InputStream stream) throws IOException {
636     super.deserializeStateData(stream);
637 
638     MasterProcedureProtos.ServerCrashStateData state =
639       MasterProcedureProtos.ServerCrashStateData.parseDelimitedFrom(stream);
640     this.serverName = ProtobufUtil.toServerName(state.getServerName());
641     this.distributedLogReplay = state.hasDistributedLogReplay()?
642       state.getDistributedLogReplay(): false;
643     this.carryingMeta = state.hasCarryingMeta()? state.getCarryingMeta(): false;
644     // shouldSplitWal has a default in the pb definition, so this invocation always works.
645     this.shouldSplitWal = state.getShouldSplitWal();
646     int size = state.getRegionsOnCrashedServerCount();
647     if (size > 0) {
648       this.regionsOnCrashedServer = new HashSet<HRegionInfo>(size);
649       for (RegionInfo ri: state.getRegionsOnCrashedServerList()) {
650         this.regionsOnCrashedServer.add(HRegionInfo.convert(ri));
651       }
652     }
653     size = state.getRegionsAssignedCount();
654     if (size > 0) {
655       this.regionsAssigned = new ArrayList<HRegionInfo>(size);
656       for (RegionInfo ri: state.getRegionsAssignedList()) {
657         this.regionsAssigned.add(HRegionInfo.convert(ri));
658       }
659     }
660   }
661 
662   /**
663    * Process a dead region from a dead RS. Checks if the region is disabled or
664    * disabling or if the region has a partially completed split.
665    * @param hri
666    * @param assignmentManager
667    * @return Returns true if specified region should be assigned, false if not.
668    * @throws IOException
669    */
670   private static boolean processDeadRegion(HRegionInfo hri, AssignmentManager assignmentManager)
671   throws IOException {
672     boolean tablePresent = assignmentManager.getTableStateManager().isTablePresent(hri.getTable());
673     if (!tablePresent) {
674       LOG.info("The table " + hri.getTable() + " was deleted.  Hence not proceeding.");
675       return false;
676     }
677     // If the table is disabled, there is no need to assign this region.
678     boolean disabled = assignmentManager.getTableStateManager().isTableState(hri.getTable(),
679       ZooKeeperProtos.Table.State.DISABLED);
680     if (disabled) {
681       LOG.info("The table " + hri.getTable() + " was disabled.  Hence not proceeding.");
682       return false;
683     }
684     if (hri.isOffline() && hri.isSplit()) {
685       // HBASE-7721: Split parent and daughters are inserted into hbase:meta as an atomic operation.
686       // If the meta scanner saw the parent split, then it should see the daughters as assigned
687       // to the dead server. We don't have to do anything.
688       return false;
689     }
690     boolean disabling = assignmentManager.getTableStateManager().isTableState(hri.getTable(),
691       ZooKeeperProtos.Table.State.DISABLING);
692     if (disabling) {
693       LOG.info("The table " + hri.getTable() + " is disabled.  Hence not assigning region " +
694         hri.getEncodedName());
695       return false;
696     }
697     return true;
698   }
699 
700   /**
701    * If hbase:meta is not assigned already, assign.
702    * @throws IOException
703    */
704   private void verifyAndAssignMetaWithRetries(final MasterProcedureEnv env) throws IOException {
705     MasterServices services = env.getMasterServices();
706     int iTimes = services.getConfiguration().getInt(KEY_RETRIES_ON_META, DEFAULT_RETRIES_ON_META);
707     // Just reuse same time as we have for short wait on meta. Adding another config is overkill.
708     long waitTime =
709       services.getConfiguration().getLong(KEY_SHORT_WAIT_ON_META, DEFAULT_SHORT_WAIT_ON_META);
710     int iFlag = 0;
711     while (true) {
712       try {
713         verifyAndAssignMeta(env);
714         break;
715       } catch (KeeperException e) {
716         services.abort("In server shutdown processing, assigning meta", e);
717         throw new IOException("Aborting", e);
718       } catch (Exception e) {
719         if (iFlag >= iTimes) {
720           services.abort("verifyAndAssignMeta failed after " + iTimes + " retries, aborting", e);
721           throw new IOException("Aborting", e);
722         }
723         try {
724           Thread.sleep(waitTime);
725         } catch (InterruptedException e1) {
726           LOG.warn("Interrupted while sleeping between meta assign retries", e1);
727           Thread.currentThread().interrupt();
728           throw (InterruptedIOException)new InterruptedIOException().initCause(e1);
729         }
730         iFlag++;
731       }
732     }
733   }
734 
735   /**
736    * If hbase:meta is not assigned already, assign.
737    * @throws InterruptedException
738    * @throws IOException
739    * @throws KeeperException
740    */
741   private void verifyAndAssignMeta(final MasterProcedureEnv env)
742       throws InterruptedException, IOException, KeeperException {
743     MasterServices services = env.getMasterServices();
744     if (!isMetaAssignedQuickTest(env)) {
745       services.getAssignmentManager().assignMeta(HRegionInfo.FIRST_META_REGIONINFO);
746     } else if (serverName.equals(services.getMetaTableLocator().
747         getMetaRegionLocation(services.getZooKeeper()))) {
748       // hbase:meta still seems to be alive on the server that the master is expiring
749       // and thinks is dying. Let's re-assign hbase:meta anyway.
750       services.getAssignmentManager().assignMeta(HRegionInfo.FIRST_META_REGIONINFO);
751     } else {
752       LOG.info("Skip assigning hbase:meta because it is online at "
753           + services.getMetaTableLocator().getMetaRegionLocation(services.getZooKeeper()));
754     }
755   }
756 
757   /**
758    * A quick test that hbase:meta is assigned; blocks for short time only.
759    * @return True if hbase:meta location is available and verified as good.
760    * @throws InterruptedException
761    * @throws IOException
762    */
763   private boolean isMetaAssignedQuickTest(final MasterProcedureEnv env)
764   throws InterruptedException, IOException {
765     ZooKeeperWatcher zkw = env.getMasterServices().getZooKeeper();
766     MetaTableLocator mtl = env.getMasterServices().getMetaTableLocator();
767     boolean metaAssigned = false;
768     // Is hbase:meta location available yet?
769     if (mtl.isLocationAvailable(zkw)) {
770       ClusterConnection connection = env.getMasterServices().getConnection();
771       // Is hbase:meta location good yet?
772       long timeout =
773         env.getMasterConfiguration().getLong(KEY_SHORT_WAIT_ON_META, DEFAULT_SHORT_WAIT_ON_META);
774       if (mtl.verifyMetaRegionLocation(connection, zkw, timeout)) {
775         metaAssigned = true;
776       }
777     }
778     return metaAssigned;
779   }
780 
781   @Override
782   public ServerName getServerName() {
783     return this.serverName;
784   }
785 
786   @Override
787   public boolean hasMetaTableRegion() {
788     return this.carryingMeta;
789   }
790 
791   @Override
792   public ServerOperationType getServerOperationType() {
793     return ServerOperationType.CRASH_HANDLER;
794   }
795 
796   /**
797    * For this procedure, yield at end of each successful flow step so that all crashed servers
798    * can make progress rather than do the default which has each procedure running to completion
799    * before we move to the next. For crashed servers, especially if running with distributed log
800    * replay, we will want all servers to come along; we do not want the scenario where a server is
801    * stuck waiting for regions to online so it can replay edits.
802    */
803   @Override
804   protected boolean isYieldBeforeExecuteFromState(MasterProcedureEnv env, ServerCrashState state) {
805     return true;
806   }
807 
808   @Override
809   protected boolean shouldWaitClientAck(MasterProcedureEnv env) {
810     // The operation is triggered internally on the server;
811     // the client does not know about this procedure.
812     return false;
813   }
814 }