View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.procedure;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.OutputStream;
24  import java.security.PrivilegedExceptionAction;
25  import java.util.HashMap;
26  import java.util.HashSet;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.concurrent.atomic.AtomicBoolean;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.MetaTableAccessor;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.TableName;
37  import org.apache.hadoop.hbase.TableNotDisabledException;
38  import org.apache.hadoop.hbase.TableNotFoundException;
39  import org.apache.hadoop.hbase.TableStateManager;
40  import org.apache.hadoop.hbase.classification.InterfaceAudience;
41  import org.apache.hadoop.hbase.exceptions.HBaseException;
42  import org.apache.hadoop.hbase.executor.EventType;
43  import org.apache.hadoop.hbase.master.AssignmentManager;
44  import org.apache.hadoop.hbase.master.BulkAssigner;
45  import org.apache.hadoop.hbase.master.GeneralBulkAssigner;
46  import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
47  import org.apache.hadoop.hbase.master.MasterServices;
48  import org.apache.hadoop.hbase.master.RegionStates;
49  import org.apache.hadoop.hbase.master.ServerManager;
50  import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
51  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
52  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
53  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.EnableTableState;
54  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
55  import org.apache.hadoop.hbase.util.Pair;
56  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
57  import org.apache.hadoop.security.UserGroupInformation;
58  
59  @InterfaceAudience.Private
60  public class EnableTableProcedure
61      extends StateMachineProcedure<MasterProcedureEnv, EnableTableState>
62      implements TableProcedureInterface {
63    private static final Log LOG = LogFactory.getLog(EnableTableProcedure.class);
64  
65    private final AtomicBoolean aborted = new AtomicBoolean(false);
66  
67    // This is for back compatible with 1.0 asynchronized operations.
68    private final ProcedurePrepareLatch syncLatch;
69  
70    private TableName tableName;
71    private boolean skipTableStateCheck;
72    private UserGroupInformation user;
73  
74    private Boolean traceEnabled = null;
75  
76    public EnableTableProcedure() {
77      syncLatch = null;
78    }
79  
80    /**
81     * Constructor
82     * @param env MasterProcedureEnv
83     * @param tableName the table to operate on
84     * @param skipTableStateCheck whether to check table state
85     * @throws IOException
86     */
87    public EnableTableProcedure(
88        final MasterProcedureEnv env,
89        final TableName tableName,
90        final boolean skipTableStateCheck) throws IOException {
91      this(env, tableName, skipTableStateCheck, null);
92    }
93  
94    /**
95     * Constructor
96     * @param env MasterProcedureEnv
97     * @param tableName the table to operate on
98     * @param skipTableStateCheck whether to check table state
99     * @throws IOException
100    */
101   public EnableTableProcedure(
102       final MasterProcedureEnv env,
103       final TableName tableName,
104       final boolean skipTableStateCheck,
105       final ProcedurePrepareLatch syncLatch) throws IOException {
106     this.tableName = tableName;
107     this.skipTableStateCheck = skipTableStateCheck;
108     this.user = env.getRequestUser().getUGI();
109     this.setOwner(this.user.getShortUserName());
110 
111     // Compatible with 1.0: We use latch to make sure that this procedure implementation is
112     // compatible with 1.0 asynchronized operations. We need to lock the table and check
113     // whether the Enable operation could be performed (table exists and offline; table state
114     // is DISABLED). Once it is done, we are good to release the latch and the client can
115     // start asynchronously wait for the operation.
116     //
117     // Note: the member syncLatch could be null if we are in failover or recovery scenario.
118     // This is ok for backward compatible, as 1.0 client would not able to peek at procedure.
119     this.syncLatch = syncLatch;
120   }
121 
122   @Override
123   protected Flow executeFromState(final MasterProcedureEnv env, final EnableTableState state)
124       throws InterruptedException {
125     if (isTraceEnabled()) {
126       LOG.trace(this + " execute state=" + state);
127     }
128 
129     try {
130       switch (state) {
131       case ENABLE_TABLE_PREPARE:
132         if (prepareEnable(env)) {
133           setNextState(EnableTableState.ENABLE_TABLE_PRE_OPERATION);
134         } else {
135           assert isFailed() : "enable should have an exception here";
136           return Flow.NO_MORE_STATE;
137         }
138         break;
139       case ENABLE_TABLE_PRE_OPERATION:
140         preEnable(env, state);
141         setNextState(EnableTableState.ENABLE_TABLE_SET_ENABLING_TABLE_STATE);
142         break;
143       case ENABLE_TABLE_SET_ENABLING_TABLE_STATE:
144         setTableStateToEnabling(env, tableName);
145         setNextState(EnableTableState.ENABLE_TABLE_MARK_REGIONS_ONLINE);
146         break;
147       case ENABLE_TABLE_MARK_REGIONS_ONLINE:
148         markRegionsOnline(env, tableName, true);
149         setNextState(EnableTableState.ENABLE_TABLE_SET_ENABLED_TABLE_STATE);
150         break;
151       case ENABLE_TABLE_SET_ENABLED_TABLE_STATE:
152         setTableStateToEnabled(env, tableName);
153         setNextState(EnableTableState.ENABLE_TABLE_POST_OPERATION);
154         break;
155       case ENABLE_TABLE_POST_OPERATION:
156         postEnable(env, state);
157         return Flow.NO_MORE_STATE;
158       default:
159         throw new UnsupportedOperationException("unhandled state=" + state);
160       }
161     } catch (HBaseException|IOException e) {
162       LOG.error("Error trying to enable table=" + tableName + " state=" + state, e);
163       setFailure("master-enable-table", e);
164     }
165     return Flow.HAS_MORE_STATE;
166   }
167 
168   @Override
169   protected void rollbackState(final MasterProcedureEnv env, final EnableTableState state)
170       throws IOException {
171     if (isTraceEnabled()) {
172       LOG.trace(this + " rollback state=" + state);
173     }
174     try {
175       switch (state) {
176       case ENABLE_TABLE_POST_OPERATION:
177         // TODO-MAYBE: call the coprocessor event to undo (eg. DisableTableProcedure.preDisable())?
178         break;
179       case ENABLE_TABLE_SET_ENABLED_TABLE_STATE:
180         DisableTableProcedure.setTableStateToDisabling(env, tableName);
181         break;
182       case ENABLE_TABLE_MARK_REGIONS_ONLINE:
183         markRegionsOfflineDuringRecovery(env);
184         break;
185       case ENABLE_TABLE_SET_ENABLING_TABLE_STATE:
186         DisableTableProcedure.setTableStateToDisabled(env, tableName);
187         break;
188       case ENABLE_TABLE_PRE_OPERATION:
189         // TODO-MAYBE: call the coprocessor event to undo (eg. DisableTableProcedure.postDisable())?
190         break;
191       case ENABLE_TABLE_PREPARE:
192         // Nothing to undo for this state.
193         // We do need to count down the latch count so that we don't stuck.
194         ProcedurePrepareLatch.releaseLatch(syncLatch, this);
195         break;
196       default:
197         throw new UnsupportedOperationException("unhandled state=" + state);
198       }
199     } catch (HBaseException e) {
200       LOG.warn("Failed enable table rollback attempt step=" + state + " table=" + tableName, e);
201       throw new IOException(e);
202     } catch (IOException e) {
203       // This will be retried. Unless there is a bug in the code,
204       // this should be just a "temporary error" (e.g. network down)
205       LOG.warn("Failed enable table rollback attempt step=" + state + " table=" + tableName, e);
206       throw e;
207     }
208   }
209 
210   @Override
211   protected EnableTableState getState(final int stateId) {
212     return EnableTableState.valueOf(stateId);
213   }
214 
215   @Override
216   protected int getStateId(final EnableTableState state) {
217     return state.getNumber();
218   }
219 
220   @Override
221   protected EnableTableState getInitialState() {
222     return EnableTableState.ENABLE_TABLE_PREPARE;
223   }
224 
225   @Override
226   protected void setNextState(final EnableTableState state) {
227     if (aborted.get()) {
228       setAbortFailure("Enable-table", "abort requested");
229     } else {
230       super.setNextState(state);
231     }
232   }
233 
234   @Override
235   public boolean abort(final MasterProcedureEnv env) {
236     aborted.set(true);
237     return true;
238   }
239 
240   @Override
241   protected boolean acquireLock(final MasterProcedureEnv env) {
242     if (env.waitInitialized(this)) return false;
243     return env.getProcedureQueue().tryAcquireTableExclusiveLock(this, tableName);
244   }
245 
246   @Override
247   protected void releaseLock(final MasterProcedureEnv env) {
248     env.getProcedureQueue().releaseTableExclusiveLock(this, tableName);
249   }
250 
251   @Override
252   public void serializeStateData(final OutputStream stream) throws IOException {
253     super.serializeStateData(stream);
254 
255     MasterProcedureProtos.EnableTableStateData.Builder enableTableMsg =
256         MasterProcedureProtos.EnableTableStateData.newBuilder()
257             .setUserInfo(MasterProcedureUtil.toProtoUserInfo(user))
258             .setTableName(ProtobufUtil.toProtoTableName(tableName))
259             .setSkipTableStateCheck(skipTableStateCheck);
260 
261     enableTableMsg.build().writeDelimitedTo(stream);
262   }
263 
264   @Override
265   public void deserializeStateData(final InputStream stream) throws IOException {
266     super.deserializeStateData(stream);
267 
268     MasterProcedureProtos.EnableTableStateData enableTableMsg =
269         MasterProcedureProtos.EnableTableStateData.parseDelimitedFrom(stream);
270     user = MasterProcedureUtil.toUserInfo(enableTableMsg.getUserInfo());
271     tableName = ProtobufUtil.toTableName(enableTableMsg.getTableName());
272     skipTableStateCheck = enableTableMsg.getSkipTableStateCheck();
273   }
274 
275   @Override
276   public void toStringClassDetails(StringBuilder sb) {
277     sb.append(getClass().getSimpleName());
278     sb.append(" (table=");
279     sb.append(tableName);
280     sb.append(")");
281   }
282 
283   @Override
284   public TableName getTableName() {
285     return tableName;
286   }
287 
288   @Override
289   public TableOperationType getTableOperationType() {
290     return TableOperationType.ENABLE;
291   }
292 
293 
294   /**
295    * Action before any real action of enabling table. Set the exception in the procedure instead
296    * of throwing it.  This approach is to deal with backward compatible with 1.0.
297    * @param env MasterProcedureEnv
298    * @return whether the table passes the necessary checks
299    * @throws IOException
300    */
301   private boolean prepareEnable(final MasterProcedureEnv env) throws IOException {
302     boolean canTableBeEnabled = true;
303 
304     // Check whether table exists
305     if (!MetaTableAccessor.tableExists(env.getMasterServices().getConnection(), tableName)) {
306       setFailure("master-enable-table", new TableNotFoundException(tableName));
307       canTableBeEnabled = false;
308     } else if (!skipTableStateCheck) {
309       // There could be multiple client requests trying to disable or enable
310       // the table at the same time. Ensure only the first request is honored
311       // After that, no other requests can be accepted until the table reaches
312       // DISABLED or ENABLED.
313       //
314       // Note: in 1.0 release, we called TableStateManager.setTableStateIfInStates() to set
315       // the state to ENABLING from DISABLED. The implementation was done before table lock
316       // was implemented. With table lock, there is no need to set the state here (it will
317       // set the state later on). A quick state check should be enough for us to move forward.
318       TableStateManager tsm = env.getMasterServices().getAssignmentManager().getTableStateManager();
319       if (!tsm.isTableState(tableName, ZooKeeperProtos.Table.State.DISABLED)) {
320         LOG.info("Table " + tableName + " isn't disabled; skipping enable");
321         setFailure("master-enable-table", new TableNotDisabledException(this.tableName));
322         canTableBeEnabled = false;
323       }
324     }
325 
326     // We are done the check. Future actions in this procedure could be done asynchronously.
327     ProcedurePrepareLatch.releaseLatch(syncLatch, this);
328 
329     return canTableBeEnabled;
330   }
331 
332   /**
333    * Action before enabling table.
334    * @param env MasterProcedureEnv
335    * @param state the procedure state
336    * @throws IOException
337    * @throws InterruptedException
338    */
339   private void preEnable(final MasterProcedureEnv env, final EnableTableState state)
340       throws IOException, InterruptedException {
341     runCoprocessorAction(env, state);
342   }
343 
344   /**
345    * Mark table state to Enabling
346    * @param env MasterProcedureEnv
347    * @param tableName the target table
348    * @throws IOException
349    */
350   protected static void setTableStateToEnabling(
351       final MasterProcedureEnv env,
352       final TableName tableName) throws HBaseException, IOException {
353     // Set table disabling flag up in zk.
354     LOG.info("Attempting to enable the table " + tableName);
355     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
356       tableName,
357       ZooKeeperProtos.Table.State.ENABLING);
358   }
359 
360   /**
361    * Mark offline regions of the table online with retry
362    * @param env MasterProcedureEnv
363    * @param tableName the target table
364    * @param retryRequired whether to retry if the first run failed
365    * @throws IOException
366    */
367   protected static void markRegionsOnline(
368       final MasterProcedureEnv env,
369       final TableName tableName,
370       final Boolean retryRequired) throws IOException {
371     // This is best effort approach to make all regions of a table online.  If we fail to do
372     // that, it is ok that the table has some offline regions; user can fix it manually.
373 
374     // Dev consideration: add a config to control max number of retry. For now, it is hard coded.
375     int maxTry = (retryRequired ? 10 : 1);
376     boolean done = false;
377 
378     do {
379       try {
380         done = markRegionsOnline(env, tableName);
381         if (done) {
382           break;
383         }
384         maxTry--;
385       } catch (Exception e) {
386         LOG.warn("Received exception while marking regions online. tries left: " + maxTry, e);
387         maxTry--;
388         if (maxTry > 0) {
389           continue; // we still have some retry left, try again.
390         }
391         throw e;
392       }
393     } while (maxTry > 0);
394 
395     if (!done) {
396       LOG.warn("Some or all regions of the Table '" + tableName + "' were offline");
397     }
398   }
399 
400   /**
401    * Mark offline regions of the table online
402    * @param env MasterProcedureEnv
403    * @param tableName the target table
404    * @return whether the operation is fully completed or being interrupted.
405    * @throws IOException
406    */
407   private static boolean markRegionsOnline(final MasterProcedureEnv env, final TableName tableName)
408       throws IOException {
409     final AssignmentManager assignmentManager = env.getMasterServices().getAssignmentManager();
410     final MasterServices masterServices = env.getMasterServices();
411     final ServerManager serverManager = masterServices.getServerManager();
412     boolean done = false;
413     // Get the regions of this table. We're done when all listed
414     // tables are onlined.
415     List<Pair<HRegionInfo, ServerName>> tableRegionsAndLocations;
416 
417     if (TableName.META_TABLE_NAME.equals(tableName)) {
418       tableRegionsAndLocations =
419           new MetaTableLocator().getMetaRegionsAndLocations(masterServices.getZooKeeper());
420     } else {
421       tableRegionsAndLocations =
422           MetaTableAccessor.getTableRegionsAndLocations(
423             masterServices.getZooKeeper(), masterServices.getConnection(), tableName, true);
424     }
425 
426     int countOfRegionsInTable = tableRegionsAndLocations.size();
427     Map<HRegionInfo, ServerName> regionsToAssign =
428         regionsToAssignWithServerName(env, tableRegionsAndLocations);
429 
430     // need to potentially create some regions for the replicas
431     List<HRegionInfo> unrecordedReplicas =
432         AssignmentManager.replicaRegionsNotRecordedInMeta(new HashSet<HRegionInfo>(
433             regionsToAssign.keySet()), masterServices);
434     Map<ServerName, List<HRegionInfo>> srvToUnassignedRegs =
435         assignmentManager.getBalancer().roundRobinAssignment(unrecordedReplicas,
436           serverManager.getOnlineServersList());
437     if (srvToUnassignedRegs != null) {
438       for (Map.Entry<ServerName, List<HRegionInfo>> entry : srvToUnassignedRegs.entrySet()) {
439         for (HRegionInfo h : entry.getValue()) {
440           regionsToAssign.put(h, entry.getKey());
441         }
442       }
443     }
444 
445     int offlineRegionsCount = regionsToAssign.size();
446 
447     LOG.info("Table '" + tableName + "' has " + countOfRegionsInTable + " regions, of which "
448         + offlineRegionsCount + " are offline.");
449     if (offlineRegionsCount == 0) {
450       return true;
451     }
452 
453     List<ServerName> onlineServers = serverManager.createDestinationServersList();
454     Map<ServerName, List<HRegionInfo>> bulkPlan =
455         env.getMasterServices().getAssignmentManager().getBalancer()
456             .retainAssignment(regionsToAssign, onlineServers);
457     if (bulkPlan != null) {
458       LOG.info("Bulk assigning " + offlineRegionsCount + " region(s) across " + bulkPlan.size()
459           + " server(s), retainAssignment=true");
460 
461       BulkAssigner ba = new GeneralBulkAssigner(masterServices, bulkPlan, assignmentManager, true);
462       try {
463         if (ba.bulkAssign()) {
464           done = true;
465         }
466       } catch (InterruptedException e) {
467         LOG.warn("Enable operation was interrupted when enabling table '" + tableName + "'");
468         // Preserve the interrupt.
469         Thread.currentThread().interrupt();
470       }
471     } else {
472       LOG.info("Balancer was unable to find suitable servers for table " + tableName
473           + ", leaving unassigned");
474     }
475     return done;
476   }
477 
478   /**
479    * Mark regions of the table offline during recovery
480    * @param env MasterProcedureEnv
481    */
482   private void markRegionsOfflineDuringRecovery(final MasterProcedureEnv env) {
483     try {
484       // This is a best effort attempt. We will move on even it does not succeed. We will retry
485       // several times until we giving up.
486       DisableTableProcedure.markRegionsOffline(env, tableName, true);
487     } catch (Exception e) {
488       LOG.debug("Failed to offline all regions of table " + tableName + ". Ignoring", e);
489     }
490   }
491 
492   /**
493    * Mark table state to Enabled
494    * @param env MasterProcedureEnv
495    * @throws IOException
496    */
497   protected static void setTableStateToEnabled(
498       final MasterProcedureEnv env,
499       final TableName tableName) throws HBaseException, IOException {
500     // Flip the table to Enabled
501     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
502       tableName,
503       ZooKeeperProtos.Table.State.ENABLED);
504     LOG.info("Table '" + tableName + "' was successfully enabled.");
505   }
506 
507   /**
508    * Action after enabling table.
509    * @param env MasterProcedureEnv
510    * @param state the procedure state
511    * @throws IOException
512    * @throws InterruptedException
513    */
514   private void postEnable(final MasterProcedureEnv env, final EnableTableState state)
515       throws IOException, InterruptedException {
516     runCoprocessorAction(env, state);
517   }
518 
519   /**
520    * The procedure could be restarted from a different machine. If the variable is null, we need to
521    * retrieve it.
522    * @return traceEnabled
523    */
524   private Boolean isTraceEnabled() {
525     if (traceEnabled == null) {
526       traceEnabled = LOG.isTraceEnabled();
527     }
528     return traceEnabled;
529   }
530 
531   /**
532    * @param regionsInMeta
533    * @return List of regions neither in transition nor assigned.
534    * @throws IOException
535    */
536   private static Map<HRegionInfo, ServerName> regionsToAssignWithServerName(
537       final MasterProcedureEnv env,
538       final List<Pair<HRegionInfo, ServerName>> regionsInMeta) throws IOException {
539     Map<HRegionInfo, ServerName> regionsToAssign =
540         new HashMap<HRegionInfo, ServerName>(regionsInMeta.size());
541     RegionStates regionStates = env.getMasterServices().getAssignmentManager().getRegionStates();
542     for (Pair<HRegionInfo, ServerName> regionLocation : regionsInMeta) {
543       HRegionInfo hri = regionLocation.getFirst();
544       ServerName sn = regionLocation.getSecond();
545       if (regionStates.isRegionOffline(hri)) {
546         regionsToAssign.put(hri, sn);
547       } else {
548         if (LOG.isDebugEnabled()) {
549           LOG.debug("Skipping assign for the region " + hri + " during enable table "
550               + hri.getTable() + " because its already in tranition or assigned.");
551         }
552       }
553     }
554     return regionsToAssign;
555   }
556 
557   /**
558    * Coprocessor Action.
559    * @param env MasterProcedureEnv
560    * @param state the procedure state
561    * @throws IOException
562    * @throws InterruptedException
563    */
564   private void runCoprocessorAction(final MasterProcedureEnv env, final EnableTableState state)
565       throws IOException, InterruptedException {
566     final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
567     if (cpHost != null) {
568       user.doAs(new PrivilegedExceptionAction<Void>() {
569         @Override
570         public Void run() throws Exception {
571           switch (state) {
572           case ENABLE_TABLE_PRE_OPERATION:
573             cpHost.preEnableTableHandler(getTableName());
574             break;
575           case ENABLE_TABLE_POST_OPERATION:
576             cpHost.postEnableTableHandler(getTableName());
577             break;
578           default:
579             throw new UnsupportedOperationException(this + " unhandled state=" + state);
580           }
581           return null;
582         }
583       });
584     }
585   }
586 }