1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.catalog;
19
import com.google.common.base.Stopwatch;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.RetriesExhaustedException;
import org.apache.hadoop.hbase.ipc.RpcClient.FailedServerException;
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.ipc.RemoteException;

import java.io.EOFException;
import java.io.IOException;
import java.net.ConnectException;
import java.net.NoRouteToHostException;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.UnknownHostException;
import java.util.Locale;
50
51
52
53
54
55
56
57
58
59
60
61
62 @InterfaceAudience.Private
63 public class CatalogTracker {
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109 private static final Log LOG = LogFactory.getLog(CatalogTracker.class);
110 private final HConnection connection;
111 private final ZooKeeperWatcher zookeeper;
112 private final MetaRegionTracker metaRegionTracker;
113 private boolean instantiatedzkw = false;
114 private Abortable abortable;
115
116 private boolean stopped = false;
117
118 static final byte [] META_REGION_NAME =
119 HRegionInfo.FIRST_META_REGIONINFO.getRegionName();
120
121
122
123
124
125
126
127
128
129
130
131
132 public CatalogTracker(final Configuration conf) throws IOException {
133 this(null, conf, null);
134 }
135
136
137
138
139
140
141
142
143
144
145
146
147
148 public CatalogTracker(final ZooKeeperWatcher zk, final Configuration conf,
149 Abortable abortable)
150 throws IOException {
151 this(zk, conf, HConnectionManager.getConnection(conf), abortable);
152 }
153
154 public CatalogTracker(final ZooKeeperWatcher zk, final Configuration conf,
155 HConnection connection, Abortable abortable)
156 throws IOException {
157 this.connection = connection;
158 if (abortable == null) {
159
160 this.abortable = this.connection;
161 }
162 Abortable throwableAborter = new Abortable() {
163
164 @Override
165 public void abort(String why, Throwable e) {
166 throw new RuntimeException(why, e);
167 }
168
169 @Override
170 public boolean isAborted() {
171 return true;
172 }
173
174 };
175 if (zk == null) {
176
177 this.zookeeper =
178 new ZooKeeperWatcher(conf, "catalogtracker-on-" + connection.toString(),
179 abortable);
180 instantiatedzkw = true;
181 } else {
182 this.zookeeper = zk;
183 }
184 this.metaRegionTracker = new MetaRegionTracker(zookeeper, throwableAborter);
185 }
186
187
188
189
190
191
192
193
194 public void start() throws IOException, InterruptedException {
195 LOG.debug("Starting catalog tracker " + this);
196 try {
197 this.metaRegionTracker.start();
198 } catch (RuntimeException e) {
199 Throwable t = e.getCause();
200 this.abortable.abort(e.getMessage(), t);
201 throw new IOException("Attempt to start meta tracker failed.", t);
202 }
203 }
204
205
206
207
208
209 public void stop() {
210 if (!this.stopped) {
211 LOG.debug("Stopping catalog tracker " + this);
212 this.stopped = true;
213 this.metaRegionTracker.stop();
214 try {
215 if (this.connection != null) {
216 this.connection.close();
217 }
218 } catch (IOException e) {
219
220
221 LOG.error("Attempt to close catalog tracker's connection failed.", e);
222 }
223 if (this.instantiatedzkw) {
224 this.zookeeper.close();
225 }
226 }
227 }
228
229
230
231
232
233
234
235
236 public ServerName getMetaLocation() throws InterruptedException {
237 return this.metaRegionTracker.getMetaRegionLocation();
238 }
239
240
241
242
243
244 public boolean isMetaLocationAvailable() {
245 return this.metaRegionTracker.isLocationAvailable();
246 }
247
248
249
250
251
252
253
254
255
256
257
258 public ServerName waitForMeta(final long timeout)
259 throws InterruptedException, NotAllMetaRegionsOnlineException {
260 ServerName sn = metaRegionTracker.waitMetaRegionLocation(timeout);
261 if (sn == null) {
262 throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms");
263 }
264 return sn;
265 }
266
267
268
269
270
271
272
273
274
275
276
277
278 public AdminService.BlockingInterface waitForMetaServerConnection(long timeout)
279 throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
280 return getMetaServerConnection(timeout);
281 }
282
283
284
285
286
287
288
289
290
291
292
293
294 AdminService.BlockingInterface getMetaServerConnection(long timeout)
295 throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
296 return getCachedConnection(waitForMeta(timeout));
297 }
298
299
300
301
302
303
304
305
306 public void waitForMeta() throws InterruptedException {
307 Stopwatch stopwatch = new Stopwatch().start();
308 while (!this.stopped) {
309 try {
310 if (waitForMeta(100) != null) break;
311 long sleepTime = stopwatch.elapsedMillis();
312
313 if ((sleepTime + 1) % 10000 == 0) {
314 LOG.warn("Have been waiting for meta to be assigned for " + sleepTime + "ms");
315 }
316 } catch (NotAllMetaRegionsOnlineException e) {
317 if (LOG.isTraceEnabled()) {
318 LOG.trace("hbase:meta still not available, sleeping and retrying." +
319 " Reason: " + e.getMessage());
320 }
321 }
322 }
323 }
324
325
326
327
328
329
330
331
332 private AdminService.BlockingInterface getCachedConnection(ServerName sn)
333 throws IOException {
334 if (sn == null) {
335 return null;
336 }
337 AdminService.BlockingInterface service = null;
338 try {
339 service = connection.getAdmin(sn);
340 } catch (RetriesExhaustedException e) {
341 if (e.getCause() != null && e.getCause() instanceof ConnectException) {
342
343 } else {
344 throw e;
345 }
346 } catch (SocketTimeoutException e) {
347 LOG.debug("Timed out connecting to " + sn);
348 } catch (NoRouteToHostException e) {
349 LOG.debug("Connecting to " + sn, e);
350 } catch (SocketException e) {
351 LOG.debug("Exception connecting to " + sn);
352 } catch (UnknownHostException e) {
353 LOG.debug("Unknown host exception connecting to " + sn);
354 } catch (FailedServerException e) {
355 if (LOG.isDebugEnabled()) {
356 LOG.debug("Server " + sn + " is in failed server list.");
357 }
358 } catch (IOException ioe) {
359 Throwable cause = ioe.getCause();
360 if (ioe instanceof ConnectException) {
361
362 } else if (cause != null && cause instanceof EOFException) {
363
364 } else if (cause != null && cause.getMessage() != null &&
365 cause.getMessage().toLowerCase().contains("connection reset")) {
366
367 } else {
368 throw ioe;
369 }
370
371 }
372 return service;
373 }
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390 private boolean verifyRegionLocation(AdminService.BlockingInterface hostingServer,
391 final ServerName address, final byte [] regionName)
392 throws IOException {
393 if (hostingServer == null) {
394 LOG.info("Passed hostingServer is null");
395 return false;
396 }
397 Throwable t = null;
398 try {
399
400 return ProtobufUtil.getRegionInfo(hostingServer, regionName) != null;
401 } catch (ConnectException e) {
402 t = e;
403 } catch (RetriesExhaustedException e) {
404 t = e;
405 } catch (RemoteException e) {
406 IOException ioe = e.unwrapRemoteException();
407 t = ioe;
408 } catch (IOException e) {
409 Throwable cause = e.getCause();
410 if (cause != null && cause instanceof EOFException) {
411 t = cause;
412 } else if (cause != null && cause.getMessage() != null
413 && cause.getMessage().contains("Connection reset")) {
414 t = cause;
415 } else {
416 t = e;
417 }
418 }
419 LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) +
420 " at address=" + address + ", exception=" + t);
421 return false;
422 }
423
424
425
426
427
428
429
430
431
432 public boolean verifyMetaRegionLocation(final long timeout)
433 throws InterruptedException, IOException {
434 AdminService.BlockingInterface service = null;
435 try {
436 service = waitForMetaServerConnection(timeout);
437 } catch (NotAllMetaRegionsOnlineException e) {
438
439 } catch (ServerNotRunningYetException e) {
440
441 } catch (UnknownHostException e) {
442
443 } catch (RegionServerStoppedException e) {
444
445 }
446 return (service == null)? false:
447 verifyRegionLocation(service,
448 this.metaRegionTracker.getMetaRegionLocation(), META_REGION_NAME);
449 }
450
451 public HConnection getConnection() {
452 return this.connection;
453 }
454 }