1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.zookeeper;
19
20 import com.google.common.base.Stopwatch;
21 import com.google.protobuf.InvalidProtocolBufferException;
22
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
25 import org.apache.hadoop.hbase.classification.InterfaceAudience;
26 import org.apache.hadoop.hbase.HConstants;
27 import org.apache.hadoop.hbase.HRegionInfo;
28 import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
29 import org.apache.hadoop.hbase.client.HConnection;
30 import org.apache.hadoop.hbase.client.RetriesExhaustedException;
31 import org.apache.hadoop.hbase.exceptions.DeserializationException;
32 import org.apache.hadoop.hbase.ServerName;
33 import org.apache.hadoop.hbase.ipc.FailedServerException;
34 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
35 import org.apache.hadoop.hbase.master.RegionState;
36 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
37 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
38 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
39 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
40 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
41 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.MetaRegionServer;
42 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
43 import org.apache.hadoop.hbase.util.Bytes;
44 import org.apache.hadoop.hbase.util.Pair;
45 import org.apache.hadoop.ipc.RemoteException;
46 import org.apache.zookeeper.KeeperException;
47
48 import java.io.EOFException;
49 import java.io.IOException;
50 import java.net.ConnectException;
51 import java.net.NoRouteToHostException;
52 import java.net.SocketException;
53 import java.net.SocketTimeoutException;
54 import java.rmi.UnknownHostException;
55
56 import java.util.List;
57 import java.util.ArrayList;
58
59 import javax.annotation.Nullable;
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76 @InterfaceAudience.Private
77 public class MetaTableLocator {
78 private static final Log LOG = LogFactory.getLog(MetaTableLocator.class);
79
80 static final byte [] META_REGION_NAME =
81 HRegionInfo.FIRST_META_REGIONINFO.getRegionName();
82
83
84 private volatile boolean stopped = false;
85
86
87
88
89
90 public boolean isLocationAvailable(ZooKeeperWatcher zkw) {
91 return getMetaRegionLocation(zkw) != null;
92 }
93
94
95
96
97
98 public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw) {
99 ServerName serverName = new MetaTableLocator().getMetaRegionLocation(zkw);
100 List<Pair<HRegionInfo, ServerName>> list = new ArrayList<Pair<HRegionInfo, ServerName>>();
101 list.add(new Pair<HRegionInfo, ServerName>(HRegionInfo.FIRST_META_REGIONINFO, serverName));
102 return list;
103 }
104
105
106
107
108
109 public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw) {
110 List<Pair<HRegionInfo, ServerName>> result;
111 result = getMetaRegionsAndLocations(zkw);
112 return getListOfHRegionInfos(result);
113 }
114
115 private List<HRegionInfo> getListOfHRegionInfos(
116 final List<Pair<HRegionInfo, ServerName>> pairs) {
117 if (pairs == null || pairs.isEmpty()) return null;
118 List<HRegionInfo> result = new ArrayList<HRegionInfo>(pairs.size());
119 for (Pair<HRegionInfo, ServerName> pair: pairs) {
120 result.add(pair.getFirst());
121 }
122 return result;
123 }
124
125
126
127
128
129
130 @Nullable
131 public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw) {
132 try {
133 RegionState state = getMetaRegionState(zkw);
134 return state.isOpened() ? state.getServerName() : null;
135 } catch (KeeperException ke) {
136 return null;
137 }
138 }
139
140
141
142
143
144
145
146
147
148
149
150 public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, long timeout)
151 throws InterruptedException, NotAllMetaRegionsOnlineException {
152 try {
153 if (ZKUtil.checkExists(zkw, zkw.baseZNode) == -1) {
154 String errorMsg = "Check the value configured in 'zookeeper.znode.parent'. "
155 + "There could be a mismatch with the one configured in the master.";
156 LOG.error(errorMsg);
157 throw new IllegalArgumentException(errorMsg);
158 }
159 } catch (KeeperException e) {
160 throw new IllegalStateException("KeeperException while trying to check baseZNode:", e);
161 }
162 ServerName sn = blockUntilAvailable(zkw, timeout);
163
164 if (sn == null) {
165 throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms");
166 }
167
168 return sn;
169 }
170
171
172
173
174
175
176
177
178 public void waitMetaRegionLocation(ZooKeeperWatcher zkw) throws InterruptedException {
179 Stopwatch stopwatch = new Stopwatch().start();
180 while (!stopped) {
181 try {
182 if (waitMetaRegionLocation(zkw, 100) != null) break;
183 long sleepTime = stopwatch.elapsedMillis();
184
185 if ((sleepTime + 1) % 10000 == 0) {
186 LOG.warn("Have been waiting for meta to be assigned for " + sleepTime + "ms");
187 }
188 } catch (NotAllMetaRegionsOnlineException e) {
189 if (LOG.isTraceEnabled()) {
190 LOG.trace("hbase:meta still not available, sleeping and retrying." +
191 " Reason: " + e.getMessage());
192 }
193 }
194 }
195 }
196
197
198
199
200
201
202
203
204
205 public boolean verifyMetaRegionLocation(HConnection hConnection,
206 ZooKeeperWatcher zkw, final long timeout)
207 throws InterruptedException, IOException {
208 AdminProtos.AdminService.BlockingInterface service = null;
209 try {
210 service = getMetaServerConnection(hConnection, zkw, timeout);
211 } catch (NotAllMetaRegionsOnlineException e) {
212
213 } catch (ServerNotRunningYetException e) {
214
215 } catch (UnknownHostException e) {
216
217 } catch (RegionServerStoppedException e) {
218
219 }
220 return (service != null) && verifyRegionLocation(service,
221 getMetaRegionLocation(zkw), META_REGION_NAME);
222 }
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239 private boolean verifyRegionLocation(AdminService.BlockingInterface hostingServer,
240 final ServerName address, final byte [] regionName)
241 throws IOException {
242 if (hostingServer == null) {
243 LOG.info("Passed hostingServer is null");
244 return false;
245 }
246 Throwable t;
247 try {
248
249 return ProtobufUtil.getRegionInfo(hostingServer, regionName) != null;
250 } catch (ConnectException e) {
251 t = e;
252 } catch (RetriesExhaustedException e) {
253 t = e;
254 } catch (RemoteException e) {
255 IOException ioe = e.unwrapRemoteException();
256 t = ioe;
257 } catch (IOException e) {
258 Throwable cause = e.getCause();
259 if (cause != null && cause instanceof EOFException) {
260 t = cause;
261 } else if (cause != null && cause.getMessage() != null
262 && cause.getMessage().contains("Connection reset")) {
263 t = cause;
264 } else {
265 t = e;
266 }
267 }
268 LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) +
269 " at address=" + address + ", exception=" + t.getMessage());
270 return false;
271 }
272
273
274
275
276
277
278
279
280
281
282
283 private AdminService.BlockingInterface getMetaServerConnection(HConnection hConnection,
284 ZooKeeperWatcher zkw, long timeout)
285 throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
286 return getCachedConnection(hConnection, waitMetaRegionLocation(zkw, timeout));
287 }
288
289
290
291
292
293
294
295
296 @SuppressWarnings("deprecation")
297 private static AdminService.BlockingInterface getCachedConnection(HConnection hConnection,
298 ServerName sn)
299 throws IOException {
300 if (sn == null) {
301 return null;
302 }
303 AdminService.BlockingInterface service = null;
304 try {
305 service = hConnection.getAdmin(sn);
306 } catch (RetriesExhaustedException e) {
307 if (e.getCause() != null && e.getCause() instanceof ConnectException) {
308
309 } else {
310 throw e;
311 }
312 } catch (SocketTimeoutException e) {
313 LOG.debug("Timed out connecting to " + sn);
314 } catch (NoRouteToHostException e) {
315 LOG.debug("Connecting to " + sn, e);
316 } catch (SocketException e) {
317 LOG.debug("Exception connecting to " + sn);
318 } catch (UnknownHostException e) {
319 LOG.debug("Unknown host exception connecting to " + sn);
320 } catch (FailedServerException e) {
321 if (LOG.isDebugEnabled()) {
322 LOG.debug("Server " + sn + " is in failed server list.");
323 }
324 } catch (IOException ioe) {
325 Throwable cause = ioe.getCause();
326 if (ioe instanceof ConnectException) {
327
328 } else if (cause != null && cause instanceof EOFException) {
329
330 } else if (cause != null && cause.getMessage() != null &&
331 cause.getMessage().toLowerCase().contains("connection reset")) {
332
333 } else {
334 throw ioe;
335 }
336
337 }
338 return service;
339 }
340
341
342
343
344
345
346
347
348
349 public static void setMetaLocation(ZooKeeperWatcher zookeeper,
350 ServerName serverName, RegionState.State state) throws KeeperException {
351 LOG.info("Setting hbase:meta region location in ZooKeeper as " + serverName);
352
353
354 MetaRegionServer pbrsr = MetaRegionServer.newBuilder()
355 .setServer(ProtobufUtil.toServerName(serverName))
356 .setRpcVersion(HConstants.RPC_CURRENT_VERSION)
357 .setState(state.convert()).build();
358 byte[] data = ProtobufUtil.prependPBMagic(pbrsr.toByteArray());
359 try {
360 ZKUtil.setData(zookeeper, zookeeper.metaServerZNode, data);
361 } catch(KeeperException.NoNodeException nne) {
362 LOG.debug("META region location doesn't existed, create it");
363 ZKUtil.createAndWatch(zookeeper, zookeeper.metaServerZNode, data);
364 }
365 }
366
367
368
369
370 public static RegionState getMetaRegionState(ZooKeeperWatcher zkw) throws KeeperException {
371 RegionState.State state = RegionState.State.OPEN;
372 ServerName serverName = null;
373 try {
374 byte[] data = ZKUtil.getData(zkw, zkw.metaServerZNode);
375 if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
376 try {
377 int prefixLen = ProtobufUtil.lengthOfPBMagic();
378 ZooKeeperProtos.MetaRegionServer rl =
379 ZooKeeperProtos.MetaRegionServer.PARSER.parseFrom
380 (data, prefixLen, data.length - prefixLen);
381 if (rl.hasState()) {
382 state = RegionState.State.convert(rl.getState());
383 }
384 HBaseProtos.ServerName sn = rl.getServer();
385 serverName = ServerName.valueOf(
386 sn.getHostName(), sn.getPort(), sn.getStartCode());
387 } catch (InvalidProtocolBufferException e) {
388 throw new DeserializationException("Unable to parse meta region location");
389 }
390 } else {
391
392 serverName = ServerName.parseFrom(data);
393 }
394 } catch (DeserializationException e) {
395 throw ZKUtil.convert(e);
396 } catch (InterruptedException e) {
397 Thread.currentThread().interrupt();
398 }
399 if (serverName == null) {
400 state = RegionState.State.OFFLINE;
401 }
402 return new RegionState(HRegionInfo.FIRST_META_REGIONINFO,
403 state, serverName);
404 }
405
406
407
408
409
410
411 public void deleteMetaLocation(ZooKeeperWatcher zookeeper)
412 throws KeeperException {
413 LOG.info("Deleting hbase:meta region location in ZooKeeper");
414 try {
415
416 ZKUtil.deleteNode(zookeeper, zookeeper.metaServerZNode);
417 } catch(KeeperException.NoNodeException nne) {
418
419 }
420 }
421
422
423
424
425
426
427
428
429 public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw,
430 final long timeout)
431 throws InterruptedException {
432 if (timeout < 0) throw new IllegalArgumentException();
433 if (zkw == null) throw new IllegalArgumentException();
434 Stopwatch sw = new Stopwatch().start();
435 ServerName sn = null;
436 try {
437 while (true) {
438 sn = getMetaRegionLocation(zkw);
439 if (sn != null || sw.elapsedMillis()
440 > timeout - HConstants.SOCKET_RETRY_WAIT_MS) {
441 break;
442 }
443 Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS);
444 }
445 } finally {
446 sw.stop();
447 }
448 return sn;
449 }
450
451
452
453
454
455 public void stop() {
456 if (!stopped) {
457 LOG.debug("Stopping MetaTableLocator");
458 stopped = true;
459 }
460 }
461 }