1
2
3
4
5
6
7
8
9
10
11
12 package org.apache.hadoop.hbase.coordination;
13
14 import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_SPLIT;
15 import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_SPLITTING;
16 import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REQUEST_REGION_SPLIT;
17
18 import java.io.IOException;
19 import java.util.List;
20
21 import org.apache.commons.logging.Log;
22 import org.apache.commons.logging.LogFactory;
23 import org.apache.hadoop.hbase.CoordinatedStateManager;
24 import org.apache.hadoop.hbase.HRegionInfo;
25 import org.apache.hadoop.hbase.RegionTransition;
26 import org.apache.hadoop.hbase.ServerName;
27 import org.apache.hadoop.hbase.coordination.SplitTransactionCoordination;
28 import org.apache.hadoop.hbase.executor.EventType;
29 import org.apache.hadoop.hbase.regionserver.HRegion;
30 import org.apache.hadoop.hbase.regionserver.RegionServerServices;
31 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
32 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
33 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
34 import org.apache.zookeeper.KeeperException;
35 import org.apache.zookeeper.data.Stat;
36
37 public class ZKSplitTransactionCoordination implements SplitTransactionCoordination {
38
39 private CoordinatedStateManager coordinationManager;
40 private final ZooKeeperWatcher watcher;
41
42 private static final Log LOG = LogFactory.getLog(ZKSplitTransactionCoordination.class);
43
44 public ZKSplitTransactionCoordination(CoordinatedStateManager coordinationProvider,
45 ZooKeeperWatcher watcher) {
46 this.coordinationManager = coordinationProvider;
47 this.watcher = watcher;
48 }
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63 @Override
64 public void startSplitTransaction(HRegion parent, ServerName serverName, HRegionInfo hri_a,
65 HRegionInfo hri_b) throws IOException {
66
67 HRegionInfo region = parent.getRegionInfo();
68 try {
69
70 LOG.debug(watcher.prefix("Creating ephemeral node for " + region.getEncodedName()
71 + " in PENDING_SPLIT state"));
72 byte[] payload = HRegionInfo.toDelimitedByteArray(hri_a, hri_b);
73 RegionTransition rt =
74 RegionTransition.createRegionTransition(RS_ZK_REQUEST_REGION_SPLIT,
75 region.getRegionName(), serverName, payload);
76 String node = ZKAssign.getNodeName(watcher, region.getEncodedName());
77 if (!ZKUtil.createEphemeralNodeAndWatch(watcher, node, rt.toByteArray())) {
78 throw new IOException("Failed create of ephemeral " + node);
79 }
80
81 } catch (KeeperException e) {
82 throw new IOException("Failed creating PENDING_SPLIT znode on "
83 + parent.getRegionNameAsString(), e);
84 }
85
86 }
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121 private int transitionSplittingNode(HRegionInfo parent, HRegionInfo a, HRegionInfo b,
122 ServerName serverName, SplitTransactionDetails std, final EventType beginState,
123 final EventType endState) throws IOException {
124 ZkSplitTransactionDetails zstd = (ZkSplitTransactionDetails) std;
125 byte[] payload = HRegionInfo.toDelimitedByteArray(a, b);
126 try {
127 return ZKAssign.transitionNode(watcher, parent, serverName, beginState, endState,
128 zstd.getZnodeVersion(), payload);
129 } catch (KeeperException e) {
130 throw new IOException(
131 "Failed transition of splitting node " + parent.getRegionNameAsString(), e);
132 }
133 }
134
135
136
137
138
139
140
141 @Override
142 public void waitForSplitTransaction(final RegionServerServices services, HRegion parent,
143 HRegionInfo hri_a, HRegionInfo hri_b, SplitTransactionDetails sptd) throws IOException {
144 ZkSplitTransactionDetails zstd = (ZkSplitTransactionDetails) sptd;
145
146
147
148
149 try {
150 int spins = 0;
151 Stat stat = new Stat();
152 ServerName expectedServer = coordinationManager.getServer().getServerName();
153 String node = parent.getRegionInfo().getEncodedName();
154 while (!(coordinationManager.getServer().isStopped() || services.isStopping())) {
155 if (spins % 5 == 0) {
156 LOG.debug("Still waiting for master to process " + "the pending_split for " + node);
157 SplitTransactionDetails temp = getDefaultDetails();
158 transitionSplittingNode(parent.getRegionInfo(), hri_a, hri_b, expectedServer, temp,
159 RS_ZK_REQUEST_REGION_SPLIT, RS_ZK_REQUEST_REGION_SPLIT);
160 }
161 Thread.sleep(100);
162 spins++;
163 byte[] data = ZKAssign.getDataNoWatch(watcher, node, stat);
164 if (data == null) {
165 throw new IOException("Data is null, splitting node " + node + " no longer exists");
166 }
167 RegionTransition rt = RegionTransition.parseFrom(data);
168 EventType et = rt.getEventType();
169 if (et == RS_ZK_REGION_SPLITTING) {
170 ServerName serverName = rt.getServerName();
171 if (!serverName.equals(expectedServer)) {
172 throw new IOException("Splitting node " + node + " is for " + serverName + ", not us "
173 + expectedServer);
174 }
175 byte[] payloadOfSplitting = rt.getPayload();
176 List<HRegionInfo> splittingRegions =
177 HRegionInfo.parseDelimitedFrom(payloadOfSplitting, 0, payloadOfSplitting.length);
178 assert splittingRegions.size() == 2;
179 HRegionInfo a = splittingRegions.get(0);
180 HRegionInfo b = splittingRegions.get(1);
181 if (!(hri_a.equals(a) && hri_b.equals(b))) {
182 throw new IOException("Splitting node " + node + " is for " + a + ", " + b
183 + ", not expected daughters: " + hri_a + ", " + hri_b);
184 }
185
186 zstd.setZnodeVersion(stat.getVersion());
187 return;
188 }
189 if (et != RS_ZK_REQUEST_REGION_SPLIT) {
190 throw new IOException("Splitting node " + node + " moved out of splitting to " + et);
191 }
192 }
193
194 throw new IOException("Server is " + (services.isStopping() ? "stopping" : "stopped"));
195 } catch (Exception e) {
196 if (e instanceof InterruptedException) {
197 Thread.currentThread().interrupt();
198 }
199 throw new IOException("Failed getting SPLITTING znode on " + parent.getRegionNameAsString(),
200 e);
201 }
202 }
203
204
205
206
207
208
209
210
211
212
213
214
215 @Override
216 public void completeSplitTransaction(final RegionServerServices services, HRegion a, HRegion b,
217 SplitTransactionDetails std, HRegion parent) throws IOException {
218 ZkSplitTransactionDetails zstd = (ZkSplitTransactionDetails) std;
219
220 if (coordinationManager.getServer() != null) {
221 try {
222 zstd.setZnodeVersion(transitionSplittingNode(parent.getRegionInfo(), a.getRegionInfo(),
223 b.getRegionInfo(), coordinationManager.getServer().getServerName(), zstd,
224 RS_ZK_REGION_SPLITTING, RS_ZK_REGION_SPLIT));
225
226 int spins = 0;
227
228
229
230 do {
231 if (spins % 10 == 0) {
232 LOG.debug("Still waiting on the master to process the split for "
233 + parent.getRegionInfo().getEncodedName());
234 }
235 Thread.sleep(100);
236
237 zstd.setZnodeVersion(transitionSplittingNode(parent.getRegionInfo(), a.getRegionInfo(),
238 b.getRegionInfo(), coordinationManager.getServer().getServerName(), zstd,
239 RS_ZK_REGION_SPLIT, RS_ZK_REGION_SPLIT));
240 spins++;
241 } while (zstd.getZnodeVersion() != -1 && !coordinationManager.getServer().isStopped()
242 && !services.isStopping());
243 } catch (Exception e) {
244 if (e instanceof InterruptedException) {
245 Thread.currentThread().interrupt();
246 }
247 throw new IOException("Failed telling master about split", e);
248 }
249 }
250
251
252
253
254 }
255
256 @Override
257 public void clean(final HRegionInfo hri) {
258 try {
259
260 if (!ZKAssign.deleteNode(coordinationManager.getServer().getZooKeeper(),
261 hri.getEncodedName(), RS_ZK_REQUEST_REGION_SPLIT, coordinationManager.getServer()
262 .getServerName())) {
263 ZKAssign.deleteNode(coordinationManager.getServer().getZooKeeper(), hri.getEncodedName(),
264 RS_ZK_REGION_SPLITTING, coordinationManager.getServer().getServerName());
265 }
266 } catch (KeeperException.NoNodeException e) {
267 LOG.info("Failed cleanup zk node of " + hri.getRegionNameAsString(), e);
268 } catch (KeeperException e) {
269 coordinationManager.getServer().abort("Failed cleanup of " + hri.getRegionNameAsString(), e);
270 }
271 }
272
273
274
275
276
277 public static class ZkSplitTransactionDetails implements
278 SplitTransactionCoordination.SplitTransactionDetails {
279 private int znodeVersion;
280
281 public ZkSplitTransactionDetails() {
282 }
283
284
285
286
287 public int getZnodeVersion() {
288 return znodeVersion;
289 }
290
291
292
293
294 public void setZnodeVersion(int znodeVersion) {
295 this.znodeVersion = znodeVersion;
296 }
297 }
298
299 @Override
300 public SplitTransactionDetails getDefaultDetails() {
301 ZkSplitTransactionDetails zstd = new ZkSplitTransactionDetails();
302 zstd.setZnodeVersion(-1);
303 return zstd;
304 }
305
306 @Override
307 public int processTransition(HRegionInfo p, HRegionInfo hri_a, HRegionInfo hri_b, ServerName sn,
308 SplitTransactionDetails std) throws IOException {
309 return transitionSplittingNode(p, hri_a, hri_b, sn, std, RS_ZK_REQUEST_REGION_SPLIT,
310 RS_ZK_REGION_SPLITTING);
311
312 }
313 }