1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.regionserver.snapshot;
19
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.Collection;
23 import java.util.List;
24 import java.util.concurrent.Callable;
25 import java.util.concurrent.ExecutionException;
26 import java.util.concurrent.ExecutorCompletionService;
27 import java.util.concurrent.Future;
28 import java.util.concurrent.LinkedBlockingQueue;
29 import java.util.concurrent.ThreadPoolExecutor;
30 import java.util.concurrent.TimeUnit;
31
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34 import org.apache.hadoop.classification.InterfaceAudience;
35 import org.apache.hadoop.classification.InterfaceStability;
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.hbase.DaemonThreadFactory;
38 import org.apache.hadoop.hbase.TableName;
39 import org.apache.hadoop.hbase.errorhandling.ForeignException;
40 import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
41 import org.apache.hadoop.hbase.master.snapshot.MasterSnapshotVerifier;
42 import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
43 import org.apache.hadoop.hbase.procedure.ProcedureMember;
44 import org.apache.hadoop.hbase.procedure.ProcedureMemberRpcs;
45 import org.apache.hadoop.hbase.procedure.RegionServerProcedureManager;
46 import org.apache.hadoop.hbase.procedure.Subprocedure;
47 import org.apache.hadoop.hbase.procedure.SubprocedureFactory;
48 import org.apache.hadoop.hbase.procedure.ZKProcedureMemberRpcs;
49 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
50 import org.apache.hadoop.hbase.regionserver.HRegion;
51 import org.apache.hadoop.hbase.regionserver.HRegionServer;
52 import org.apache.hadoop.hbase.regionserver.RegionServerServices;
53 import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
54 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
55 import org.apache.zookeeper.KeeperException;
56
57 import com.google.protobuf.InvalidProtocolBufferException;
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72 @InterfaceAudience.Private
73 @InterfaceStability.Unstable
74 public class RegionServerSnapshotManager extends RegionServerProcedureManager {
/** Logger used by this manager and by the nested SnapshotSubprocedurePool. */
75 private static final Log LOG = LogFactory.getLog(RegionServerSnapshotManager.class);
76
77
/** Conf key read by SnapshotSubprocedurePool to size the thread pool it creates per snapshot. */
78 private static final String CONCURENT_SNAPSHOT_TASKS_KEY = "hbase.snapshot.region.concurrentTasks";
/** Value used when CONCURENT_SNAPSHOT_TASKS_KEY is not configured. */
79 private static final int DEFAULT_CONCURRENT_SNAPSHOT_TASKS = 3;
80
81
/** Conf key for the thread count handed to ProcedureMember.defaultPool() in initialize(). */
82 public static final String SNAPSHOT_REQUEST_THREADS_KEY = "hbase.snapshot.region.pool.threads";
83
/** Value used when SNAPSHOT_REQUEST_THREADS_KEY is not configured. */
84 public static final int SNAPSHOT_REQUEST_THREADS_DEFAULT = 10;
85
86
/**
 * Conf key for the snapshot timeout in milliseconds. The value is passed to the flush
 * subprocedure as its timeout and is also reused as the keep-alive time of the thread pools
 * created in this class.
 */
87 public static final String SNAPSHOT_TIMEOUT_MILLIS_KEY = "hbase.snapshot.region.timeout";
88
/** Value (in milliseconds) used when SNAPSHOT_TIMEOUT_MILLIS_KEY is not configured. */
89 public static final long SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 60000;
90
91
/** Conf key for the wake interval, in milliseconds, passed to the flush subprocedure. */
92 public static final String SNAPSHOT_REQUEST_WAKE_MILLIS_KEY = "hbase.snapshot.region.wakefrequency";
93
/** Value (in milliseconds) used when SNAPSHOT_REQUEST_WAKE_MILLIS_KEY is not configured. */
94 private static final long SNAPSHOT_REQUEST_WAKE_MILLIS_DEFAULT = 500;
95
/** Services of the hosting region server; assigned by the constructor or by initialize(). */
96 private RegionServerServices rss;
/** Rpc layer started in start() and closed in stop(). */
97 private ProcedureMemberRpcs memberRpcs;
/** Procedure member registered with memberRpcs in start() and closed in stop(). */
98 private ProcedureMember member;
99
100
101
102
103
104
105
106
/**
 * Builds a manager from already-constructed collaborators, bypassing
 * {@link #initialize(RegionServerServices)}.
 *
 * @param conf configuration; not read by this constructor
 * @param parent region server that hosts this manager
 * @param memberRpc rpc layer that {@link #start()} will start and {@link #stop(boolean)} will close
 * @param procMember procedure member to register with {@code memberRpc}
 */
RegionServerSnapshotManager(Configuration conf, HRegionServer parent,
    ProcedureMemberRpcs memberRpc, ProcedureMember procMember) {
  this.member = procMember;
  this.memberRpcs = memberRpc;
  this.rss = parent;
}

/**
 * Creates an empty manager. All collaborators remain unset until
 * {@link #initialize(RegionServerServices)} is invoked.
 */
public RegionServerSnapshotManager() {
}
115
116
117
118
119 @Override
120 public void start() {
121 LOG.debug("Start Snapshot Manager " + rss.getServerName().toString());
122 this.memberRpcs.start(rss.getServerName().toString(), member);
123 }
124
125
126
127
128
129
130 @Override
131 public void stop(boolean force) throws IOException {
132 String mode = force ? "abruptly" : "gracefully";
133 LOG.info("Stopping RegionServerSnapshotManager " + mode + ".");
134
135 try {
136 this.member.close();
137 } finally {
138 this.memberRpcs.close();
139 }
140 }
141
142
143
144
145
146
147
148
149
150
151
152 public Subprocedure buildSubprocedure(SnapshotDescription snapshot) {
153
154
155 if (rss.isStopping() || rss.isStopped()) {
156 throw new IllegalStateException("Can't start snapshot on RS: " + rss.getServerName()
157 + ", because stopping/stopped!");
158 }
159
160
161
162 List<HRegion> involvedRegions;
163 try {
164 involvedRegions = getRegionsToSnapshot(snapshot);
165 } catch (IOException e1) {
166 throw new IllegalStateException("Failed to figure out if we should handle a snapshot - "
167 + "something has gone awry with the online regions.", e1);
168 }
169
170
171
172
173
174 LOG.debug("Launching subprocedure for snapshot " + snapshot.getName() + " from table "
175 + snapshot.getTable());
176 ForeignExceptionDispatcher exnDispatcher = new ForeignExceptionDispatcher(snapshot.getName());
177 Configuration conf = rss.getConfiguration();
178 long timeoutMillis = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY,
179 SNAPSHOT_TIMEOUT_MILLIS_DEFAULT);
180 long wakeMillis = conf.getLong(SNAPSHOT_REQUEST_WAKE_MILLIS_KEY,
181 SNAPSHOT_REQUEST_WAKE_MILLIS_DEFAULT);
182
183 switch (snapshot.getType()) {
184 case FLUSH:
185 SnapshotSubprocedurePool taskManager =
186 new SnapshotSubprocedurePool(rss.getServerName().toString(), conf);
187 return new FlushSnapshotSubprocedure(member, exnDispatcher, wakeMillis,
188 timeoutMillis, involvedRegions, snapshot, taskManager);
189 default:
190 throw new UnsupportedOperationException("Unrecognized snapshot type:" + snapshot.getType());
191 }
192 }
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209 private List<HRegion> getRegionsToSnapshot(SnapshotDescription snapshot) throws IOException {
210 return rss.getOnlineRegions(TableName.valueOf(snapshot.getTable()));
211 }
212
213
214
215
216 public class SnapshotSubprocedureBuilder implements SubprocedureFactory {
217
218 @Override
219 public Subprocedure buildSubprocedure(String name, byte[] data) {
220 try {
221
222 SnapshotDescription snapshot = SnapshotDescription.parseFrom(data);
223 return RegionServerSnapshotManager.this.buildSubprocedure(snapshot);
224 } catch (InvalidProtocolBufferException e) {
225 throw new IllegalArgumentException("Could not read snapshot information from request.");
226 }
227 }
228
229 }
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244 static class SnapshotSubprocedurePool {
245 private final ExecutorCompletionService<Void> taskPool;
246 private final ThreadPoolExecutor executor;
247 private volatile boolean stopped;
248 private final List<Future<Void>> futures = new ArrayList<Future<Void>>();
249 private final String name;
250
251 SnapshotSubprocedurePool(String name, Configuration conf) {
252
253 long keepAlive = conf.getLong(
254 RegionServerSnapshotManager.SNAPSHOT_TIMEOUT_MILLIS_KEY,
255 RegionServerSnapshotManager.SNAPSHOT_TIMEOUT_MILLIS_DEFAULT);
256 int threads = conf.getInt(CONCURENT_SNAPSHOT_TASKS_KEY, DEFAULT_CONCURRENT_SNAPSHOT_TASKS);
257 this.name = name;
258 executor = new ThreadPoolExecutor(1, threads, keepAlive, TimeUnit.MILLISECONDS,
259 new LinkedBlockingQueue<Runnable>(), new DaemonThreadFactory("rs("
260 + name + ")-snapshot-pool"));
261 taskPool = new ExecutorCompletionService<Void>(executor);
262 }
263
264 boolean hasTasks() {
265 return futures.size() != 0;
266 }
267
268
269
270
271
272
273
274 void submitTask(final Callable<Void> task) {
275 Future<Void> f = this.taskPool.submit(task);
276 futures.add(f);
277 }
278
279
280
281
282
283
284
285
286
287 boolean waitForOutstandingTasks() throws ForeignException, InterruptedException {
288 LOG.debug("Waiting for local region snapshots to finish.");
289
290 int sz = futures.size();
291 try {
292
293 for (int i = 0; i < sz; i++) {
294 Future<Void> f = taskPool.take();
295 f.get();
296 if (!futures.remove(f)) {
297 LOG.warn("unexpected future" + f);
298 }
299 LOG.debug("Completed " + (i+1) + "/" + sz + " local region snapshots.");
300 }
301 LOG.debug("Completed " + sz + " local region snapshots.");
302 return true;
303 } catch (InterruptedException e) {
304 LOG.warn("Got InterruptedException in SnapshotSubprocedurePool", e);
305 if (!stopped) {
306 Thread.currentThread().interrupt();
307 throw new ForeignException("SnapshotSubprocedurePool", e);
308 }
309
310 } catch (ExecutionException e) {
311 if (e.getCause() instanceof ForeignException) {
312 LOG.warn("Rethrowing ForeignException from SnapshotSubprocedurePool", e);
313 throw (ForeignException)e.getCause();
314 }
315 LOG.warn("Got Exception in SnapshotSubprocedurePool", e);
316 throw new ForeignException(name, e.getCause());
317 } finally {
318 cancelTasks();
319 }
320 return false;
321 }
322
323
324
325
326
327 void cancelTasks() throws InterruptedException {
328 Collection<Future<Void>> tasks = futures;
329 LOG.debug("cancelling " + tasks.size() + " tasks for snapshot " + name);
330 for (Future<Void> f: tasks) {
331
332
333
334
335 f.cancel(false);
336 }
337
338
339 while (!futures.isEmpty()) {
340
341 LOG.warn("Removing cancelled elements from taskPool");
342 futures.remove(taskPool.take());
343 }
344 stop();
345 }
346
347
348
349
350 void stop() {
351 if (this.stopped) return;
352
353 this.stopped = true;
354 this.executor.shutdownNow();
355 }
356 }
357
358
359
360
361
362
363 @Override
364 public void initialize(RegionServerServices rss) throws KeeperException {
365 this.rss = rss;
366 ZooKeeperWatcher zkw = rss.getZooKeeper();
367 this.memberRpcs = new ZKProcedureMemberRpcs(zkw,
368 SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION);
369
370
371 Configuration conf = rss.getConfiguration();
372 long keepAlive = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY, SNAPSHOT_TIMEOUT_MILLIS_DEFAULT);
373 int opThreads = conf.getInt(SNAPSHOT_REQUEST_THREADS_KEY, SNAPSHOT_REQUEST_THREADS_DEFAULT);
374
375
376 ThreadPoolExecutor pool = ProcedureMember.defaultPool(rss.getServerName().toString(),
377 opThreads, keepAlive);
378 this.member = new ProcedureMember(memberRpcs, pool, new SnapshotSubprocedureBuilder());
379 }
380
381 @Override
382 public String getProcedureSignature() {
383 return SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION;
384 }
385
386 }