1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.List;
24 import java.util.ListIterator;
25 import java.util.Map;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.classification.InterfaceAudience;
30 import org.apache.hadoop.fs.Path;
31 import org.apache.hadoop.hbase.HRegionInfo;
32 import org.apache.hadoop.hbase.RegionTransition;
33 import org.apache.hadoop.hbase.Server;
34 import org.apache.hadoop.hbase.ServerName;
35 import org.apache.hadoop.hbase.catalog.MetaEditor;
36 import org.apache.hadoop.hbase.catalog.MetaReader;
37 import org.apache.hadoop.hbase.executor.EventType;
38 import org.apache.hadoop.hbase.regionserver.SplitTransaction.LoggingProgressable;
39 import org.apache.hadoop.hbase.util.Bytes;
40 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
41 import org.apache.hadoop.hbase.util.Pair;
42 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
43 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
44 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
45 import org.apache.zookeeper.KeeperException;
46 import org.apache.zookeeper.KeeperException.NodeExistsException;
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76 @InterfaceAudience.Private
77 public class RegionMergeTransaction {
78 private static final Log LOG = LogFactory.getLog(RegionMergeTransaction.class);
79
80
81 private HRegionInfo mergedRegionInfo;
82
83 private final HRegion region_a;
84 private final HRegion region_b;
85
86 private final Path mergesdir;
87 private int znodeVersion = -1;
88
89 private final boolean forcible;
90
91
92
93
94
/**
 * Steps of a merge that are recorded in the journal as they happen.
 * {@code rollback} walks the journal backwards and undoes each recorded step.
 */
enum JournalEntry {
  /** Recorded after the MERGING znode creation step; undone by deleting the znode. */
  SET_MERGING_IN_ZK,

  /** Recorded after the merges directory was created under region_a. */
  CREATED_MERGE_DIR,

  /** Recorded when a close of region_a was attempted by this transaction. */
  CLOSED_REGION_A,

  /** Recorded after region_a was taken out of the online regions. */
  OFFLINED_REGION_A,

  /** Recorded when a close of region_b was attempted by this transaction. */
  CLOSED_REGION_B,

  /** Recorded after region_b was taken out of the online regions. */
  OFFLINED_REGION_B,

  /** Recorded just before the merged region is created from the merges directory. */
  STARTED_MERGED_REGION_CREATION,

  /**
   * Point of no return: once this entry is in the journal, {@code rollback}
   * gives up and returns false.
   */
  PONR
}
130
131
132
133
134 private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
135
136 private static IOException closedByOtherException = new IOException(
137 "Failed to close region: already closed by another thread");
138
139
140
141
142
143
144
145 public RegionMergeTransaction(final HRegion a, final HRegion b,
146 final boolean forcible) {
147 if (a.getRegionInfo().compareTo(b.getRegionInfo()) <= 0) {
148 this.region_a = a;
149 this.region_b = b;
150 } else {
151 this.region_a = b;
152 this.region_b = a;
153 }
154 this.forcible = forcible;
155 this.mergesdir = region_a.getRegionFileSystem().getMergesDir();
156 }
157
158
159
160
161
162
163
164 public boolean prepare(final RegionServerServices services) {
165 if (!region_a.getTableDesc().getNameAsString()
166 .equals(region_b.getTableDesc().getNameAsString())) {
167 LOG.info("Can't merge regions " + region_a + "," + region_b
168 + " because they do not belong to the same table");
169 return false;
170 }
171 if (region_a.getRegionInfo().equals(region_b.getRegionInfo())) {
172 LOG.info("Can't merge the same region " + region_a);
173 return false;
174 }
175 if (!forcible && !HRegionInfo.areAdjacent(region_a.getRegionInfo(),
176 region_b.getRegionInfo())) {
177 String msg = "Skip merging " + this.region_a.getRegionNameAsString()
178 + " and " + this.region_b.getRegionNameAsString()
179 + ", because they are not adjacent.";
180 LOG.info(msg);
181 return false;
182 }
183 if (!this.region_a.isMergeable() || !this.region_b.isMergeable()) {
184 return false;
185 }
186 try {
187 boolean regionAHasMergeQualifier = hasMergeQualifierInMeta(services,
188 region_a.getRegionName());
189 if (regionAHasMergeQualifier ||
190 hasMergeQualifierInMeta(services, region_b.getRegionName())) {
191 LOG.debug("Region " + (regionAHasMergeQualifier ? region_a.getRegionNameAsString()
192 : region_b.getRegionNameAsString())
193 + " is not mergeable because it has merge qualifier in META");
194 return false;
195 }
196 } catch (IOException e) {
197 LOG.warn("Failed judging whether merge transaction is available for "
198 + region_a.getRegionNameAsString() + " and "
199 + region_b.getRegionNameAsString(), e);
200 return false;
201 }
202
203
204
205
206
207
208
209
210 this.mergedRegionInfo = getMergedRegionInfo(region_a.getRegionInfo(),
211 region_b.getRegionInfo());
212 return true;
213 }
214
215
216
217
218
219
220
221
222
223
224
225
226 public HRegion execute(final Server server,
227 final RegionServerServices services) throws IOException {
228 HRegion mergedRegion = createMergedRegion(server, services);
229 openMergedRegion(server, services, mergedRegion);
230 transitionZKNode(server, services);
231 return mergedRegion;
232 }
233
234
235
236
237
238
239
240
241
242
243 HRegion createMergedRegion(final Server server,
244 final RegionServerServices services) throws IOException {
245 LOG.info("Starting merge of " + region_a + " and "
246 + region_b.getRegionNameAsString() + ", forcible=" + forcible);
247 if ((server != null && server.isStopped())
248 || (services != null && services.isStopping())) {
249 throw new IOException("Server is stopped or stopping");
250 }
251
252
253 boolean testing = server == null ? true : server.getConfiguration()
254 .getBoolean("hbase.testing.nocluster", false);
255
256
257
258 if (server != null && server.getZooKeeper() != null) {
259 try {
260 createNodeMerging(server.getZooKeeper(), this.mergedRegionInfo,
261 server.getServerName());
262 } catch (KeeperException e) {
263 throw new IOException("Failed creating MERGING znode on "
264 + this.mergedRegionInfo.getRegionNameAsString(), e);
265 }
266 }
267 this.journal.add(JournalEntry.SET_MERGING_IN_ZK);
268 if (server != null && server.getZooKeeper() != null) {
269 try {
270
271
272
273
274
275 this.znodeVersion = transitionNodeMerging(server.getZooKeeper(),
276 this.mergedRegionInfo, server.getServerName(), -1);
277 } catch (KeeperException e) {
278 throw new IOException("Failed setting MERGING znode on "
279 + this.mergedRegionInfo.getRegionNameAsString(), e);
280 }
281 }
282
283 this.region_a.getRegionFileSystem().createMergesDir();
284 this.journal.add(JournalEntry.CREATED_MERGE_DIR);
285
286 Map<byte[], List<StoreFile>> hstoreFilesOfRegionA = closeAndOfflineRegion(
287 services, this.region_a, true, testing);
288 Map<byte[], List<StoreFile>> hstoreFilesOfRegionB = closeAndOfflineRegion(
289 services, this.region_b, false, testing);
290
291 assert hstoreFilesOfRegionA != null && hstoreFilesOfRegionB != null;
292
293
294
295
296
297
298 mergeStoreFiles(hstoreFilesOfRegionA, hstoreFilesOfRegionB);
299
300
301
302
303
304 this.journal.add(JournalEntry.STARTED_MERGED_REGION_CREATION);
305 HRegion mergedRegion = createMergedRegionFromMerges(this.region_a,
306 this.region_b, this.mergedRegionInfo);
307
308
309
310
311
312 this.journal.add(JournalEntry.PONR);
313
314
315
316
317
318
319 if (!testing) {
320 MetaEditor.mergeRegions(server.getCatalogTracker(),
321 mergedRegion.getRegionInfo(), region_a.getRegionInfo(),
322 region_b.getRegionInfo(), server.getServerName());
323 }
324 return mergedRegion;
325 }
326
327
328
329
330
331
332
333
334
335
336 HRegion createMergedRegionFromMerges(final HRegion a, final HRegion b,
337 final HRegionInfo mergedRegion) throws IOException {
338 return a.createMergedRegionFromMerges(mergedRegion, b);
339 }
340
341
342
343
344
345
346
347
348
349
350 private Map<byte[], List<StoreFile>> closeAndOfflineRegion(
351 final RegionServerServices services, final HRegion region,
352 final boolean isRegionA, final boolean testing) throws IOException {
353 Map<byte[], List<StoreFile>> hstoreFilesToMerge = null;
354 Exception exceptionToThrow = null;
355 try {
356 hstoreFilesToMerge = region.close(false);
357 } catch (Exception e) {
358 exceptionToThrow = e;
359 }
360 if (exceptionToThrow == null && hstoreFilesToMerge == null) {
361
362
363
364
365
366 exceptionToThrow = closedByOtherException;
367 }
368 if (exceptionToThrow != closedByOtherException) {
369 this.journal.add(isRegionA ? JournalEntry.CLOSED_REGION_A
370 : JournalEntry.CLOSED_REGION_B);
371 }
372 if (exceptionToThrow != null) {
373 if (exceptionToThrow instanceof IOException)
374 throw (IOException) exceptionToThrow;
375 throw new IOException(exceptionToThrow);
376 }
377
378 if (!testing) {
379 services.removeFromOnlineRegions(region, null);
380 }
381 this.journal.add(isRegionA ? JournalEntry.OFFLINED_REGION_A
382 : JournalEntry.OFFLINED_REGION_B);
383 return hstoreFilesToMerge;
384 }
385
386
387
388
389
390
391
392 public static HRegionInfo getMergedRegionInfo(final HRegionInfo a,
393 final HRegionInfo b) {
394 long rid = EnvironmentEdgeManager.currentTimeMillis();
395
396
397 if (rid < a.getRegionId() || rid < b.getRegionId()) {
398 LOG.warn("Clock skew; merging regions id are " + a.getRegionId()
399 + " and " + b.getRegionId() + ", but current time here is " + rid);
400 rid = Math.max(a.getRegionId(), b.getRegionId()) + 1;
401 }
402
403 byte[] startKey = null;
404 byte[] endKey = null;
405 if (a.compareTo(b) <= 0) {
406 startKey = a.getStartKey();
407 endKey = b.getEndKey();
408 } else {
409 startKey = b.getStartKey();
410 endKey = a.getEndKey();
411 }
412
413
414 HRegionInfo mergedRegionInfo = new HRegionInfo(a.getTableName(), startKey,
415 endKey, false, rid);
416 return mergedRegionInfo;
417 }
418
419
420
421
422
423
424
425
426
427
428 void openMergedRegion(final Server server,
429 final RegionServerServices services, HRegion merged) throws IOException {
430 boolean stopped = server != null && server.isStopped();
431 boolean stopping = services != null && services.isStopping();
432 if (stopped || stopping) {
433 LOG.info("Not opening merged region " + merged.getRegionNameAsString()
434 + " because stopping=" + stopping + ", stopped=" + stopped);
435 return;
436 }
437 HRegionInfo hri = merged.getRegionInfo();
438 LoggingProgressable reporter = server == null ? null
439 : new LoggingProgressable(hri, server.getConfiguration().getLong(
440 "hbase.regionserver.regionmerge.open.log.interval", 10000));
441 merged.openHRegion(reporter);
442
443 if (services != null) {
444 try {
445 services.postOpenDeployTasks(merged, server.getCatalogTracker());
446 services.addToOnlineRegions(merged);
447 } catch (KeeperException ke) {
448 throw new IOException(ke);
449 }
450 }
451
452 }
453
454
455
456
457
458
459
460
461
462 void transitionZKNode(final Server server, final RegionServerServices services)
463 throws IOException {
464 if (server == null || server.getZooKeeper() == null) {
465 return;
466 }
467
468
469 try {
470 this.znodeVersion = transitionNodeMerge(server.getZooKeeper(),
471 this.mergedRegionInfo, region_a.getRegionInfo(),
472 region_b.getRegionInfo(), server.getServerName(), this.znodeVersion);
473
474 long startTime = EnvironmentEdgeManager.currentTimeMillis();
475 int spins = 0;
476
477
478
479 do {
480 if (spins % 10 == 0) {
481 LOG.debug("Still waiting on the master to process the merge for "
482 + this.mergedRegionInfo.getEncodedName() + ", waited "
483 + (EnvironmentEdgeManager.currentTimeMillis() - startTime) + "ms");
484 }
485 Thread.sleep(100);
486
487 this.znodeVersion = tickleNodeMerge(server.getZooKeeper(),
488 this.mergedRegionInfo, region_a.getRegionInfo(),
489 region_b.getRegionInfo(), server.getServerName(), this.znodeVersion);
490 spins++;
491 } while (this.znodeVersion != -1 && !server.isStopped()
492 && !services.isStopping());
493 } catch (Exception e) {
494 if (e instanceof InterruptedException) {
495 Thread.currentThread().interrupt();
496 }
497 throw new IOException("Failed telling master about merge "
498 + mergedRegionInfo.getEncodedName(), e);
499 }
500
501
502
503
504
505 }
506
507
508
509
510
511
512
513 private void mergeStoreFiles(
514 Map<byte[], List<StoreFile>> hstoreFilesOfRegionA,
515 Map<byte[], List<StoreFile>> hstoreFilesOfRegionB)
516 throws IOException {
517
518 HRegionFileSystem fs_a = this.region_a.getRegionFileSystem();
519 for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionA
520 .entrySet()) {
521 String familyName = Bytes.toString(entry.getKey());
522 for (StoreFile storeFile : entry.getValue()) {
523 fs_a.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
524 this.mergesdir);
525 }
526 }
527
528 HRegionFileSystem fs_b = this.region_b.getRegionFileSystem();
529 for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionB
530 .entrySet()) {
531 String familyName = Bytes.toString(entry.getKey());
532 for (StoreFile storeFile : entry.getValue()) {
533 fs_b.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
534 this.mergesdir);
535 }
536 }
537 }
538
539
540
541
542
543
544
545
546
547 public boolean rollback(final Server server,
548 final RegionServerServices services) throws IOException {
549 assert this.mergedRegionInfo != null;
550 boolean result = true;
551 ListIterator<JournalEntry> iterator = this.journal
552 .listIterator(this.journal.size());
553
554 while (iterator.hasPrevious()) {
555 JournalEntry je = iterator.previous();
556 switch (je) {
557
558 case SET_MERGING_IN_ZK:
559 if (server != null && server.getZooKeeper() != null) {
560 cleanZK(server, this.mergedRegionInfo);
561 }
562 break;
563
564 case CREATED_MERGE_DIR:
565 this.region_a.writestate.writesEnabled = true;
566 this.region_b.writestate.writesEnabled = true;
567 this.region_a.getRegionFileSystem().cleanupMergesDir();
568 break;
569
570 case CLOSED_REGION_A:
571 try {
572
573
574
575
576
577 this.region_a.initialize();
578 } catch (IOException e) {
579 LOG.error("Failed rollbacking CLOSED_REGION_A of region "
580 + this.region_a.getRegionNameAsString(), e);
581 throw new RuntimeException(e);
582 }
583 break;
584
585 case OFFLINED_REGION_A:
586 if (services != null)
587 services.addToOnlineRegions(this.region_a);
588 break;
589
590 case CLOSED_REGION_B:
591 try {
592 this.region_b.initialize();
593 } catch (IOException e) {
594 LOG.error("Failed rollbacking CLOSED_REGION_A of region "
595 + this.region_b.getRegionNameAsString(), e);
596 throw new RuntimeException(e);
597 }
598 break;
599
600 case OFFLINED_REGION_B:
601 if (services != null)
602 services.addToOnlineRegions(this.region_b);
603 break;
604
605 case STARTED_MERGED_REGION_CREATION:
606 this.region_a.getRegionFileSystem().cleanupMergedRegion(
607 this.mergedRegionInfo);
608 break;
609
610 case PONR:
611
612
613 return false;
614
615 default:
616 throw new RuntimeException("Unhandled journal entry: " + je);
617 }
618 }
619 return result;
620 }
621
622 HRegionInfo getMergedRegionInfo() {
623 return this.mergedRegionInfo;
624 }
625
626
627 Path getMergesDir() {
628 return this.mergesdir;
629 }
630
631 private static void cleanZK(final Server server, final HRegionInfo hri) {
632 try {
633
634 ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
635 EventType.RS_ZK_REGION_MERGING);
636 } catch (KeeperException.NoNodeException e) {
637 LOG.warn("Failed cleanup zk node of " + hri.getRegionNameAsString(), e);
638 } catch (KeeperException e) {
639 server.abort("Failed cleanup zk node of " + hri.getRegionNameAsString(),e);
640 }
641
642 }
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659 int createNodeMerging(final ZooKeeperWatcher zkw, final HRegionInfo region,
660 final ServerName serverName) throws KeeperException, IOException {
661 LOG.debug(zkw.prefix("Creating ephemeral node for "
662 + region.getEncodedName() + " in MERGING state"));
663 RegionTransition rt = RegionTransition.createRegionTransition(
664 EventType.RS_ZK_REGION_MERGING, region.getRegionName(), serverName);
665 String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
666 if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
667 throw new IOException("Failed create of ephemeral " + node);
668 }
669
670
671 return transitionNodeMerging(zkw, region, serverName, -1);
672 }
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713 private static int transitionNodeMerge(ZooKeeperWatcher zkw,
714 HRegionInfo merged, HRegionInfo a, HRegionInfo b, ServerName serverName,
715 final int znodeVersion) throws KeeperException, IOException {
716 byte[] payload = HRegionInfo.toDelimitedByteArray(merged, a, b);
717 return ZKAssign.transitionNode(zkw, merged, serverName,
718 EventType.RS_ZK_REGION_MERGING, EventType.RS_ZK_REGION_MERGE,
719 znodeVersion, payload);
720 }
721
722
723
724
725
726
727
728
729
730
731
732 int transitionNodeMerging(final ZooKeeperWatcher zkw,
733 final HRegionInfo parent, final ServerName serverName, final int version)
734 throws KeeperException, IOException {
735 return ZKAssign.transitionNode(zkw, parent, serverName,
736 EventType.RS_ZK_REGION_MERGING, EventType.RS_ZK_REGION_MERGING,
737 version);
738 }
739
740 private static int tickleNodeMerge(ZooKeeperWatcher zkw, HRegionInfo merged,
741 HRegionInfo a, HRegionInfo b, ServerName serverName,
742 final int znodeVersion) throws KeeperException, IOException {
743 byte[] payload = HRegionInfo.toDelimitedByteArray(a, b);
744 return ZKAssign.transitionNode(zkw, merged, serverName,
745 EventType.RS_ZK_REGION_MERGE, EventType.RS_ZK_REGION_MERGE,
746 znodeVersion, payload);
747 }
748
749
750
751
752
753
754
755
756
757 boolean hasMergeQualifierInMeta(final RegionServerServices services,
758 final byte[] regionName) throws IOException {
759
760
761 Pair<HRegionInfo, HRegionInfo> mergeRegions = MetaReader
762 .getRegionsFromMergeQualifier(services.getCatalogTracker(), regionName);
763 if (mergeRegions != null &&
764 (mergeRegions.getFirst() != null || mergeRegions.getSecond() != null)) {
765
766 return true;
767 }
768 return false;
769 }
770 }
771