1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.List;
24 import java.util.ListIterator;
25 import java.util.Map;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.classification.InterfaceAudience;
30 import org.apache.hadoop.fs.Path;
31 import org.apache.hadoop.hbase.HConstants;
32 import org.apache.hadoop.hbase.HRegionInfo;
33 import org.apache.hadoop.hbase.RegionTransition;
34 import org.apache.hadoop.hbase.Server;
35 import org.apache.hadoop.hbase.ServerName;
36 import org.apache.hadoop.hbase.catalog.MetaEditor;
37 import org.apache.hadoop.hbase.catalog.MetaReader;
38 import org.apache.hadoop.hbase.executor.EventType;
39 import org.apache.hadoop.hbase.regionserver.SplitTransaction.LoggingProgressable;
40 import org.apache.hadoop.hbase.util.Bytes;
41 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
42 import org.apache.hadoop.hbase.util.Pair;
43 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
44 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
45 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
46 import org.apache.zookeeper.KeeperException;
47 import org.apache.zookeeper.KeeperException.NodeExistsException;
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77 @InterfaceAudience.Private
78 public class RegionMergeTransaction {
79 private static final Log LOG = LogFactory.getLog(RegionMergeTransaction.class);
80
81
82 private HRegionInfo mergedRegionInfo;
83
84 private final HRegion region_a;
85 private final HRegion region_b;
86
87 private final Path mergesdir;
88 private int znodeVersion = -1;
89
90 private final boolean forcible;
91
92
93
94
95
/**
 * Steps of a merge transaction that have been reached so far. Entries are
 * appended to the transaction journal in the order they happen and are
 * replayed in reverse order by {@code rollback}. The declaration order of
 * the constants is kept as is.
 */
enum JournalEntry {
  /** Added after the attempt to create the MERGING znode. */
  SET_MERGING_IN_ZK,

  /** Added after the merges directory has been created on disk. */
  CREATED_MERGE_DIR,

  /** Added after a close of region a was attempted by this transaction. */
  CLOSED_REGION_A,

  /** Added after the offline step for region a. */
  OFFLINED_REGION_A,

  /** Added after a close of region b was attempted by this transaction. */
  CLOSED_REGION_B,

  /** Added after the offline step for region b. */
  OFFLINED_REGION_B,

  /** Added right before the merged region is created from the merges dir. */
  STARTED_MERGED_REGION_CREATION,

  /**
   * Point of no return: once this entry is in the journal, {@code rollback}
   * stops and reports that the transaction cannot be undone.
   */
  PONR
}
131
132
133
134
135 private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
136
137 private static IOException closedByOtherException = new IOException(
138 "Failed to close region: already closed by another thread");
139
140
141
142
143
144
145
146 public RegionMergeTransaction(final HRegion a, final HRegion b,
147 final boolean forcible) {
148 if (a.getRegionInfo().compareTo(b.getRegionInfo()) <= 0) {
149 this.region_a = a;
150 this.region_b = b;
151 } else {
152 this.region_a = b;
153 this.region_b = a;
154 }
155 this.forcible = forcible;
156 this.mergesdir = region_a.getRegionFileSystem().getMergesDir();
157 }
158
159
160
161
162
163
164
165 public boolean prepare(final RegionServerServices services) {
166 if (!region_a.getTableDesc().getTableName()
167 .equals(region_b.getTableDesc().getTableName())) {
168 LOG.info("Can't merge regions " + region_a + "," + region_b
169 + " because they do not belong to the same table");
170 return false;
171 }
172 if (region_a.getRegionInfo().equals(region_b.getRegionInfo())) {
173 LOG.info("Can't merge the same region " + region_a);
174 return false;
175 }
176 if (!forcible && !HRegionInfo.areAdjacent(region_a.getRegionInfo(),
177 region_b.getRegionInfo())) {
178 String msg = "Skip merging " + this.region_a.getRegionNameAsString()
179 + " and " + this.region_b.getRegionNameAsString()
180 + ", because they are not adjacent.";
181 LOG.info(msg);
182 return false;
183 }
184 if (!this.region_a.isMergeable() || !this.region_b.isMergeable()) {
185 return false;
186 }
187 try {
188 boolean regionAHasMergeQualifier = hasMergeQualifierInMeta(services,
189 region_a.getRegionName());
190 if (regionAHasMergeQualifier ||
191 hasMergeQualifierInMeta(services, region_b.getRegionName())) {
192 LOG.debug("Region " + (regionAHasMergeQualifier ? region_a.getRegionNameAsString()
193 : region_b.getRegionNameAsString())
194 + " is not mergeable because it has merge qualifier in META");
195 return false;
196 }
197 } catch (IOException e) {
198 LOG.warn("Failed judging whether merge transaction is available for "
199 + region_a.getRegionNameAsString() + " and "
200 + region_b.getRegionNameAsString(), e);
201 return false;
202 }
203
204
205
206
207
208
209
210
211 this.mergedRegionInfo = getMergedRegionInfo(region_a.getRegionInfo(),
212 region_b.getRegionInfo());
213 return true;
214 }
215
216
217
218
219
220
221
222
223
224
225
226
227 public HRegion execute(final Server server,
228 final RegionServerServices services) throws IOException {
229 HRegion mergedRegion = createMergedRegion(server, services);
230 openMergedRegion(server, services, mergedRegion);
231 transitionZKNode(server, services);
232 return mergedRegion;
233 }
234
235
236
237
238
239
240
241
242
243
244 HRegion createMergedRegion(final Server server,
245 final RegionServerServices services) throws IOException {
246 LOG.info("Starting merge of " + region_a + " and "
247 + region_b.getRegionNameAsString() + ", forcible=" + forcible);
248 if ((server != null && server.isStopped())
249 || (services != null && services.isStopping())) {
250 throw new IOException("Server is stopped or stopping");
251 }
252
253
254 boolean testing = server == null ? true : server.getConfiguration()
255 .getBoolean("hbase.testing.nocluster", false);
256
257
258
259 if (server != null && server.getZooKeeper() != null) {
260 try {
261 createNodeMerging(server.getZooKeeper(), this.mergedRegionInfo,
262 server.getServerName(), region_a.getRegionInfo(), region_b.getRegionInfo());
263 } catch (KeeperException e) {
264 throw new IOException("Failed creating MERGING znode on "
265 + this.mergedRegionInfo.getRegionNameAsString(), e);
266 }
267 }
268 this.journal.add(JournalEntry.SET_MERGING_IN_ZK);
269 if (server != null && server.getZooKeeper() != null) {
270 try {
271
272
273
274
275
276 this.znodeVersion = transitionNodeMerging(server.getZooKeeper(),
277 this.mergedRegionInfo, server.getServerName(), -1,
278 region_a.getRegionInfo(), region_b.getRegionInfo());
279 } catch (KeeperException e) {
280 throw new IOException("Failed setting MERGING znode on "
281 + this.mergedRegionInfo.getRegionNameAsString(), e);
282 }
283 }
284
285 this.region_a.getRegionFileSystem().createMergesDir();
286 this.journal.add(JournalEntry.CREATED_MERGE_DIR);
287
288 Map<byte[], List<StoreFile>> hstoreFilesOfRegionA = closeAndOfflineRegion(
289 services, this.region_a, true, testing);
290 Map<byte[], List<StoreFile>> hstoreFilesOfRegionB = closeAndOfflineRegion(
291 services, this.region_b, false, testing);
292
293 assert hstoreFilesOfRegionA != null && hstoreFilesOfRegionB != null;
294
295
296
297
298
299
300 mergeStoreFiles(hstoreFilesOfRegionA, hstoreFilesOfRegionB);
301
302 if (server != null && server.getZooKeeper() != null) {
303 try {
304
305
306 this.znodeVersion = transitionNodeMerging(server.getZooKeeper(),
307 this.mergedRegionInfo, server.getServerName(), this.znodeVersion,
308 region_a.getRegionInfo(), region_b.getRegionInfo());
309 } catch (KeeperException e) {
310 throw new IOException("Failed setting MERGING znode on "
311 + this.mergedRegionInfo.getRegionNameAsString(), e);
312 }
313 }
314
315
316
317
318
319 this.journal.add(JournalEntry.STARTED_MERGED_REGION_CREATION);
320 HRegion mergedRegion = createMergedRegionFromMerges(this.region_a,
321 this.region_b, this.mergedRegionInfo);
322
323
324
325
326
327 this.journal.add(JournalEntry.PONR);
328
329
330
331
332
333
334 if (!testing) {
335 MetaEditor.mergeRegions(server.getCatalogTracker(),
336 mergedRegion.getRegionInfo(), region_a.getRegionInfo(),
337 region_b.getRegionInfo(), server.getServerName());
338 }
339 return mergedRegion;
340 }
341
342
343
344
345
346
347
348
349
350
351 HRegion createMergedRegionFromMerges(final HRegion a, final HRegion b,
352 final HRegionInfo mergedRegion) throws IOException {
353 return a.createMergedRegionFromMerges(mergedRegion, b);
354 }
355
356
357
358
359
360
361
362
363
364
365 private Map<byte[], List<StoreFile>> closeAndOfflineRegion(
366 final RegionServerServices services, final HRegion region,
367 final boolean isRegionA, final boolean testing) throws IOException {
368 Map<byte[], List<StoreFile>> hstoreFilesToMerge = null;
369 Exception exceptionToThrow = null;
370 try {
371 hstoreFilesToMerge = region.close(false);
372 } catch (Exception e) {
373 exceptionToThrow = e;
374 }
375 if (exceptionToThrow == null && hstoreFilesToMerge == null) {
376
377
378
379
380
381 exceptionToThrow = closedByOtherException;
382 }
383 if (exceptionToThrow != closedByOtherException) {
384 this.journal.add(isRegionA ? JournalEntry.CLOSED_REGION_A
385 : JournalEntry.CLOSED_REGION_B);
386 }
387 if (exceptionToThrow != null) {
388 if (exceptionToThrow instanceof IOException)
389 throw (IOException) exceptionToThrow;
390 throw new IOException(exceptionToThrow);
391 }
392
393 if (!testing) {
394 services.removeFromOnlineRegions(region, null);
395 }
396 this.journal.add(isRegionA ? JournalEntry.OFFLINED_REGION_A
397 : JournalEntry.OFFLINED_REGION_B);
398 return hstoreFilesToMerge;
399 }
400
401
402
403
404
405
406
407 public static HRegionInfo getMergedRegionInfo(final HRegionInfo a,
408 final HRegionInfo b) {
409 long rid = EnvironmentEdgeManager.currentTimeMillis();
410
411
412 if (rid < a.getRegionId() || rid < b.getRegionId()) {
413 LOG.warn("Clock skew; merging regions id are " + a.getRegionId()
414 + " and " + b.getRegionId() + ", but current time here is " + rid);
415 rid = Math.max(a.getRegionId(), b.getRegionId()) + 1;
416 }
417
418 byte[] startKey = null;
419 byte[] endKey = null;
420
421 if (a.compareTo(b) <= 0) {
422 startKey = a.getStartKey();
423 } else {
424 startKey = b.getStartKey();
425 }
426
427 if (Bytes.equals(a.getEndKey(), HConstants.EMPTY_BYTE_ARRAY)
428 || (!Bytes.equals(b.getEndKey(), HConstants.EMPTY_BYTE_ARRAY)
429 && Bytes.compareTo(a.getEndKey(), b.getEndKey()) > 0)) {
430 endKey = a.getEndKey();
431 } else {
432 endKey = b.getEndKey();
433 }
434
435
436 HRegionInfo mergedRegionInfo = new HRegionInfo(a.getTableName(), startKey,
437 endKey, false, rid);
438 return mergedRegionInfo;
439 }
440
441
442
443
444
445
446
447
448
449
450 void openMergedRegion(final Server server,
451 final RegionServerServices services, HRegion merged) throws IOException {
452 boolean stopped = server != null && server.isStopped();
453 boolean stopping = services != null && services.isStopping();
454 if (stopped || stopping) {
455 LOG.info("Not opening merged region " + merged.getRegionNameAsString()
456 + " because stopping=" + stopping + ", stopped=" + stopped);
457 return;
458 }
459 HRegionInfo hri = merged.getRegionInfo();
460 LoggingProgressable reporter = server == null ? null
461 : new LoggingProgressable(hri, server.getConfiguration().getLong(
462 "hbase.regionserver.regionmerge.open.log.interval", 10000));
463 merged.openHRegion(reporter);
464
465 if (services != null) {
466 try {
467 services.postOpenDeployTasks(merged, server.getCatalogTracker());
468 services.addToOnlineRegions(merged);
469 } catch (KeeperException ke) {
470 throw new IOException(ke);
471 }
472 }
473
474 }
475
476
477
478
479
480
481
482
483
484 void transitionZKNode(final Server server, final RegionServerServices services)
485 throws IOException {
486 if (server == null || server.getZooKeeper() == null) {
487 return;
488 }
489
490
491 try {
492 this.znodeVersion = transitionNodeMerge(server.getZooKeeper(),
493 this.mergedRegionInfo, region_a.getRegionInfo(),
494 region_b.getRegionInfo(), server.getServerName(), this.znodeVersion);
495
496 long startTime = EnvironmentEdgeManager.currentTimeMillis();
497 int spins = 0;
498
499
500
501 do {
502 if (spins % 10 == 0) {
503 LOG.debug("Still waiting on the master to process the merge for "
504 + this.mergedRegionInfo.getEncodedName() + ", waited "
505 + (EnvironmentEdgeManager.currentTimeMillis() - startTime) + "ms");
506 }
507 Thread.sleep(100);
508
509 this.znodeVersion = tickleNodeMerge(server.getZooKeeper(),
510 this.mergedRegionInfo, region_a.getRegionInfo(),
511 region_b.getRegionInfo(), server.getServerName(), this.znodeVersion);
512 spins++;
513 } while (this.znodeVersion != -1 && !server.isStopped()
514 && !services.isStopping());
515 } catch (Exception e) {
516 if (e instanceof InterruptedException) {
517 Thread.currentThread().interrupt();
518 }
519 throw new IOException("Failed telling master about merge "
520 + mergedRegionInfo.getEncodedName(), e);
521 }
522
523
524
525
526
527 }
528
529
530
531
532
533
534
535 private void mergeStoreFiles(
536 Map<byte[], List<StoreFile>> hstoreFilesOfRegionA,
537 Map<byte[], List<StoreFile>> hstoreFilesOfRegionB)
538 throws IOException {
539
540 HRegionFileSystem fs_a = this.region_a.getRegionFileSystem();
541 for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionA
542 .entrySet()) {
543 String familyName = Bytes.toString(entry.getKey());
544 for (StoreFile storeFile : entry.getValue()) {
545 fs_a.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
546 this.mergesdir);
547 }
548 }
549
550 HRegionFileSystem fs_b = this.region_b.getRegionFileSystem();
551 for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionB
552 .entrySet()) {
553 String familyName = Bytes.toString(entry.getKey());
554 for (StoreFile storeFile : entry.getValue()) {
555 fs_b.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
556 this.mergesdir);
557 }
558 }
559 }
560
561
562
563
564
565
566
567
568
569 public boolean rollback(final Server server,
570 final RegionServerServices services) throws IOException {
571 assert this.mergedRegionInfo != null;
572 boolean result = true;
573 ListIterator<JournalEntry> iterator = this.journal
574 .listIterator(this.journal.size());
575
576 while (iterator.hasPrevious()) {
577 JournalEntry je = iterator.previous();
578 switch (je) {
579
580 case SET_MERGING_IN_ZK:
581 if (server != null && server.getZooKeeper() != null) {
582 cleanZK(server, this.mergedRegionInfo);
583 }
584 break;
585
586 case CREATED_MERGE_DIR:
587 this.region_a.writestate.writesEnabled = true;
588 this.region_b.writestate.writesEnabled = true;
589 this.region_a.getRegionFileSystem().cleanupMergesDir();
590 break;
591
592 case CLOSED_REGION_A:
593 try {
594
595
596
597
598
599 this.region_a.initialize();
600 } catch (IOException e) {
601 LOG.error("Failed rollbacking CLOSED_REGION_A of region "
602 + this.region_a.getRegionNameAsString(), e);
603 throw new RuntimeException(e);
604 }
605 break;
606
607 case OFFLINED_REGION_A:
608 if (services != null)
609 services.addToOnlineRegions(this.region_a);
610 break;
611
612 case CLOSED_REGION_B:
613 try {
614 this.region_b.initialize();
615 } catch (IOException e) {
616 LOG.error("Failed rollbacking CLOSED_REGION_A of region "
617 + this.region_b.getRegionNameAsString(), e);
618 throw new RuntimeException(e);
619 }
620 break;
621
622 case OFFLINED_REGION_B:
623 if (services != null)
624 services.addToOnlineRegions(this.region_b);
625 break;
626
627 case STARTED_MERGED_REGION_CREATION:
628 this.region_a.getRegionFileSystem().cleanupMergedRegion(
629 this.mergedRegionInfo);
630 break;
631
632 case PONR:
633
634
635 return false;
636
637 default:
638 throw new RuntimeException("Unhandled journal entry: " + je);
639 }
640 }
641 return result;
642 }
643
644 HRegionInfo getMergedRegionInfo() {
645 return this.mergedRegionInfo;
646 }
647
648
649 Path getMergesDir() {
650 return this.mergesdir;
651 }
652
653 private static void cleanZK(final Server server, final HRegionInfo hri) {
654 try {
655
656 ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
657 EventType.RS_ZK_REGION_MERGING);
658 } catch (KeeperException.NoNodeException e) {
659 LOG.warn("Failed cleanup zk node of " + hri.getRegionNameAsString(), e);
660 } catch (KeeperException e) {
661 server.abort("Failed cleanup zk node of " + hri.getRegionNameAsString(),e);
662 }
663
664 }
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681 int createNodeMerging(final ZooKeeperWatcher zkw, final HRegionInfo region,
682 final ServerName serverName, final HRegionInfo a,
683 final HRegionInfo b) throws KeeperException, IOException {
684 LOG.debug(zkw.prefix("Creating ephemeral node for "
685 + region.getEncodedName() + " in MERGING state"));
686 byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
687 RegionTransition rt = RegionTransition.createRegionTransition(
688 EventType.RS_ZK_REGION_MERGING, region.getRegionName(), serverName, payload);
689 String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
690 if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
691 throw new IOException("Failed create of ephemeral " + node);
692 }
693
694
695 return transitionNodeMerging(zkw, region, serverName, -1, a, b);
696 }
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737 private static int transitionNodeMerge(ZooKeeperWatcher zkw,
738 HRegionInfo merged, HRegionInfo a, HRegionInfo b, ServerName serverName,
739 final int znodeVersion) throws KeeperException, IOException {
740 byte[] payload = HRegionInfo.toDelimitedByteArray(merged, a, b);
741 return ZKAssign.transitionNode(zkw, merged, serverName,
742 EventType.RS_ZK_REGION_MERGING, EventType.RS_ZK_REGION_MERGED,
743 znodeVersion, payload);
744 }
745
746
747
748
749
750
751
752
753
754
755
756 int transitionNodeMerging(final ZooKeeperWatcher zkw,
757 final HRegionInfo parent, final ServerName serverName, final int version,
758 final HRegionInfo a, final HRegionInfo b) throws KeeperException, IOException {
759 byte[] payload = HRegionInfo.toDelimitedByteArray(a, b);
760 return ZKAssign.transitionNode(zkw, parent, serverName,
761 EventType.RS_ZK_REGION_MERGING, EventType.RS_ZK_REGION_MERGING,
762 version, payload);
763 }
764
765 private static int tickleNodeMerge(ZooKeeperWatcher zkw, HRegionInfo merged,
766 HRegionInfo a, HRegionInfo b, ServerName serverName,
767 final int znodeVersion) throws KeeperException, IOException {
768 byte[] payload = HRegionInfo.toDelimitedByteArray(a, b);
769 return ZKAssign.transitionNode(zkw, merged, serverName,
770 EventType.RS_ZK_REGION_MERGED, EventType.RS_ZK_REGION_MERGED,
771 znodeVersion, payload);
772 }
773
774
775
776
777
778
779
780
781
782 boolean hasMergeQualifierInMeta(final RegionServerServices services,
783 final byte[] regionName) throws IOException {
784 if (services == null) return false;
785
786
787 Pair<HRegionInfo, HRegionInfo> mergeRegions = MetaReader
788 .getRegionsFromMergeQualifier(services.getCatalogTracker(), regionName);
789 if (mergeRegions != null &&
790 (mergeRegions.getFirst() != null || mergeRegions.getSecond() != null)) {
791
792 return true;
793 }
794 return false;
795 }
796 }
797