1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_MERGED;
22 import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_MERGING;
23 import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REQUEST_REGION_MERGE;
24
25 import java.io.IOException;
26 import java.util.ArrayList;
27 import java.util.List;
28 import java.util.ListIterator;
29 import java.util.Map;
30
31 import org.apache.commons.logging.Log;
32 import org.apache.commons.logging.LogFactory;
33 import org.apache.hadoop.classification.InterfaceAudience;
34 import org.apache.hadoop.fs.Path;
35 import org.apache.hadoop.hbase.HConstants;
36 import org.apache.hadoop.hbase.HRegionInfo;
37 import org.apache.hadoop.hbase.RegionTransition;
38 import org.apache.hadoop.hbase.Server;
39 import org.apache.hadoop.hbase.ServerName;
40 import org.apache.hadoop.hbase.catalog.MetaEditor;
41 import org.apache.hadoop.hbase.catalog.MetaReader;
42 import org.apache.hadoop.hbase.executor.EventType;
43 import org.apache.hadoop.hbase.regionserver.SplitTransaction.LoggingProgressable;
44 import org.apache.hadoop.hbase.util.Bytes;
45 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
46 import org.apache.hadoop.hbase.util.Pair;
47 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
48 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
49 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
50 import org.apache.zookeeper.KeeperException;
51 import org.apache.zookeeper.KeeperException.NodeExistsException;
52 import org.apache.zookeeper.data.Stat;
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82 @InterfaceAudience.Private
83 public class RegionMergeTransaction {
84 private static final Log LOG = LogFactory.getLog(RegionMergeTransaction.class);
85
86
87 private HRegionInfo mergedRegionInfo;
88
89 private final HRegion region_a;
90 private final HRegion region_b;
91
92 private final Path mergesdir;
93 private int znodeVersion = -1;
94
95 private final boolean forcible;
96
97
98
99
100
/**
 * Steps of a merge, recorded in the transaction journal in the order they are
 * taken. Rollback walks the journal backwards and undoes each recorded step.
 * The declaration order of the constants is preserved on purpose.
 */
enum JournalEntry {
  /** Recorded after the step that creates the merge request znode. */
  SET_MERGING_IN_ZK,

  /** Recorded after the merges directory has been created. */
  CREATED_MERGE_DIR,

  /** Recorded after the close attempt on region A (unless another thread closed it). */
  CLOSED_REGION_A,

  /** Recorded after region A has been taken out of the online regions. */
  OFFLINED_REGION_A,

  /** Recorded after the close attempt on region B (unless another thread closed it). */
  CLOSED_REGION_B,

  /** Recorded after region B has been taken out of the online regions. */
  OFFLINED_REGION_B,

  /** Recorded just before the merged region is created from the merges directory. */
  STARTED_MERGED_REGION_CREATION,

  /**
   * Recorded once the merged region has been created; rollback stops and
   * reports failure when it meets this entry (presumably "point of no return").
   */
  PONR
}
136
137
138
139
140 private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
141
142 private static IOException closedByOtherException = new IOException(
143 "Failed to close region: already closed by another thread");
144
145
146
147
148
149
150
151 public RegionMergeTransaction(final HRegion a, final HRegion b,
152 final boolean forcible) {
153 if (a.getRegionInfo().compareTo(b.getRegionInfo()) <= 0) {
154 this.region_a = a;
155 this.region_b = b;
156 } else {
157 this.region_a = b;
158 this.region_b = a;
159 }
160 this.forcible = forcible;
161 this.mergesdir = region_a.getRegionFileSystem().getMergesDir();
162 }
163
164
165
166
167
168
169
170 public boolean prepare(final RegionServerServices services) {
171 if (!region_a.getTableDesc().getTableName()
172 .equals(region_b.getTableDesc().getTableName())) {
173 LOG.info("Can't merge regions " + region_a + "," + region_b
174 + " because they do not belong to the same table");
175 return false;
176 }
177 if (region_a.getRegionInfo().equals(region_b.getRegionInfo())) {
178 LOG.info("Can't merge the same region " + region_a);
179 return false;
180 }
181 if (!forcible && !HRegionInfo.areAdjacent(region_a.getRegionInfo(),
182 region_b.getRegionInfo())) {
183 String msg = "Skip merging " + this.region_a.getRegionNameAsString()
184 + " and " + this.region_b.getRegionNameAsString()
185 + ", because they are not adjacent.";
186 LOG.info(msg);
187 return false;
188 }
189 if (!this.region_a.isMergeable() || !this.region_b.isMergeable()) {
190 return false;
191 }
192 try {
193 boolean regionAHasMergeQualifier = hasMergeQualifierInMeta(services,
194 region_a.getRegionName());
195 if (regionAHasMergeQualifier ||
196 hasMergeQualifierInMeta(services, region_b.getRegionName())) {
197 LOG.debug("Region " + (regionAHasMergeQualifier ? region_a.getRegionNameAsString()
198 : region_b.getRegionNameAsString())
199 + " is not mergeable because it has merge qualifier in META");
200 return false;
201 }
202 } catch (IOException e) {
203 LOG.warn("Failed judging whether merge transaction is available for "
204 + region_a.getRegionNameAsString() + " and "
205 + region_b.getRegionNameAsString(), e);
206 return false;
207 }
208
209
210
211
212
213
214
215
216 this.mergedRegionInfo = getMergedRegionInfo(region_a.getRegionInfo(),
217 region_b.getRegionInfo());
218 return true;
219 }
220
221
222
223
224
225
226
227
228
229
230
231
232 public HRegion execute(final Server server,
233 final RegionServerServices services) throws IOException {
234 HRegion mergedRegion = createMergedRegion(server, services);
235 openMergedRegion(server, services, mergedRegion);
236 transitionZKNode(server, services);
237 return mergedRegion;
238 }
239
240
241
242
243
244
245
246
247
248
249 HRegion createMergedRegion(final Server server,
250 final RegionServerServices services) throws IOException {
251 LOG.info("Starting merge of " + region_a + " and "
252 + region_b.getRegionNameAsString() + ", forcible=" + forcible);
253 if ((server != null && server.isStopped())
254 || (services != null && services.isStopping())) {
255 throw new IOException("Server is stopped or stopping");
256 }
257
258
259 boolean testing = server == null ? true : server.getConfiguration()
260 .getBoolean("hbase.testing.nocluster", false);
261
262
263
264 if (server != null && server.getZooKeeper() != null) {
265 try {
266 createNodeMerging(server.getZooKeeper(), this.mergedRegionInfo,
267 server.getServerName(), region_a.getRegionInfo(), region_b.getRegionInfo());
268 } catch (KeeperException e) {
269 throw new IOException("Failed creating PENDING_MERGE znode on "
270 + this.mergedRegionInfo.getRegionNameAsString(), e);
271 }
272 }
273 this.journal.add(JournalEntry.SET_MERGING_IN_ZK);
274 if (server != null && server.getZooKeeper() != null) {
275
276
277
278 znodeVersion = getZKNode(server, services);
279 }
280
281 this.region_a.getRegionFileSystem().createMergesDir();
282 this.journal.add(JournalEntry.CREATED_MERGE_DIR);
283
284 Map<byte[], List<StoreFile>> hstoreFilesOfRegionA = closeAndOfflineRegion(
285 services, this.region_a, true, testing);
286 Map<byte[], List<StoreFile>> hstoreFilesOfRegionB = closeAndOfflineRegion(
287 services, this.region_b, false, testing);
288
289 assert hstoreFilesOfRegionA != null && hstoreFilesOfRegionB != null;
290
291
292
293
294
295
296 mergeStoreFiles(hstoreFilesOfRegionA, hstoreFilesOfRegionB);
297
298 if (server != null && server.getZooKeeper() != null) {
299 try {
300
301
302 this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
303 this.mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo(),
304 server.getServerName(), this.znodeVersion,
305 RS_ZK_REGION_MERGING, RS_ZK_REGION_MERGING);
306 } catch (KeeperException e) {
307 throw new IOException("Failed setting MERGING znode on "
308 + this.mergedRegionInfo.getRegionNameAsString(), e);
309 }
310 }
311
312
313
314
315
316 this.journal.add(JournalEntry.STARTED_MERGED_REGION_CREATION);
317 HRegion mergedRegion = createMergedRegionFromMerges(this.region_a,
318 this.region_b, this.mergedRegionInfo);
319
320
321
322
323
324 this.journal.add(JournalEntry.PONR);
325
326
327
328
329
330
331 if (!testing) {
332 MetaEditor.mergeRegions(server.getCatalogTracker(),
333 mergedRegion.getRegionInfo(), region_a.getRegionInfo(),
334 region_b.getRegionInfo(), server.getServerName());
335 }
336 return mergedRegion;
337 }
338
339
340
341
342
343
344
345
346
347
348 HRegion createMergedRegionFromMerges(final HRegion a, final HRegion b,
349 final HRegionInfo mergedRegion) throws IOException {
350 return a.createMergedRegionFromMerges(mergedRegion, b);
351 }
352
353
354
355
356
357
358
359
360
361
362 private Map<byte[], List<StoreFile>> closeAndOfflineRegion(
363 final RegionServerServices services, final HRegion region,
364 final boolean isRegionA, final boolean testing) throws IOException {
365 Map<byte[], List<StoreFile>> hstoreFilesToMerge = null;
366 Exception exceptionToThrow = null;
367 try {
368 hstoreFilesToMerge = region.close(false);
369 } catch (Exception e) {
370 exceptionToThrow = e;
371 }
372 if (exceptionToThrow == null && hstoreFilesToMerge == null) {
373
374
375
376
377
378 exceptionToThrow = closedByOtherException;
379 }
380 if (exceptionToThrow != closedByOtherException) {
381 this.journal.add(isRegionA ? JournalEntry.CLOSED_REGION_A
382 : JournalEntry.CLOSED_REGION_B);
383 }
384 if (exceptionToThrow != null) {
385 if (exceptionToThrow instanceof IOException)
386 throw (IOException) exceptionToThrow;
387 throw new IOException(exceptionToThrow);
388 }
389
390 if (!testing) {
391 services.removeFromOnlineRegions(region, null);
392 }
393 this.journal.add(isRegionA ? JournalEntry.OFFLINED_REGION_A
394 : JournalEntry.OFFLINED_REGION_B);
395 return hstoreFilesToMerge;
396 }
397
398
399
400
401
402
403
404 public static HRegionInfo getMergedRegionInfo(final HRegionInfo a,
405 final HRegionInfo b) {
406 long rid = EnvironmentEdgeManager.currentTimeMillis();
407
408
409 if (rid < a.getRegionId() || rid < b.getRegionId()) {
410 LOG.warn("Clock skew; merging regions id are " + a.getRegionId()
411 + " and " + b.getRegionId() + ", but current time here is " + rid);
412 rid = Math.max(a.getRegionId(), b.getRegionId()) + 1;
413 }
414
415 byte[] startKey = null;
416 byte[] endKey = null;
417
418 if (a.compareTo(b) <= 0) {
419 startKey = a.getStartKey();
420 } else {
421 startKey = b.getStartKey();
422 }
423
424 if (Bytes.equals(a.getEndKey(), HConstants.EMPTY_BYTE_ARRAY)
425 || (!Bytes.equals(b.getEndKey(), HConstants.EMPTY_BYTE_ARRAY)
426 && Bytes.compareTo(a.getEndKey(), b.getEndKey()) > 0)) {
427 endKey = a.getEndKey();
428 } else {
429 endKey = b.getEndKey();
430 }
431
432
433 HRegionInfo mergedRegionInfo = new HRegionInfo(a.getTable(), startKey,
434 endKey, false, rid);
435 return mergedRegionInfo;
436 }
437
438
439
440
441
442
443
444
445
446
447 void openMergedRegion(final Server server,
448 final RegionServerServices services, HRegion merged) throws IOException {
449 boolean stopped = server != null && server.isStopped();
450 boolean stopping = services != null && services.isStopping();
451 if (stopped || stopping) {
452 LOG.info("Not opening merged region " + merged.getRegionNameAsString()
453 + " because stopping=" + stopping + ", stopped=" + stopped);
454 return;
455 }
456 HRegionInfo hri = merged.getRegionInfo();
457 LoggingProgressable reporter = server == null ? null
458 : new LoggingProgressable(hri, server.getConfiguration().getLong(
459 "hbase.regionserver.regionmerge.open.log.interval", 10000));
460 merged.openHRegion(reporter);
461
462 if (services != null) {
463 try {
464 services.postOpenDeployTasks(merged, server.getCatalogTracker());
465 services.addToOnlineRegions(merged);
466 } catch (KeeperException ke) {
467 throw new IOException(ke);
468 }
469 }
470
471 }
472
473
474
475
476
477
478
479
480
481 void transitionZKNode(final Server server, final RegionServerServices services)
482 throws IOException {
483 if (server == null || server.getZooKeeper() == null) {
484 return;
485 }
486
487
488 try {
489 this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
490 this.mergedRegionInfo, region_a.getRegionInfo(),
491 region_b.getRegionInfo(), server.getServerName(), this.znodeVersion,
492 RS_ZK_REGION_MERGING, RS_ZK_REGION_MERGED);
493
494 long startTime = EnvironmentEdgeManager.currentTimeMillis();
495 int spins = 0;
496
497
498
499 do {
500 if (spins % 10 == 0) {
501 LOG.debug("Still waiting on the master to process the merge for "
502 + this.mergedRegionInfo.getEncodedName() + ", waited "
503 + (EnvironmentEdgeManager.currentTimeMillis() - startTime) + "ms");
504 }
505 Thread.sleep(100);
506
507 this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
508 this.mergedRegionInfo, region_a.getRegionInfo(),
509 region_b.getRegionInfo(), server.getServerName(), this.znodeVersion,
510 RS_ZK_REGION_MERGED, RS_ZK_REGION_MERGED);
511 spins++;
512 } while (this.znodeVersion != -1 && !server.isStopped()
513 && !services.isStopping());
514 } catch (Exception e) {
515 if (e instanceof InterruptedException) {
516 Thread.currentThread().interrupt();
517 }
518 throw new IOException("Failed telling master about merge "
519 + mergedRegionInfo.getEncodedName(), e);
520 }
521
522
523
524
525 }
526
527
528
529
530
531
532
533
534
535 private int getZKNode(final Server server,
536 final RegionServerServices services) throws IOException {
537
538 try {
539 int spins = 0;
540 Stat stat = new Stat();
541 ZooKeeperWatcher zkw = server.getZooKeeper();
542 ServerName expectedServer = server.getServerName();
543 String node = mergedRegionInfo.getEncodedName();
544 while (!(server.isStopped() || services.isStopping())) {
545 if (spins % 5 == 0) {
546 LOG.debug("Still waiting for master to process "
547 + "the pending_merge for " + node);
548 transitionMergingNode(zkw, mergedRegionInfo, region_a.getRegionInfo(),
549 region_b.getRegionInfo(), expectedServer, -1, RS_ZK_REQUEST_REGION_MERGE,
550 RS_ZK_REQUEST_REGION_MERGE);
551 }
552 Thread.sleep(100);
553 spins++;
554 byte [] data = ZKAssign.getDataNoWatch(zkw, node, stat);
555 if (data == null) {
556 throw new IOException("Data is null, merging node "
557 + node + " no longer exists");
558 }
559 RegionTransition rt = RegionTransition.parseFrom(data);
560 EventType et = rt.getEventType();
561 if (et == RS_ZK_REGION_MERGING) {
562 ServerName serverName = rt.getServerName();
563 if (!serverName.equals(expectedServer)) {
564 throw new IOException("Merging node " + node + " is for "
565 + serverName + ", not us " + expectedServer);
566 }
567 byte [] payloadOfMerging = rt.getPayload();
568 List<HRegionInfo> mergingRegions = HRegionInfo.parseDelimitedFrom(
569 payloadOfMerging, 0, payloadOfMerging.length);
570 assert mergingRegions.size() == 3;
571 HRegionInfo a = mergingRegions.get(1);
572 HRegionInfo b = mergingRegions.get(2);
573 HRegionInfo hri_a = region_a.getRegionInfo();
574 HRegionInfo hri_b = region_b.getRegionInfo();
575 if (!(hri_a.equals(a) && hri_b.equals(b))) {
576 throw new IOException("Merging node " + node + " is for " + a + ", "
577 + b + ", not expected regions: " + hri_a + ", " + hri_b);
578 }
579
580 return stat.getVersion();
581 }
582 if (et != RS_ZK_REQUEST_REGION_MERGE) {
583 throw new IOException("Merging node " + node
584 + " moved out of merging to " + et);
585 }
586 }
587
588 throw new IOException("Server is "
589 + (services.isStopping() ? "stopping" : "stopped"));
590 } catch (Exception e) {
591 if (e instanceof InterruptedException) {
592 Thread.currentThread().interrupt();
593 }
594 throw new IOException("Failed getting MERGING znode on "
595 + mergedRegionInfo.getRegionNameAsString(), e);
596 }
597 }
598
599
600
601
602
603
604
605 private void mergeStoreFiles(
606 Map<byte[], List<StoreFile>> hstoreFilesOfRegionA,
607 Map<byte[], List<StoreFile>> hstoreFilesOfRegionB)
608 throws IOException {
609
610 HRegionFileSystem fs_a = this.region_a.getRegionFileSystem();
611 for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionA
612 .entrySet()) {
613 String familyName = Bytes.toString(entry.getKey());
614 for (StoreFile storeFile : entry.getValue()) {
615 fs_a.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
616 this.mergesdir);
617 }
618 }
619
620 HRegionFileSystem fs_b = this.region_b.getRegionFileSystem();
621 for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionB
622 .entrySet()) {
623 String familyName = Bytes.toString(entry.getKey());
624 for (StoreFile storeFile : entry.getValue()) {
625 fs_b.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
626 this.mergesdir);
627 }
628 }
629 }
630
631
632
633
634
635
636
637
638
639 @SuppressWarnings("deprecation")
640 public boolean rollback(final Server server,
641 final RegionServerServices services) throws IOException {
642 assert this.mergedRegionInfo != null;
643 boolean result = true;
644 ListIterator<JournalEntry> iterator = this.journal
645 .listIterator(this.journal.size());
646
647 while (iterator.hasPrevious()) {
648 JournalEntry je = iterator.previous();
649 switch (je) {
650
651 case SET_MERGING_IN_ZK:
652 if (server != null && server.getZooKeeper() != null) {
653 cleanZK(server, this.mergedRegionInfo);
654 }
655 break;
656
657 case CREATED_MERGE_DIR:
658 this.region_a.writestate.writesEnabled = true;
659 this.region_b.writestate.writesEnabled = true;
660 this.region_a.getRegionFileSystem().cleanupMergesDir();
661 break;
662
663 case CLOSED_REGION_A:
664 try {
665
666
667
668
669
670 this.region_a.initialize();
671 } catch (IOException e) {
672 LOG.error("Failed rollbacking CLOSED_REGION_A of region "
673 + this.region_a.getRegionNameAsString(), e);
674 throw new RuntimeException(e);
675 }
676 break;
677
678 case OFFLINED_REGION_A:
679 if (services != null)
680 services.addToOnlineRegions(this.region_a);
681 break;
682
683 case CLOSED_REGION_B:
684 try {
685 this.region_b.initialize();
686 } catch (IOException e) {
687 LOG.error("Failed rollbacking CLOSED_REGION_A of region "
688 + this.region_b.getRegionNameAsString(), e);
689 throw new RuntimeException(e);
690 }
691 break;
692
693 case OFFLINED_REGION_B:
694 if (services != null)
695 services.addToOnlineRegions(this.region_b);
696 break;
697
698 case STARTED_MERGED_REGION_CREATION:
699 this.region_a.getRegionFileSystem().cleanupMergedRegion(
700 this.mergedRegionInfo);
701 break;
702
703 case PONR:
704
705
706 return false;
707
708 default:
709 throw new RuntimeException("Unhandled journal entry: " + je);
710 }
711 }
712 return result;
713 }
714
715 HRegionInfo getMergedRegionInfo() {
716 return this.mergedRegionInfo;
717 }
718
719
720 Path getMergesDir() {
721 return this.mergesdir;
722 }
723
724 private static void cleanZK(final Server server, final HRegionInfo hri) {
725 try {
726
727 if (!ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
728 RS_ZK_REQUEST_REGION_MERGE, server.getServerName())) {
729 ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
730 RS_ZK_REGION_MERGING, server.getServerName());
731 }
732 } catch (KeeperException.NoNodeException e) {
733 LOG.info("Failed cleanup zk node of " + hri.getRegionNameAsString(), e);
734 } catch (KeeperException e) {
735 server.abort("Failed cleanup zk node of " + hri.getRegionNameAsString(),e);
736 }
737 }
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753 public static void createNodeMerging(final ZooKeeperWatcher zkw, final HRegionInfo region,
754 final ServerName serverName, final HRegionInfo a,
755 final HRegionInfo b) throws KeeperException, IOException {
756 LOG.debug(zkw.prefix("Creating ephemeral node for "
757 + region.getEncodedName() + " in PENDING_MERGE state"));
758 byte [] payload = HRegionInfo.toDelimitedByteArray(region, a, b);
759 RegionTransition rt = RegionTransition.createRegionTransition(
760 RS_ZK_REQUEST_REGION_MERGE, region.getRegionName(), serverName, payload);
761 String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
762 if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
763 throw new IOException("Failed create of ephemeral " + node);
764 }
765 }
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806 public static int transitionMergingNode(ZooKeeperWatcher zkw,
807 HRegionInfo merged, HRegionInfo a, HRegionInfo b, ServerName serverName,
808 final int znodeVersion, final EventType beginState,
809 final EventType endState) throws KeeperException, IOException {
810 byte[] payload = HRegionInfo.toDelimitedByteArray(merged, a, b);
811 return ZKAssign.transitionNode(zkw, merged, serverName,
812 beginState, endState, znodeVersion, payload);
813 }
814
815
816
817
818
819
820
821
822
823 boolean hasMergeQualifierInMeta(final RegionServerServices services,
824 final byte[] regionName) throws IOException {
825 if (services == null) return false;
826
827
828 Pair<HRegionInfo, HRegionInfo> mergeRegions = MetaReader
829 .getRegionsFromMergeQualifier(services.getCatalogTracker(), regionName);
830 if (mergeRegions != null &&
831 (mergeRegions.getFirst() != null || mergeRegions.getSecond() != null)) {
832
833 return true;
834 }
835 return false;
836 }
837 }
838