1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.List;
24 import java.util.ListIterator;
25 import java.util.Map;
26 import java.util.concurrent.Callable;
27 import java.util.concurrent.ExecutionException;
28 import java.util.concurrent.Executors;
29 import java.util.concurrent.Future;
30 import java.util.concurrent.ThreadFactory;
31 import java.util.concurrent.ThreadPoolExecutor;
32 import java.util.concurrent.TimeUnit;
33
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.hadoop.classification.InterfaceAudience;
37 import org.apache.hadoop.conf.Configuration;
38 import org.apache.hadoop.hbase.HRegionInfo;
39 import org.apache.hadoop.hbase.RegionTransition;
40 import org.apache.hadoop.hbase.Server;
41 import org.apache.hadoop.hbase.ServerName;
42 import org.apache.hadoop.hbase.catalog.MetaEditor;
43 import org.apache.hadoop.hbase.executor.EventType;
44 import org.apache.hadoop.hbase.util.Bytes;
45 import org.apache.hadoop.hbase.util.CancelableProgressable;
46 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
47 import org.apache.hadoop.hbase.util.HasThread;
48 import org.apache.hadoop.hbase.util.PairOfSameType;
49 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
50 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
51 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
52 import org.apache.zookeeper.KeeperException;
53 import org.apache.zookeeper.KeeperException.NodeExistsException;
54
55 import com.google.common.util.concurrent.ThreadFactoryBuilder;
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80 @InterfaceAudience.Private
81 public class SplitTransaction {
82 private static final Log LOG = LogFactory.getLog(SplitTransaction.class);
83
84
85
86
87 private final HRegion parent;
88 private HRegionInfo hri_a;
89 private HRegionInfo hri_b;
90 private long fileSplitTimeout = 30000;
91 private int znodeVersion = -1;
92
93
94
95
96 private final byte [] splitrow;
97
98
99
100
101
102
/**
 * Steps that are written to the journal while a split advances. rollback() replays the
 * journal backwards and undoes each recorded step, so the declaration order below mirrors
 * the order in which the steps are normally recorded.
 */
enum JournalEntry {
  /** Recorded after the SPLITTING znode creation step; undone by deleting that znode. */
  SET_SPLITTING_IN_ZK,

  /** Recorded after the splits directory was created; undone by cleaning that directory. */
  CREATE_SPLIT_DIR,

  /** Recorded after the parent close attempt; undone by re-initializing the parent. */
  CLOSED_PARENT_REGION,

  /** Recorded after the parent left the online regions; undone by adding it back. */
  OFFLINED_PARENT,

  /** Recorded just before the first daughter is created; undone by cleaning that daughter. */
  STARTED_REGION_A_CREATION,

  /** Recorded just before the second daughter is created; undone by cleaning that daughter. */
  STARTED_REGION_B_CREATION,

  /** Point of no return: once recorded, rollback() gives up and returns false. */
  PONR
}
135
136
137
138
/** Steps completed so far, in execution order; consumed in reverse by rollback(). */
private final List<JournalEntry> journal = new ArrayList<JournalEntry>();

/**
 * Creates a transaction that will split the given region at the given row.
 * Nothing is validated here; call prepare() before executing.
 *
 * @param r region to split
 * @param splitrow row at which to split; stored by reference, not copied
 */
public SplitTransaction(final HRegion r, final byte [] splitrow) {
  this.splitrow = splitrow;
  this.parent = r;
}
150
151
152
153
154
155
156 public boolean prepare() {
157 if (!this.parent.isSplittable()) return false;
158
159 if (this.splitrow == null) return false;
160 HRegionInfo hri = this.parent.getRegionInfo();
161 parent.prepareToSplit();
162
163 byte [] startKey = hri.getStartKey();
164 byte [] endKey = hri.getEndKey();
165 if (Bytes.equals(startKey, splitrow) ||
166 !this.parent.getRegionInfo().containsRow(splitrow)) {
167 LOG.info("Split row is not inside region key range or is equal to " +
168 "startkey: " + Bytes.toStringBinary(this.splitrow));
169 return false;
170 }
171 long rid = getDaughterRegionIdTimestamp(hri);
172 this.hri_a = new HRegionInfo(hri.getTableName(), startKey, this.splitrow, false, rid);
173 this.hri_b = new HRegionInfo(hri.getTableName(), this.splitrow, endKey, false, rid);
174 return true;
175 }
176
177
178
179
180
181
182 private static long getDaughterRegionIdTimestamp(final HRegionInfo hri) {
183 long rid = EnvironmentEdgeManager.currentTimeMillis();
184
185
186 if (rid < hri.getRegionId()) {
187 LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() +
188 " but current time here is " + rid);
189 rid = hri.getRegionId() + 1;
190 }
191 return rid;
192 }
193
194 private static IOException closedByOtherException = new IOException(
195 "Failed to close region: already closed by another thread");
196
197
198
199
200
201
202
203
204
205
206
207 final RegionServerServices services) throws IOException {
208 LOG.info("Starting split of region " + this.parent);
209 if ((server != null && server.isStopped()) ||
210 (services != null && services.isStopping())) {
211 throw new IOException("Server is stopped or stopping");
212 }
213 assert !this.parent.lock.writeLock().isHeldByCurrentThread():
214 "Unsafe to hold write lock while performing RPCs";
215
216
217 if (this.parent.getCoprocessorHost() != null) {
218 this.parent.getCoprocessorHost().preSplit();
219 }
220
221
222 if (this.parent.getCoprocessorHost() != null) {
223 this.parent.getCoprocessorHost().preSplit(this.splitrow);
224 }
225
226
227 boolean testing = server == null? true:
228 server.getConfiguration().getBoolean("hbase.testing.nocluster", false);
229 this.fileSplitTimeout = testing ? this.fileSplitTimeout :
230 server.getConfiguration().getLong("hbase.regionserver.fileSplitTimeout",
231 this.fileSplitTimeout);
232
233
234
235 if (server != null && server.getZooKeeper() != null) {
236 try {
237 createNodeSplitting(server.getZooKeeper(),
238 this.parent.getRegionInfo(), server.getServerName());
239 } catch (KeeperException e) {
240 throw new IOException("Failed creating SPLITTING znode on " +
241 this.parent.getRegionNameAsString(), e);
242 }
243 }
244 this.journal.add(JournalEntry.SET_SPLITTING_IN_ZK);
245 if (server != null && server.getZooKeeper() != null) {
246 try {
247
248
249
250
251
252
253 this.znodeVersion = transitionNodeSplitting(server.getZooKeeper(),
254 this.parent.getRegionInfo(), server.getServerName(), -1);
255 } catch (KeeperException e) {
256 throw new IOException("Failed setting SPLITTING znode on "
257 + this.parent.getRegionNameAsString(), e);
258 }
259 }
260
261 this.parent.getRegionFileSystem().createSplitsDir();
262 this.journal.add(JournalEntry.CREATE_SPLIT_DIR);
263
264 Map<byte[], List<StoreFile>> hstoreFilesToSplit = null;
265 Exception exceptionToThrow = null;
266 try{
267 hstoreFilesToSplit = this.parent.close(false);
268 } catch (Exception e) {
269 exceptionToThrow = e;
270 }
271 if (exceptionToThrow == null && hstoreFilesToSplit == null) {
272
273
274
275
276
277 exceptionToThrow = closedByOtherException;
278 }
279 if (exceptionToThrow != closedByOtherException) {
280 this.journal.add(JournalEntry.CLOSED_PARENT_REGION);
281 }
282 if (exceptionToThrow != null) {
283 if (exceptionToThrow instanceof IOException) throw (IOException)exceptionToThrow;
284 throw new IOException(exceptionToThrow);
285 }
286 if (!testing) {
287 services.removeFromOnlineRegions(this.parent, null);
288 }
289 this.journal.add(JournalEntry.OFFLINED_PARENT);
290
291
292
293
294
295
296
297 splitStoreFiles(hstoreFilesToSplit);
298
299
300
301
302
303 this.journal.add(JournalEntry.STARTED_REGION_A_CREATION);
304 HRegion a = this.parent.createDaughterRegionFromSplits(this.hri_a);
305
306
307 this.journal.add(JournalEntry.STARTED_REGION_B_CREATION);
308 HRegion b = this.parent.createDaughterRegionFromSplits(this.hri_b);
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325 this.journal.add(JournalEntry.PONR);
326
327
328
329
330
331
332 if (!testing) {
333 MetaEditor.splitRegion(server.getCatalogTracker(), parent.getRegionInfo(),
334 a.getRegionInfo(), b.getRegionInfo(), server.getServerName());
335 }
336 return new PairOfSameType<HRegion>(a, b);
337 }
338
339
340
341
342
343
344
345
346
347
348
349
350 final RegionServerServices services, HRegion a, HRegion b)
351 throws IOException {
352 boolean stopped = server != null && server.isStopped();
353 boolean stopping = services != null && services.isStopping();
354
355 if (stopped || stopping) {
356 LOG.info("Not opening daughters " +
357 b.getRegionInfo().getRegionNameAsString() +
358 " and " +
359 a.getRegionInfo().getRegionNameAsString() +
360 " because stopping=" + stopping + ", stopped=" + stopped);
361 } else {
362
363 DaughterOpener aOpener = new DaughterOpener(server, a);
364 DaughterOpener bOpener = new DaughterOpener(server, b);
365 aOpener.start();
366 bOpener.start();
367 try {
368 aOpener.join();
369 bOpener.join();
370 } catch (InterruptedException e) {
371 Thread.currentThread().interrupt();
372 throw new IOException("Interrupted " + e.getMessage());
373 }
374 if (aOpener.getException() != null) {
375 throw new IOException("Failed " +
376 aOpener.getName(), aOpener.getException());
377 }
378 if (bOpener.getException() != null) {
379 throw new IOException("Failed " +
380 bOpener.getName(), bOpener.getException());
381 }
382 if (services != null) {
383 try {
384
385 services.postOpenDeployTasks(b, server.getCatalogTracker());
386
387 services.addToOnlineRegions(b);
388 services.postOpenDeployTasks(a, server.getCatalogTracker());
389 services.addToOnlineRegions(a);
390 } catch (KeeperException ke) {
391 throw new IOException(ke);
392 }
393 }
394 }
395 }
396
397
398
399
400
401
402
403
404
405
406
407
408 final RegionServerServices services, HRegion a, HRegion b)
409 throws IOException {
410
411 if (server != null && server.getZooKeeper() != null) {
412 try {
413 this.znodeVersion = transitionNodeSplit(server.getZooKeeper(),
414 parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
415 server.getServerName(), this.znodeVersion);
416
417 int spins = 0;
418
419
420
421 do {
422 if (spins % 10 == 0) {
423 LOG.debug("Still waiting on the master to process the split for " +
424 this.parent.getRegionInfo().getEncodedName());
425 }
426 Thread.sleep(100);
427
428 this.znodeVersion = tickleNodeSplit(server.getZooKeeper(),
429 parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
430 server.getServerName(), this.znodeVersion);
431 spins++;
432 } while (this.znodeVersion != -1 && !server.isStopped()
433 && !services.isStopping());
434 } catch (Exception e) {
435 if (e instanceof InterruptedException) {
436 Thread.currentThread().interrupt();
437 }
438 throw new IOException("Failed telling master about split", e);
439 }
440 }
441
442
443 if (this.parent.getCoprocessorHost() != null) {
444 this.parent.getCoprocessorHost().postSplit(a,b);
445 }
446
447
448
449
450 }
451
452
453
454
455
456
457
458
459
460
461
462
463 public PairOfSameType<HRegion> execute(final Server server,
464 final RegionServerServices services)
465 throws IOException {
466 PairOfSameType<HRegion> regions = createDaughters(server, services);
467 openDaughters(server, services, regions.getFirst(), regions.getSecond());
468 transitionZKNode(server, services, regions.getFirst(), regions.getSecond());
469 return regions;
470 }
471
472
473
474
475
476 class DaughterOpener extends HasThread {
477 private final Server server;
478 private final HRegion r;
479 private Throwable t = null;
480
481 DaughterOpener(final Server s, final HRegion r) {
482 super((s == null? "null-services": s.getServerName()) +
483 "-daughterOpener=" + r.getRegionInfo().getEncodedName());
484 setDaemon(true);
485 this.server = s;
486 this.r = r;
487 }
488
489
490
491
492
493 Throwable getException() {
494 return this.t;
495 }
496
497 @Override
498 public void run() {
499 try {
500 openDaughterRegion(this.server, r);
501 } catch (Throwable t) {
502 this.t = t;
503 }
504 }
505 }
506
507
508
509
510
511
512
513
514 void openDaughterRegion(final Server server, final HRegion daughter)
515 throws IOException, KeeperException {
516 HRegionInfo hri = daughter.getRegionInfo();
517 LoggingProgressable reporter = server == null ? null
518 : new LoggingProgressable(hri, server.getConfiguration().getLong(
519 "hbase.regionserver.split.daughter.open.log.interval", 10000));
520 daughter.openHRegion(reporter);
521 }
522
523 static class LoggingProgressable implements CancelableProgressable {
524 private final HRegionInfo hri;
525 private long lastLog = -1;
526 private final long interval;
527
528 LoggingProgressable(final HRegionInfo hri, final long interval) {
529 this.hri = hri;
530 this.interval = interval;
531 }
532
533 @Override
534 public boolean progress() {
535 long now = System.currentTimeMillis();
536 if (now - lastLog > this.interval) {
537 LOG.info("Opening " + this.hri.getRegionNameAsString());
538 this.lastLog = now;
539 }
540 return true;
541 }
542 }
543
544 private void splitStoreFiles(final Map<byte[], List<StoreFile>> hstoreFilesToSplit)
545 throws IOException {
546 if (hstoreFilesToSplit == null) {
547
548 throw new IOException("Close returned empty list of StoreFiles");
549 }
550
551
552
553 int nbFiles = hstoreFilesToSplit.size();
554 if (nbFiles == 0) {
555
556 return;
557 }
558 ThreadFactoryBuilder builder = new ThreadFactoryBuilder();
559 builder.setNameFormat("StoreFileSplitter-%1$d");
560 ThreadFactory factory = builder.build();
561 ThreadPoolExecutor threadPool =
562 (ThreadPoolExecutor) Executors.newFixedThreadPool(nbFiles, factory);
563 List<Future<Void>> futures = new ArrayList<Future<Void>>(nbFiles);
564
565
566 for (Map.Entry<byte[], List<StoreFile>> entry: hstoreFilesToSplit.entrySet()) {
567 for (StoreFile sf: entry.getValue()) {
568 StoreFileSplitter sfs = new StoreFileSplitter(entry.getKey(), sf);
569 futures.add(threadPool.submit(sfs));
570 }
571 }
572
573 threadPool.shutdown();
574
575
576 try {
577 boolean stillRunning = !threadPool.awaitTermination(
578 this.fileSplitTimeout, TimeUnit.MILLISECONDS);
579 if (stillRunning) {
580 threadPool.shutdownNow();
581
582 while (!threadPool.isTerminated()) {
583 Thread.sleep(50);
584 }
585 throw new IOException("Took too long to split the" +
586 " files and create the references, aborting split");
587 }
588 } catch (InterruptedException e) {
589 Thread.currentThread().interrupt();
590 throw new IOException("Interrupted while waiting for file splitters", e);
591 }
592
593
594 for (Future<Void> future: futures) {
595 try {
596 future.get();
597 } catch (InterruptedException e) {
598 Thread.currentThread().interrupt();
599 throw new IOException(
600 "Interrupted while trying to get the results of file splitters", e);
601 } catch (ExecutionException e) {
602 throw new IOException(e);
603 }
604 }
605 }
606
607 private void splitStoreFile(final byte[] family, final StoreFile sf) throws IOException {
608 HRegionFileSystem fs = this.parent.getRegionFileSystem();
609 String familyName = Bytes.toString(family);
610 fs.splitStoreFile(this.hri_a, familyName, sf, this.splitrow, false);
611 fs.splitStoreFile(this.hri_b, familyName, sf, this.splitrow, true);
612 }
613
614
615
616
617
618 class StoreFileSplitter implements Callable<Void> {
619 private final byte[] family;
620 private final StoreFile sf;
621
622
623
624
625
626
627 public StoreFileSplitter(final byte[] family, final StoreFile sf) {
628 this.sf = sf;
629 this.family = family;
630 }
631
632 public Void call() throws IOException {
633 splitStoreFile(family, sf);
634 return null;
635 }
636 }
637
638
639
640
641
642
643
644
645 public boolean rollback(final Server server, final RegionServerServices services)
646 throws IOException {
647
648 if (this.parent.getCoprocessorHost() != null) {
649 this.parent.getCoprocessorHost().preRollBackSplit();
650 }
651
652 boolean result = true;
653 ListIterator<JournalEntry> iterator =
654 this.journal.listIterator(this.journal.size());
655
656 while (iterator.hasPrevious()) {
657 JournalEntry je = iterator.previous();
658 switch(je) {
659
660 case SET_SPLITTING_IN_ZK:
661 if (server != null && server.getZooKeeper() != null) {
662 cleanZK(server, this.parent.getRegionInfo());
663 }
664 break;
665
666 case CREATE_SPLIT_DIR:
667 this.parent.writestate.writesEnabled = true;
668 this.parent.getRegionFileSystem().cleanupSplitsDir();
669 break;
670
671 case CLOSED_PARENT_REGION:
672 try {
673
674
675
676
677
678 this.parent.initialize();
679 } catch (IOException e) {
680 LOG.error("Failed rollbacking CLOSED_PARENT_REGION of region " +
681 this.parent.getRegionNameAsString(), e);
682 throw new RuntimeException(e);
683 }
684 break;
685
686 case STARTED_REGION_A_CREATION:
687 this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_a);
688 break;
689
690 case STARTED_REGION_B_CREATION:
691 this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_b);
692 break;
693
694 case OFFLINED_PARENT:
695 if (services != null) services.addToOnlineRegions(this.parent);
696 break;
697
698 case PONR:
699
700
701
702
703 return false;
704
705 default:
706 throw new RuntimeException("Unhandled journal entry: " + je);
707 }
708 }
709
710 if (this.parent.getCoprocessorHost() != null) {
711 this.parent.getCoprocessorHost().postRollBackSplit();
712 }
713 return result;
714 }
715
716 HRegionInfo getFirstDaughter() {
717 return hri_a;
718 }
719
720 HRegionInfo getSecondDaughter() {
721 return hri_b;
722 }
723
724 private static void cleanZK(final Server server, final HRegionInfo hri) {
725 try {
726
727 ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
728 EventType.RS_ZK_REGION_SPLITTING);
729 } catch (KeeperException e) {
730 server.abort("Failed cleanup of " + hri.getRegionNameAsString(), e);
731 }
732 }
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748 int createNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo region,
749 final ServerName serverName) throws KeeperException, IOException {
750 LOG.debug(zkw.prefix("Creating ephemeral node for " +
751 region.getEncodedName() + " in SPLITTING state"));
752 RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
753 region.getRegionName(), serverName);
754 String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
755 if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
756 throw new IOException("Failed create of ephemeral " + node);
757 }
758
759
760 return transitionNodeSplitting(zkw, region, serverName, -1);
761 }
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796 private static int transitionNodeSplit(ZooKeeperWatcher zkw,
797 HRegionInfo parent, HRegionInfo a, HRegionInfo b, ServerName serverName,
798 final int znodeVersion)
799 throws KeeperException, IOException {
800 byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
801 return ZKAssign.transitionNode(zkw, parent, serverName,
802 EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLIT,
803 znodeVersion, payload);
804 }
805
806
807
808
809
810
811
812
813
814
815
816 int transitionNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo parent,
817 final ServerName serverName, final int version) throws KeeperException, IOException {
818 return ZKAssign.transitionNode(zkw, parent, serverName,
819 EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
820 }
821
822 private static int tickleNodeSplit(ZooKeeperWatcher zkw,
823 HRegionInfo parent, HRegionInfo a, HRegionInfo b, ServerName serverName,
824 final int znodeVersion)
825 throws KeeperException, IOException {
826 byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
827 return ZKAssign.transitionNode(zkw, parent, serverName,
828 EventType.RS_ZK_REGION_SPLIT, EventType.RS_ZK_REGION_SPLIT,
829 znodeVersion, payload);
830 }
831 }