1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.List;
24  import java.util.ListIterator;
25  import java.util.Map;
26  import java.util.concurrent.Callable;
27  import java.util.concurrent.ExecutionException;
28  import java.util.concurrent.Executors;
29  import java.util.concurrent.Future;
30  import java.util.concurrent.ThreadFactory;
31  import java.util.concurrent.ThreadPoolExecutor;
32  import java.util.concurrent.TimeUnit;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.classification.InterfaceAudience;
37  import org.apache.hadoop.conf.Configuration;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.RegionTransition;
40  import org.apache.hadoop.hbase.Server;
41  import org.apache.hadoop.hbase.ServerName;
42  import org.apache.hadoop.hbase.catalog.MetaEditor;
43  import org.apache.hadoop.hbase.executor.EventType;
44  import org.apache.hadoop.hbase.util.Bytes;
45  import org.apache.hadoop.hbase.util.CancelableProgressable;
46  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
47  import org.apache.hadoop.hbase.util.HasThread;
48  import org.apache.hadoop.hbase.util.PairOfSameType;
49  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
50  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
51  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
52  import org.apache.zookeeper.KeeperException;
53  import org.apache.zookeeper.KeeperException.NodeExistsException;
54  
55  import com.google.common.util.concurrent.ThreadFactoryBuilder;
56  
57  /**
58   * Executes region split as a "transaction".  Call {@link #prepare()} to set up
59   * the transaction, {@link #execute(Server, RegionServerServices)} to run the
60   * transaction and {@link #rollback(Server, RegionServerServices)} to clean up if execute fails.
61   *
62   * <p>Here is an example of how you would use this class:
63   * <pre>
64   *  SplitTransaction st = new SplitTransaction(parent, midKey);
65   *  if (!st.prepare()) return;
66   *  try {
67   *    st.execute(server, services);
68   *  } catch (IOException ioe) {
69   *    try {
70   *      st.rollback(server, services);
71   *      return;
72   *    } catch (RuntimeException e) {
73   *      myAbortable.abort("Failed split, abort");
74   *    }
75   *  }
76   * </pre>
77   * <p>This class is not thread safe.  The caller must ensure the split is run by
78   * one thread only.
79   */
80  @InterfaceAudience.Private
81  public class SplitTransaction {
82    private static final Log LOG = LogFactory.getLog(SplitTransaction.class);
83  
84    /*
85     * Region to split
86     */
87    private final HRegion parent;
88    private HRegionInfo hri_a;
89    private HRegionInfo hri_b;
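  /*
   * How long to wait, in milliseconds, for the store file split tasks to finish.
   * Overridden by "hbase.regionserver.fileSplitTimeout" when a server is present.
   */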
90    private long fileSplitTimeout = 30000;
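  /*
   * Version of the SPLITTING/SPLIT znode owned by this transaction; -1 until the
   * znode has been created, or again once the master has deleted it.
   */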
91    private int znodeVersion = -1;
92  
93    /*
94     * Row to split around
95     */
96    private final byte [] splitrow;
97  
98    /**
99     * Types to add to the transaction journal.
100    * Each enum is a step in the split transaction. Used to figure out how much
101    * we need to roll back.
102    */
103   enum JournalEntry {
104     /**
105      * Set region as in transition, set it into SPLITTING state.
106      */
107     SET_SPLITTING_IN_ZK,
108     /**
109      * We created the temporary split data directory.
110      */
111     CREATE_SPLIT_DIR,
112     /**
113      * Closed the parent region.
114      */
115     CLOSED_PARENT_REGION,
116     /**
117      * The parent has been taken out of the server's online regions list.
118      */
119     OFFLINED_PARENT,
120     /**
121      * Started in on creation of the first daughter region.
122      */
123     STARTED_REGION_A_CREATION,
124     /**
125      * Started in on the creation of the second daughter region.
126      */
127     STARTED_REGION_B_CREATION,
128     /**
129      * Point of no return.
130      * If we got here, then transaction is not recoverable other than by
131      * crashing out the regionserver.
132      */
133     PONR
134   }
135 
136   /*
137    * Journal of how far the split transaction has progressed.
138    */
139   private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
140 
141   /**
142    * Constructor
143    * @param r Region to split
144    * @param splitrow Row to split around
145    */
146   public SplitTransaction(final HRegion r, final byte [] splitrow) {
147     this.parent = r;
148     this.splitrow = splitrow;
149   }
150 
151   /**
152    * Does checks on split inputs.
153    * @return <code>true</code> if the region is splittable else
154    * <code>false</code> if it is not (e.g. it is already closed, etc.).
155    */
156   public boolean prepare() {
157     if (!this.parent.isSplittable()) return false;
158     // Split key can be null if this region is unsplittable; i.e. has refs.
159     if (this.splitrow == null) return false;
160     HRegionInfo hri = this.parent.getRegionInfo();
161     parent.prepareToSplit();
162     // Check splitrow.
163     byte [] startKey = hri.getStartKey();
164     byte [] endKey = hri.getEndKey();
165     if (Bytes.equals(startKey, splitrow) ||
166         !this.parent.getRegionInfo().containsRow(splitrow)) {
167       LOG.info("Split row is not inside region key range or is equal to " +
168           "startkey: " + Bytes.toStringBinary(this.splitrow));
169       return false;
170     }
171     long rid = getDaughterRegionIdTimestamp(hri);
172     this.hri_a = new HRegionInfo(hri.getTableName(), startKey, this.splitrow, false, rid);
173     this.hri_b = new HRegionInfo(hri.getTableName(), this.splitrow, endKey, false, rid);
174     return true;
175   }
176 
177   /**
178    * Calculate daughter regionid to use.
179    * @param hri Parent {@link HRegionInfo}
180    * @return Daughter region id (timestamp) to use.
181    */
182   private static long getDaughterRegionIdTimestamp(final HRegionInfo hri) {
183     long rid = EnvironmentEdgeManager.currentTimeMillis();
184     // Region id is a timestamp.  It can't be less than that of the parent, else the
185     // daughters will insert at the wrong location in .META. (See HBASE-710).
186     if (rid < hri.getRegionId()) {
187       LOG.warn("Clock skew; parent region's id is " + hri.getRegionId() +
188         " but current time here is " + rid);
189       rid = hri.getRegionId() + 1;
190     }
191     return rid;
192   }
193 
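  /*
   * Sentinel exception used by createDaughters to signal that the parent region
   * was already closed by another thread, in which case the split is abandoned.
   */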
194   private static IOException closedByOtherException = new IOException(
195       "Failed to close region: already closed by another thread");
196 
197   /**
198    * Prepare the regions and region files.
199    * @param server Hosting server instance.  Can be null when testing (won't try
200    * and update in zk if a null server)
201    * @param services Used to online/offline regions.
202    * @throws IOException If thrown, transaction failed.
203    *    Call {@link #rollback(Server, RegionServerServices)}
204    * @return Regions created
205    */
206   /* package */PairOfSameType<HRegion> createDaughters(final Server server,
207       final RegionServerServices services) throws IOException {
208     LOG.info("Starting split of region " + this.parent);
209     if ((server != null && server.isStopped()) ||
210         (services != null && services.isStopping())) {
211       throw new IOException("Server is stopped or stopping");
212     }
213     assert !this.parent.lock.writeLock().isHeldByCurrentThread():
214       "Unsafe to hold write lock while performing RPCs";
215 
216     // Coprocessor callback
217     if (this.parent.getCoprocessorHost() != null) {
218       this.parent.getCoprocessorHost().preSplit();
219     }
220 
221     // Coprocessor callback
222     if (this.parent.getCoprocessorHost() != null) {
223       this.parent.getCoprocessorHost().preSplit(this.splitrow);
224     }
225 
226     // If true, no cluster to write meta edits to or to update znodes in.
227     boolean testing = server == null? true:
228         server.getConfiguration().getBoolean("hbase.testing.nocluster", false);
229     this.fileSplitTimeout = testing ? this.fileSplitTimeout :
230         server.getConfiguration().getLong("hbase.regionserver.fileSplitTimeout",
231           this.fileSplitTimeout);
232 
233     // Set ephemeral SPLITTING znode up in zk.  Mocked servers sometimes don't
234     // have zookeeper so don't do zk stuff if server or zookeeper is null
235     if (server != null && server.getZooKeeper() != null) {
236       try {
237         createNodeSplitting(server.getZooKeeper(),
238           this.parent.getRegionInfo(), server.getServerName());
239       } catch (KeeperException e) {
240         throw new IOException("Failed creating SPLITTING znode on " +
241           this.parent.getRegionNameAsString(), e);
242       }
243     }
244     this.journal.add(JournalEntry.SET_SPLITTING_IN_ZK);
245     if (server != null && server.getZooKeeper() != null) {
246       try {
247         // Transition node from SPLITTING to SPLITTING after creating the split node.
248         // Master will get the callback for node change only if the transition is successful.
249         // Note that if the transition fails then the rollback will delete the created znode
250         // as the journal entry SET_SPLITTING_IN_ZK is added.
251         // TODO : Maybe we can add some new state to the znode and handle the new state
252         //        in case of success/failure
253         this.znodeVersion = transitionNodeSplitting(server.getZooKeeper(),
254             this.parent.getRegionInfo(), server.getServerName(), -1);
255       } catch (KeeperException e) {
256         throw new IOException("Failed setting SPLITTING znode on "
257             + this.parent.getRegionNameAsString(), e);
258       }
259     }
260 
261     this.parent.getRegionFileSystem().createSplitsDir();
262     this.journal.add(JournalEntry.CREATE_SPLIT_DIR);
263 
264     Map<byte[], List<StoreFile>> hstoreFilesToSplit = null;
265     Exception exceptionToThrow = null;
266     try{
267       hstoreFilesToSplit = this.parent.close(false);
268     } catch (Exception e) {
269       exceptionToThrow = e;
270     }
271     if (exceptionToThrow == null && hstoreFilesToSplit == null) {
272       // The region was closed by a concurrent thread.  We can't continue
273       // with the split; instead we must just abandon the split.  If we
274       // reopen or split this could cause problems because the region has
275       // probably already been moved to a different server, or is in the
276       // process of moving to a different server.
277       exceptionToThrow = closedByOtherException;
278     }
279     if (exceptionToThrow != closedByOtherException) {
280       this.journal.add(JournalEntry.CLOSED_PARENT_REGION);
281     }
282     if (exceptionToThrow != null) {
283       if (exceptionToThrow instanceof IOException) throw (IOException)exceptionToThrow;
284       throw new IOException(exceptionToThrow);
285     }
286     if (!testing) {
287       services.removeFromOnlineRegions(this.parent, null);
288     }
289     this.journal.add(JournalEntry.OFFLINED_PARENT);
290 
291     // TODO: If splitStoreFiles were multithreaded would we complete steps in
292     // less elapsed time?  St.Ack 20100920
293     //
294     // splitStoreFiles creates daughter region dirs under the parent splits dir
295     // Nothing to unroll here on failure -- the rollback of CREATE_SPLIT_DIR will
296     // clean this up.
297     splitStoreFiles(hstoreFilesToSplit);
298 
299     // Log to the journal that we are creating region A, the first daughter
300     // region.  We could fail halfway through.  If we do, we could have left
301     // stuff in fs that needs cleanup -- a storefile or two.  That's why we
302     // add entry to journal BEFORE rather than AFTER the change.
303     this.journal.add(JournalEntry.STARTED_REGION_A_CREATION);
304     HRegion a = this.parent.createDaughterRegionFromSplits(this.hri_a);
305 
306     // Ditto
307     this.journal.add(JournalEntry.STARTED_REGION_B_CREATION);
308     HRegion b = this.parent.createDaughterRegionFromSplits(this.hri_b);
309 
310     // This is the point of no return.  Adding subsequent edits to .META. as we
311     // do below when we do the daughter opens adding each to .META. can fail in
312     // various interesting ways the most interesting of which is a timeout
313     // BUT the edits all go through (See HBASE-3872).  IF we reach the PONR
314     // then subsequent failures need to crash out this regionserver; the
315     // server shutdown processing should be able to fix-up the incomplete split.
316     // The offlined parent will have the daughters as extra columns.  If
317     // we leave the daughter regions in place and do not remove them when we
318     // crash out, then they will have their references to the parent in place
319     // still and the server shutdown fixup of .META. will point to these
320     // regions.
321     // We should add the PONR JournalEntry before offlineParentInMeta, so even if
322     // offlineParentInMeta times out, this will cause the regionserver to exit, and then
323     // the master ServerShutdownHandler will fix up the daughters & avoid data loss. (See
324     // HBASE-4562).
325     this.journal.add(JournalEntry.PONR);
326 
327     // Edit parent in meta.  Offlines parent region and adds splita and splitb
328     // as an atomic update. See HBASE-7721. This update to META determines
329     // whether the region is split or not in case of failures.
330     // If it is successful, the master will roll forward; if not, the master will roll back
331     // and assign the parent region.
332     if (!testing) {
333       MetaEditor.splitRegion(server.getCatalogTracker(), parent.getRegionInfo(),
334           a.getRegionInfo(), b.getRegionInfo(), server.getServerName());
335     }
336     return new PairOfSameType<HRegion>(a, b);
337   }
338 
339   /**
340    * Perform the time-consuming opening of the daughter regions.
341    * @param server Hosting server instance.  Can be null when testing (won't try
342    * and update in zk if a null server)
343    * @param services Used to online/offline regions.
344    * @param a first daughter region
345    * @param b second daughter region
346    * @throws IOException If thrown, transaction failed.
347    *          Call {@link #rollback(Server, RegionServerServices)}
348    */
349   /* package */void openDaughters(final Server server,
350       final RegionServerServices services, HRegion a, HRegion b)
351       throws IOException {
352     boolean stopped = server != null && server.isStopped();
353     boolean stopping = services != null && services.isStopping();
354     // TODO: Is this check needed here?
355     if (stopped || stopping) {
356       LOG.info("Not opening daughters " +
357           b.getRegionInfo().getRegionNameAsString() +
358           " and " +
359           a.getRegionInfo().getRegionNameAsString() +
360           " because stopping=" + stopping + ", stopped=" + stopped);
361     } else {
362       // Open daughters in parallel.
363       DaughterOpener aOpener = new DaughterOpener(server, a);
364       DaughterOpener bOpener = new DaughterOpener(server, b);
365       aOpener.start();
366       bOpener.start();
367       try {
368         aOpener.join();
369         bOpener.join();
370       } catch (InterruptedException e) {
371         Thread.currentThread().interrupt();
372         throw new IOException("Interrupted " + e.getMessage());
373       }
374       if (aOpener.getException() != null) {
375         throw new IOException("Failed " +
376           aOpener.getName(), aOpener.getException());
377       }
378       if (bOpener.getException() != null) {
379         throw new IOException("Failed " +
380           bOpener.getName(), bOpener.getException());
381       }
382       if (services != null) {
383         try {
384           // add 2nd daughter first (see HBASE-4335)
385           services.postOpenDeployTasks(b, server.getCatalogTracker());
386           // Should add it to OnlineRegions
387           services.addToOnlineRegions(b);
388           services.postOpenDeployTasks(a, server.getCatalogTracker());
389           services.addToOnlineRegions(a);
390         } catch (KeeperException ke) {
391           throw new IOException(ke);
392         }
393       }
394     }
395   }
396 
397   /**
398    * Finish off split transaction, transition the zknode
399    * @param server Hosting server instance.  Can be null when testing (won't try
400    * and update in zk if a null server)
401    * @param services Used to online/offline regions.
402    * @param a first daughter region
403    * @param b second daughter region
404    * @throws IOException If thrown, transaction failed.
405    *          Call {@link #rollback(Server, RegionServerServices)}
406    */
407   /* package */void transitionZKNode(final Server server,
408       final RegionServerServices services, HRegion a, HRegion b)
409       throws IOException {
410     // Tell master about split by updating zk.  If we fail, abort.
411     if (server != null && server.getZooKeeper() != null) {
412       try {
413         this.znodeVersion = transitionNodeSplit(server.getZooKeeper(),
414           parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
415           server.getServerName(), this.znodeVersion);
416 
417         int spins = 0;
418         // Now wait for the master to process the split. We know it's done
419         // when the znode is deleted. The reason we keep tickling the znode is
420         // that it's possible for the master to miss an event.
421         do {
422           if (spins % 10 == 0) {
423             LOG.debug("Still waiting on the master to process the split for " +
424                 this.parent.getRegionInfo().getEncodedName());
425           }
426           Thread.sleep(100);
427           // When this returns -1 it means the znode doesn't exist
428           this.znodeVersion = tickleNodeSplit(server.getZooKeeper(),
429             parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
430             server.getServerName(), this.znodeVersion);
431           spins++;
432         } while (this.znodeVersion != -1 && !server.isStopped()
433             && !services.isStopping());
434       } catch (Exception e) {
435         if (e instanceof InterruptedException) {
436           Thread.currentThread().interrupt();
437         }
438         throw new IOException("Failed telling master about split", e);
439       }
440     }
441 
442     // Coprocessor callback
443     if (this.parent.getCoprocessorHost() != null) {
444       this.parent.getCoprocessorHost().postSplit(a,b);
445     }
446 
447     // Leaving here, the splitdir with its dross will be in place but since the
448     // split was successful, just leave it; it'll be cleaned when parent is
449     // deleted and cleaned up.
450   }
451 
452   /**
453    * Run the transaction.
454    * @param server Hosting server instance.  Can be null when testing (won't try
455    * and update in zk if a null server)
456    * @param services Used to online/offline regions.
457    * @throws IOException If thrown, transaction failed.
458    *          Call {@link #rollback(Server, RegionServerServices)}
459    * @return Regions created
461    * @see #rollback(Server, RegionServerServices)
462    */
463   public PairOfSameType<HRegion> execute(final Server server,
464       final RegionServerServices services)
465   throws IOException {
466     PairOfSameType<HRegion> regions = createDaughters(server, services);
467     openDaughters(server, services, regions.getFirst(), regions.getSecond());
468     transitionZKNode(server, services, regions.getFirst(), regions.getSecond());
469     return regions;
470   }
471 
472   /*
473    * Open daughter region in its own thread.
474    * If we fail, abort this hosting server.
475    */
476   class DaughterOpener extends HasThread {
477     private final Server server;
478     private final HRegion r;
479     private Throwable t = null;
480 
481     DaughterOpener(final Server s, final HRegion r) {
482       super((s == null? "null-services": s.getServerName()) +
483         "-daughterOpener=" + r.getRegionInfo().getEncodedName());
484       setDaemon(true);
485       this.server = s;
486       this.r = r;
487     }
488 
489     /**
490      * @return Null if the open succeeded, else the exception that caused the open to fail.
491      * Call this after the thread exits or you may get a stale view of the result.
492      */
493     Throwable getException() {
494       return this.t;
495     }
496 
497     @Override
498     public void run() {
499       try {
500         openDaughterRegion(this.server, r);
501       } catch (Throwable t) {
502         this.t = t;
503       }
504     }
505   }
506 
507   /**
508    * Open a daughter region.
509    * @param server Hosting server instance (can be null when testing)
510    * @param daughter The daughter region to open
511    * @throws IOException
512    * @throws KeeperException
513    */
514   void openDaughterRegion(final Server server, final HRegion daughter)
515   throws IOException, KeeperException {
516     HRegionInfo hri = daughter.getRegionInfo();
517     LoggingProgressable reporter = server == null ? null
518         : new LoggingProgressable(hri, server.getConfiguration().getLong(
519             "hbase.regionserver.split.daughter.open.log.interval", 10000));
520     daughter.openHRegion(reporter);
521   }
522 
523   static class LoggingProgressable implements CancelableProgressable {
524     private final HRegionInfo hri;
525     private long lastLog = -1;
526     private final long interval;
527 
528     LoggingProgressable(final HRegionInfo hri, final long interval) {
529       this.hri = hri;
530       this.interval = interval;
531     }
532 
533     @Override
534     public boolean progress() {
535       long now = System.currentTimeMillis();
536       if (now - lastLog > this.interval) {
537         LOG.info("Opening " + this.hri.getRegionNameAsString());
538         this.lastLog = now;
539       }
540       return true;
541     }
542   }
543 
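  /**
   * Creates split references for every store file of the parent region, running
   * the per-file splits in parallel on a dedicated thread pool.
   * @param hstoreFilesToSplit Store files of the parent region, keyed by column family.
   * @throws IOException if the close returned no files, the pool timed out, or a
   *           splitter task failed.
   */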
544   private void splitStoreFiles(final Map<byte[], List<StoreFile>> hstoreFilesToSplit)
545       throws IOException {
546     if (hstoreFilesToSplit == null) {
547       // Could be null because close didn't succeed -- for now consider it fatal
548       throw new IOException("Close returned empty list of StoreFiles");
549     }
550     // The following code sets up a thread pool executor with as many slots as
551     // there are files to split. It then fires up everything, waits for
552     // completion and finally checks for any exception
553     int nbFiles = hstoreFilesToSplit.size();
554     if (nbFiles == 0) {
555       // no file needs to be split.
556       return;
557     }
558     ThreadFactoryBuilder builder = new ThreadFactoryBuilder();
559     builder.setNameFormat("StoreFileSplitter-%1$d");
560     ThreadFactory factory = builder.build();
561     ThreadPoolExecutor threadPool =
562       (ThreadPoolExecutor) Executors.newFixedThreadPool(nbFiles, factory);
563     List<Future<Void>> futures = new ArrayList<Future<Void>>(nbFiles);
564 
565     // Split each store file.
566     for (Map.Entry<byte[], List<StoreFile>> entry: hstoreFilesToSplit.entrySet()) {
567       for (StoreFile sf: entry.getValue()) {
568         StoreFileSplitter sfs = new StoreFileSplitter(entry.getKey(), sf);
569         futures.add(threadPool.submit(sfs));
570       }
571     }
572     // Shutdown the pool
573     threadPool.shutdown();
574 
575     // Wait for all the tasks to finish
576     try {
577       boolean stillRunning = !threadPool.awaitTermination(
578           this.fileSplitTimeout, TimeUnit.MILLISECONDS);
579       if (stillRunning) {
580         threadPool.shutdownNow();
581         // wait for the thread to shutdown completely.
582         while (!threadPool.isTerminated()) {
583           Thread.sleep(50);
584         }
585         throw new IOException("Took too long to split the" +
586             " files and create the references, aborting split");
587       }
588     } catch (InterruptedException e) {
589       Thread.currentThread().interrupt();
590       throw new IOException("Interrupted while waiting for file splitters", e);
591     }
592 
593     // Look for any exception
594     for (Future<Void> future: futures) {
595       try {
596         future.get();
597       } catch (InterruptedException e) {
598         Thread.currentThread().interrupt();
599         throw new IOException(
600             "Interrupted while trying to get the results of file splitters", e);
601       } catch (ExecutionException e) {
602         throw new IOException(e);
603       }
604     }
605   }
606 
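  /**
   * Writes the split references for a single store file into both daughter
   * region directories, one for each side of the split row.
   * @param family Column family the store file belongs to
   * @param sf Store file to split
   * @throws IOException
   */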
607   private void splitStoreFile(final byte[] family, final StoreFile sf) throws IOException {
608     HRegionFileSystem fs = this.parent.getRegionFileSystem();
609     String familyName = Bytes.toString(family);
610     fs.splitStoreFile(this.hri_a, familyName, sf, this.splitrow, false);
611     fs.splitStoreFile(this.hri_b, familyName, sf, this.splitrow, true);
612   }
613 
614   /**
615    * Utility class used to do the file splitting / reference writing
616    * in parallel instead of sequentially.
617    */
618   class StoreFileSplitter implements Callable<Void> {
619     private final byte[] family;
620     private final StoreFile sf;
621 
622     /**
623      * Constructor that takes what it needs to split
624      * @param family Family that contains the store file
625      * @param sf which file
626      */
627     public StoreFileSplitter(final byte[] family, final StoreFile sf) {
628       this.sf = sf;
629       this.family = family;
630     }
631 
632     public Void call() throws IOException {
633       splitStoreFile(family, sf);
634       return null;
635     }
636   }
637 
638   /**
639    * @param server Hosting server instance (May be null when testing).
640    * @param services Used to online/offline regions.
641    * @throws IOException If thrown, rollback failed.  Take drastic action.
642    * @return True if we successfully rolled back, false if we got to the point
643    * of no return and so now need to abort the server to minimize damage.
644    */
645   public boolean rollback(final Server server, final RegionServerServices services)
646   throws IOException {
647     // Coprocessor callback
648     if (this.parent.getCoprocessorHost() != null) {
649       this.parent.getCoprocessorHost().preRollBackSplit();
650     }
651 
652     boolean result = true;
653     ListIterator<JournalEntry> iterator =
654       this.journal.listIterator(this.journal.size());
655     // Iterate in reverse.
656     while (iterator.hasPrevious()) {
657       JournalEntry je = iterator.previous();
658       switch(je) {
659 
660       case SET_SPLITTING_IN_ZK:
661         if (server != null && server.getZooKeeper() != null) {
662           cleanZK(server, this.parent.getRegionInfo());
663         }
664         break;
665 
666       case CREATE_SPLIT_DIR:
667         this.parent.writestate.writesEnabled = true;
668         this.parent.getRegionFileSystem().cleanupSplitsDir();
669         break;
670 
671       case CLOSED_PARENT_REGION:
672         try {
673           // So, this returns a seqid but if we just closed and then reopened, we
674           // should be ok. On close, we flushed using sequenceid obtained from
675           // hosting regionserver so no need to propagate the sequenceid returned
676           // out of initialize below up into regionserver as we normally do.
677           // TODO: Verify.
678           this.parent.initialize();
679         } catch (IOException e) {
680           LOG.error("Failed rolling back CLOSED_PARENT_REGION of region " +
681             this.parent.getRegionNameAsString(), e);
682           throw new RuntimeException(e);
683         }
684         break;
685 
686       case STARTED_REGION_A_CREATION:
687         this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_a);
688         break;
689 
690       case STARTED_REGION_B_CREATION:
691         this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_b);
692         break;
693 
694       case OFFLINED_PARENT:
695         if (services != null) services.addToOnlineRegions(this.parent);
696         break;
697 
698       case PONR:
699         // We got to the point-of-no-return so we need to just abort. Return
700         // immediately.  Do not clean up created daughter regions.  They need
701         // to be in place so we don't delete the parent region mistakenly.
702         // See HBASE-3872.
703         return false;
704 
705       default:
706         throw new RuntimeException("Unhandled journal entry: " + je);
707       }
708     }
709     // Coprocessor callback
710     if (this.parent.getCoprocessorHost() != null) {
711       this.parent.getCoprocessorHost().postRollBackSplit();
712     }
713     return result;
714   }
715 
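  /**
   * @return {@link HRegionInfo} of the first daughter (start key up to the split row);
   * set by {@link #prepare()}.
   */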
716   HRegionInfo getFirstDaughter() {
717     return hri_a;
718   }
719 
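  /**
   * @return {@link HRegionInfo} of the second daughter (split row up to the end key);
   * set by {@link #prepare()}.
   */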
720   HRegionInfo getSecondDaughter() {
721     return hri_b;
722   }
723 
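  /**
   * Deletes the region's SPLITTING znode (only if it is still in the SPLITTING state),
   * aborting the server if the cleanup fails.
   */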
724   private static void cleanZK(final Server server, final HRegionInfo hri) {
725     try {
726       // Only delete if it's in the expected state; could have been hijacked.
727       ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
728         EventType.RS_ZK_REGION_SPLITTING);
729     } catch (KeeperException e) {
730       server.abort("Failed cleanup of " + hri.getRegionNameAsString(), e);
731     }
732   }
733 
734   /**
735    * Creates a new ephemeral node in the SPLITTING state for the specified region.
736    * The node is created ephemeral so it is cleaned up if the regionserver dies mid-split.
737    *
738    * <p>Does not transition nodes from other states.  If a node already exists
739    * for this region, a {@link NodeExistsException} will be thrown.
740    *
741    * @param zkw zk reference
742    * @param region region whose SPLITTING znode is to be created
743    * @param serverName server event originates from
744    * @return Version of znode created.
745    * @throws KeeperException
746    * @throws IOException
747    */
748   int createNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo region,
749       final ServerName serverName) throws KeeperException, IOException {
750     LOG.debug(zkw.prefix("Creating ephemeral node for " +
751       region.getEncodedName() + " in SPLITTING state"));
752     RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
753         region.getRegionName(), serverName);
754     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
755     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
756       throw new IOException("Failed create of ephemeral " + node);
757     }
758     // Transition node from SPLITTING to SPLITTING and pick up version so we
759     // can be sure this znode is ours; the version is needed for deleting.
760     return transitionNodeSplitting(zkw, region, serverName, -1);
761   }
762 
763   /**
764    * Transitions an existing node for the specified region which is
765    * currently in the SPLITTING state to be in the SPLIT state.  Converts the
766    * ephemeral SPLITTING znode to an ephemeral SPLIT node.  Master cleans up
767    * SPLIT znode when it reads it (or if we crash, zk will clean it up).
768    *
769    * <p>Does not transition nodes from other states.  If for some reason the
770    * node could not be transitioned, the method returns -1.  If the transition
771    * is successful, the version of the node after transition is returned.
772    *
773    * <p>This method can fail and return -1 for three different reasons:
774    * <ul><li>Node for this region does not exist</li>
775    * <li>Node for this region is not in SPLITTING state</li>
776    * <li>After verifying SPLITTING state, update fails because of wrong version
777    * (this should never actually happen since an RS only does this transition
778    * following a transition to SPLITTING.  If two RS are conflicting, one would
779    * fail the original transition to SPLITTING and not this transition)</li>
780    * </ul>
781    *
782    * <p>Does not set any watches.
783    *
784    * <p>This method should only be used by a RegionServer when completing the
785    * split of a region.
786    *
787    * @param zkw zk reference
788    * @param parent region to be transitioned to split
789    * @param a Daughter a of split
790    * @param b Daughter b of split
791    * @param serverName server event originates from
792    * @return version of node after transition, -1 if unsuccessful transition
793    * @throws KeeperException if unexpected zookeeper exception
794    * @throws IOException
795    */
796   private static int transitionNodeSplit(ZooKeeperWatcher zkw,
797       HRegionInfo parent, HRegionInfo a, HRegionInfo b, ServerName serverName,
798       final int znodeVersion)
799   throws KeeperException, IOException {
800     byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
801     return ZKAssign.transitionNode(zkw, parent, serverName,
802       EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLIT,
803       znodeVersion, payload);
804   }
805 
806   /**
807    * Re-transitions the parent's existing znode to SPLITTING to pick up the znode version.
808    * @param zkw zk reference
809    * @param parent region to be transitioned to splitting
810    * @param serverName server event originates from
811    * @param version znode version
812    * @return version of node after transition, -1 if unsuccessful transition
813    * @throws KeeperException
814    * @throws IOException
815    */
816   int transitionNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo parent,
817       final ServerName serverName, final int version) throws KeeperException, IOException {
818     return ZKAssign.transitionNode(zkw, parent, serverName,
819       EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
820   }
821 
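  /**
   * Re-transitions the znode from SPLIT to SPLIT so the master gets another
   * notification in case it missed the earlier one; used while waiting for the
   * master to delete the znode.
   * @return version of the node after the transition, -1 if the znode no longer exists
   * @throws KeeperException
   * @throws IOException
   */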
822   private static int tickleNodeSplit(ZooKeeperWatcher zkw,
823       HRegionInfo parent, HRegionInfo a, HRegionInfo b, ServerName serverName,
824       final int znodeVersion)
825   throws KeeperException, IOException {
826     byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
827     return ZKAssign.transitionNode(zkw, parent, serverName,
828       EventType.RS_ZK_REGION_SPLIT, EventType.RS_ZK_REGION_SPLIT,
829       znodeVersion, payload);
830   }
831 }