
1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.List;
24  import java.util.ListIterator;
25  import java.util.Map;
26  import java.util.concurrent.Callable;
27  import java.util.concurrent.ExecutionException;
28  import java.util.concurrent.Executors;
29  import java.util.concurrent.Future;
30  import java.util.concurrent.ThreadFactory;
31  import java.util.concurrent.ThreadPoolExecutor;
32  import java.util.concurrent.TimeUnit;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.classification.InterfaceAudience;
37  import org.apache.hadoop.hbase.HRegionInfo;
38  import org.apache.hadoop.hbase.RegionTransition;
39  import org.apache.hadoop.hbase.Server;
40  import org.apache.hadoop.hbase.ServerName;
41  import org.apache.hadoop.hbase.catalog.MetaEditor;
42  import org.apache.hadoop.hbase.executor.EventType;
43  import org.apache.hadoop.hbase.util.Bytes;
44  import org.apache.hadoop.hbase.util.CancelableProgressable;
45  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
46  import org.apache.hadoop.hbase.util.HasThread;
47  import org.apache.hadoop.hbase.util.PairOfSameType;
48  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
49  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
50  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
51  import org.apache.zookeeper.KeeperException;
52  import org.apache.zookeeper.KeeperException.NodeExistsException;
53  
54  import com.google.common.util.concurrent.ThreadFactoryBuilder;
55  
56  /**
57   * Executes a region split as a "transaction".  Call {@link #prepare()} to set up
58   * the transaction, {@link #execute(Server, RegionServerServices)} to run the
59   * transaction, and {@link #rollback(Server, RegionServerServices)} to clean up if execute fails.
60   *
61   * <p>Here is an example of how you would use this class:
62   * <pre>
63   *  SplitTransaction st = new SplitTransaction(parent, midKey);
64   *  if (!st.prepare()) return;
65   *  try {
66   *    st.execute(server, services);
67   *  } catch (IOException ioe) {
68   *    try {
69   *      st.rollback(server, services);
70   *      return;
71   *    } catch (RuntimeException e) {
72   *      myAbortable.abort("Failed split, abort");
73   *    }
74   *  }
75   * </pre>
76   * <p>This class is not thread safe.  The caller must ensure the split is run by
77   * one thread only.
78   */
79  @InterfaceAudience.Private
80  public class SplitTransaction {
81    private static final Log LOG = LogFactory.getLog(SplitTransaction.class);
82  
83    /*
84     * Region to split
85     */
86    private final HRegion parent;
87    private HRegionInfo hri_a;
88    private HRegionInfo hri_b;
89    private long fileSplitTimeout = 30000;
90    private int znodeVersion = -1;
91  
92    /*
93     * Row to split around
94     */
95    private final byte [] splitrow;
96  
97    /**
98     * Types to add to the transaction journal.
99     * Each enum value is a step in the split transaction. Used to figure out how much
100    * we need to roll back.
101    */
102   enum JournalEntry {
103     /**
104      * Set region as in transition, set it into SPLITTING state.
105      */
106     SET_SPLITTING_IN_ZK,
107     /**
108      * We created the temporary split data directory.
109      */
110     CREATE_SPLIT_DIR,
111     /**
112      * Closed the parent region.
113      */
114     CLOSED_PARENT_REGION,
115     /**
116      * The parent has been taken out of the server's online regions list.
117      */
118     OFFLINED_PARENT,
119     /**
120      * Started in on creation of the first daughter region.
121      */
122     STARTED_REGION_A_CREATION,
123     /**
124      * Started in on the creation of the second daughter region.
125      */
126     STARTED_REGION_B_CREATION,
127     /**
128      * Point of no return.
129      * If we got here, then transaction is not recoverable other than by
130      * crashing out the regionserver.
131      */
132     PONR
133   }
134 
135   /*
136    * Journal of how far the split transaction has progressed.
137    */
138   private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
139 
140   /**
141    * Constructor
142    * @param r Region to split
143    * @param splitrow Row to split around
144    */
145   public SplitTransaction(final HRegion r, final byte [] splitrow) {
146     this.parent = r;
147     this.splitrow = splitrow;
148   }
149 
150   /**
151    * Does checks on split inputs.
152    * @return <code>true</code> if the region is splittable, else
153    * <code>false</code> if it is not (e.g. it's already closed, etc.).
154    */
155   public boolean prepare() {
156     if (!this.parent.isSplittable()) return false;
157     // Split key can be null if this region is unsplittable; i.e. has refs.
158     if (this.splitrow == null) return false;
159     HRegionInfo hri = this.parent.getRegionInfo();
160     parent.prepareToSplit();
161     // Check splitrow.
162     byte [] startKey = hri.getStartKey();
163     byte [] endKey = hri.getEndKey();
164     if (Bytes.equals(startKey, splitrow) ||
165         !this.parent.getRegionInfo().containsRow(splitrow)) {
166       LOG.info("Split row is not inside region key range or is equal to " +
167           "startkey: " + Bytes.toStringBinary(this.splitrow));
168       return false;
169     }
170     long rid = getDaughterRegionIdTimestamp(hri);
171     this.hri_a = new HRegionInfo(hri.getTableName(), startKey, this.splitrow, false, rid);
172     this.hri_b = new HRegionInfo(hri.getTableName(), this.splitrow, endKey, false, rid);
173     return true;
174   }
175 
176   /**
177    * Calculate daughter regionid to use.
178    * @param hri Parent {@link HRegionInfo}
179    * @return Daughter region id (timestamp) to use.
180    */
181   private static long getDaughterRegionIdTimestamp(final HRegionInfo hri) {
182     long rid = EnvironmentEdgeManager.currentTimeMillis();
183     // Regionid is timestamp.  Can't be less than that of parent else will insert
184     // at wrong location in .META. (See HBASE-710).
185     if (rid < hri.getRegionId()) {
186      LOG.warn("Clock skew; parent region's id is " + hri.getRegionId() +
187         " but current time here is " + rid);
188       rid = hri.getRegionId() + 1;
189     }
190     return rid;
191   }
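      // Example of the clock-skew guard above (illustrative values): if the parent's
      // region id is 1380000000000 but this server's clock reads 1379999999000, the
      // daughters get region id 1380000000001 so they do not sort ahead of the parent
      // in .META.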
192 
193   private static IOException closedByOtherException = new IOException(
194       "Failed to close region: already closed by another thread");
195 
196   /**
197    * Prepare the regions and region files.
198    * @param server Hosting server instance.  Can be null when testing (won't try
199    * to update zk if the server is null).
200    * @param services Used to online/offline regions.
201    * @throws IOException If thrown, transaction failed.
202    *    Call {@link #rollback(Server, RegionServerServices)}
203    * @return Regions created
204    */
205   /* package */PairOfSameType<HRegion> createDaughters(final Server server,
206       final RegionServerServices services) throws IOException {
207     LOG.info("Starting split of region " + this.parent);
208     if ((server != null && server.isStopped()) ||
209         (services != null && services.isStopping())) {
210       throw new IOException("Server is stopped or stopping");
211     }
212     assert !this.parent.lock.writeLock().isHeldByCurrentThread():
213       "Unsafe to hold write lock while performing RPCs";
214 
215     // Coprocessor callback
216     if (this.parent.getCoprocessorHost() != null) {
217       this.parent.getCoprocessorHost().preSplit();
218     }
219 
220     // Coprocessor callback
221     if (this.parent.getCoprocessorHost() != null) {
222       this.parent.getCoprocessorHost().preSplit(this.splitrow);
223     }
224 
225     // If true, no cluster to write meta edits to or to update znodes in.
226     boolean testing = server == null? true:
227         server.getConfiguration().getBoolean("hbase.testing.nocluster", false);
228     this.fileSplitTimeout = testing ? this.fileSplitTimeout :
229         server.getConfiguration().getLong("hbase.regionserver.fileSplitTimeout",
230           this.fileSplitTimeout);
231 
232     // Set ephemeral SPLITTING znode up in zk.  Mocked servers sometimes don't
233     // have zookeeper so don't do zk stuff if server or zookeeper is null
234     if (server != null && server.getZooKeeper() != null) {
235       try {
236         createNodeSplitting(server.getZooKeeper(),
237           this.parent.getRegionInfo(), server.getServerName());
238       } catch (KeeperException e) {
239         throw new IOException("Failed creating SPLITTING znode on " +
240           this.parent.getRegionNameAsString(), e);
241       }
242     }
243     this.journal.add(JournalEntry.SET_SPLITTING_IN_ZK);
244     if (server != null && server.getZooKeeper() != null) {
245       try {
246         // Transition node from SPLITTING to SPLITTING after creating the split node.
247         // Master will get the callback for node change only if the transition is successful.
248         // Note that if the transition fails then the rollback will delete the created znode
249         // as the journal entry SET_SPLITTING_IN_ZK is added.
250        // TODO: Maybe we can add some new state to the znode and handle the new state in case
251         //        of success/failure
252         this.znodeVersion = transitionNodeSplitting(server.getZooKeeper(),
253             this.parent.getRegionInfo(), server.getServerName(), -1);
254       } catch (KeeperException e) {
255         throw new IOException("Failed setting SPLITTING znode on "
256             + this.parent.getRegionNameAsString(), e);
257       }
258     }
259 
260     this.parent.getRegionFileSystem().createSplitsDir();
261     this.journal.add(JournalEntry.CREATE_SPLIT_DIR);
262 
263     Map<byte[], List<StoreFile>> hstoreFilesToSplit = null;
264     Exception exceptionToThrow = null;
265     try{
266       hstoreFilesToSplit = this.parent.close(false);
267     } catch (Exception e) {
268       exceptionToThrow = e;
269     }
270     if (exceptionToThrow == null && hstoreFilesToSplit == null) {
271       // The region was closed by a concurrent thread.  We can't continue
272       // with the split, instead we must just abandon the split.  If we
273       // reopen or split this could cause problems because the region has
274       // probably already been moved to a different server, or is in the
275       // process of moving to a different server.
276       exceptionToThrow = closedByOtherException;
277     }
278     if (exceptionToThrow != closedByOtherException) {
279       this.journal.add(JournalEntry.CLOSED_PARENT_REGION);
280     }
281     if (exceptionToThrow != null) {
282       if (exceptionToThrow instanceof IOException) throw (IOException)exceptionToThrow;
283       throw new IOException(exceptionToThrow);
284     }
285     if (!testing) {
286       services.removeFromOnlineRegions(this.parent, null);
287     }
288     this.journal.add(JournalEntry.OFFLINED_PARENT);
289 
290     // TODO: If splitStoreFiles were multithreaded would we complete steps in
291     // less elapsed time?  St.Ack 20100920
292     //
293     // splitStoreFiles creates daughter region dirs under the parent splits dir
294     // Nothing to unroll here if failure -- clean up of CREATE_SPLIT_DIR will
295     // clean this up.
296     splitStoreFiles(hstoreFilesToSplit);
297 
298     // Log to the journal that we are creating region A, the first daughter
299     // region.  We could fail halfway through.  If we do, we could have left
300    // stuff in fs that needs cleanup -- a storefile or two.  That's why we
301     // add entry to journal BEFORE rather than AFTER the change.
302     this.journal.add(JournalEntry.STARTED_REGION_A_CREATION);
303     HRegion a = this.parent.createDaughterRegionFromSplits(this.hri_a);
304 
305     // Ditto
306     this.journal.add(JournalEntry.STARTED_REGION_B_CREATION);
307     HRegion b = this.parent.createDaughterRegionFromSplits(this.hri_b);
308 
309     // This is the point of no return.  Adding subsequent edits to .META. as we
310     // do below when we do the daughter opens adding each to .META. can fail in
311     // various interesting ways the most interesting of which is a timeout
312     // BUT the edits all go through (See HBASE-3872).  IF we reach the PONR
313     // then subsequent failures need to crash out this regionserver; the
314     // server shutdown processing should be able to fix-up the incomplete split.
315     // The offlined parent will have the daughters as extra columns.  If
316     // we leave the daughter regions in place and do not remove them when we
317     // crash out, then they will have their references to the parent in place
318     // still and the server shutdown fixup of .META. will point to these
319     // regions.
320    // We should add the PONR JournalEntry before offlineParentInMeta, so even if
321    // offlineParentInMeta times out, this will cause the regionserver to exit, and then
322    // the master's ServerShutdownHandler will fix the daughters & avoid data loss. (See
323    // HBASE-4562).
324     this.journal.add(JournalEntry.PONR);
325 
326     // Edit parent in meta.  Offlines parent region and adds splita and splitb
327    // as an atomic update. See HBASE-7721. This update to META determines
328    // whether the region is considered split or not in case of failures.
329    // If it is successful, the master will roll forward; if not, the master will
330    // roll back and assign the parent region.
331     if (!testing) {
332       MetaEditor.splitRegion(server.getCatalogTracker(), parent.getRegionInfo(),
333           a.getRegionInfo(), b.getRegionInfo(), server.getServerName());
334     }
335     return new PairOfSameType<HRegion>(a, b);
336   }
337 
338   /**
339    * Perform time consuming opening of the daughter regions.
340    * @param server Hosting server instance.  Can be null when testing (won't try
341    * to update zk if the server is null).
342    * @param services Used to online/offline regions.
343    * @param a first daughter region
344    * @param b second daughter region
345    * @throws IOException If thrown, transaction failed.
346    *          Call {@link #rollback(Server, RegionServerServices)}
347    */
348   /* package */void openDaughters(final Server server,
349       final RegionServerServices services, HRegion a, HRegion b)
350       throws IOException {
351     boolean stopped = server != null && server.isStopped();
352     boolean stopping = services != null && services.isStopping();
353     // TODO: Is this check needed here?
354     if (stopped || stopping) {
355       LOG.info("Not opening daughters " +
356           b.getRegionInfo().getRegionNameAsString() +
357           " and " +
358           a.getRegionInfo().getRegionNameAsString() +
359           " because stopping=" + stopping + ", stopped=" + stopped);
360     } else {
361       // Open daughters in parallel.
362       DaughterOpener aOpener = new DaughterOpener(server, a);
363       DaughterOpener bOpener = new DaughterOpener(server, b);
364       aOpener.start();
365       bOpener.start();
366       try {
367         aOpener.join();
368         bOpener.join();
369       } catch (InterruptedException e) {
370         Thread.currentThread().interrupt();
371         throw new IOException("Interrupted " + e.getMessage());
372       }
373       if (aOpener.getException() != null) {
374         throw new IOException("Failed " +
375           aOpener.getName(), aOpener.getException());
376       }
377       if (bOpener.getException() != null) {
378         throw new IOException("Failed " +
379           bOpener.getName(), bOpener.getException());
380       }
381       if (services != null) {
382         try {
383           // add 2nd daughter first (see HBASE-4335)
384           services.postOpenDeployTasks(b, server.getCatalogTracker());
385           // Should add it to OnlineRegions
386           services.addToOnlineRegions(b);
387           services.postOpenDeployTasks(a, server.getCatalogTracker());
388           services.addToOnlineRegions(a);
389         } catch (KeeperException ke) {
390           throw new IOException(ke);
391         }
392       }
393     }
394   }
395 
396   /**
397    * Finish off the split transaction by transitioning the znode.
398    * @param server Hosting server instance.  Can be null when testing (won't try
399    * to update zk if the server is null).
400    * @param services Used to online/offline regions.
401    * @param a first daughter region
402    * @param b second daughter region
403    * @throws IOException If thrown, transaction failed.
404    *          Call {@link #rollback(Server, RegionServerServices)}
405    */
406   /* package */void transitionZKNode(final Server server,
407       final RegionServerServices services, HRegion a, HRegion b)
408       throws IOException {
409     // Tell master about split by updating zk.  If we fail, abort.
410     if (server != null && server.getZooKeeper() != null) {
411       try {
412         this.znodeVersion = transitionNodeSplit(server.getZooKeeper(),
413           parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
414           server.getServerName(), this.znodeVersion);
415 
416         int spins = 0;
417         // Now wait for the master to process the split. We know it's done
418         // when the znode is deleted. The reason we keep tickling the znode is
419         // that it's possible for the master to miss an event.
420         do {
421           if (spins % 10 == 0) {
422             LOG.debug("Still waiting on the master to process the split for " +
423                 this.parent.getRegionInfo().getEncodedName());
424           }
425           Thread.sleep(100);
426           // When this returns -1 it means the znode doesn't exist
427           this.znodeVersion = tickleNodeSplit(server.getZooKeeper(),
428             parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
429             server.getServerName(), this.znodeVersion);
430           spins++;
431         } while (this.znodeVersion != -1 && !server.isStopped()
432             && !services.isStopping());
433       } catch (Exception e) {
434         if (e instanceof InterruptedException) {
435           Thread.currentThread().interrupt();
436         }
437         throw new IOException("Failed telling master about split", e);
438       }
439     }
440 
441     // Coprocessor callback
442     if (this.parent.getCoprocessorHost() != null) {
443       this.parent.getCoprocessorHost().postSplit(a,b);
444     }
445 
446    // Leaving here, the splitdir with its dross will be in place, but since the
447    // split was successful, just leave it; it'll be cleaned up when the parent
448    // region is deleted.
449   }
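      // For reference, the znode lifecycle driven by this class (see createNodeSplitting,
      // transitionNodeSplitting, transitionNodeSplit and tickleNodeSplit below) is roughly:
      //   1. createDaughters: create the ephemeral RS_ZK_REGION_SPLITTING znode, then
      //      tickle it SPLITTING -> SPLITTING to pick up the version we own.
      //   2. transitionZKNode: transition SPLITTING -> SPLIT with the daughter HRegionInfos
      //      as payload, then keep tickling SPLIT -> SPLIT until the master deletes the
      //      znode (the tickle returns -1).
      //   3. rollback: if we never reached the PONR, delete the SPLITTING znode.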
450 
451   /**
452    * Run the transaction.
453    * @param server Hosting server instance.  Can be null when testing (won't try
454    * to update zk if the server is null).
455    * @param services Used to online/offline regions.
456    * @throws IOException If thrown, transaction failed.
457    *          Call {@link #rollback(Server, RegionServerServices)}
458    * @return Regions created
460    * @see #rollback(Server, RegionServerServices)
461    */
462   public PairOfSameType<HRegion> execute(final Server server,
463       final RegionServerServices services)
464   throws IOException {
465     PairOfSameType<HRegion> regions = createDaughters(server, services);
466     openDaughters(server, services, regions.getFirst(), regions.getSecond());
467     transitionZKNode(server, services, regions.getFirst(), regions.getSecond());
468     return regions;
469   }
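      // A minimal caller sketch (assumes 'parent', 'midKey', 'server' and 'services'
      // are in scope), mirroring the class-level example above plus the rollback()
      // contract below: a false return means we are past the point of no return and
      // the regionserver should be aborted.
      //
      //   SplitTransaction st = new SplitTransaction(parent, midKey);
      //   if (!st.prepare()) return;
      //   try {
      //     st.execute(server, services);
      //   } catch (IOException ioe) {
      //     if (!st.rollback(server, services)) {
      //       server.abort("Failed split of " + parent + "; past point of no return", ioe);
      //     }
      //   }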
470 
471   /*
472   * Opens a daughter region in its own thread.
473   * If the open fails, the cause is recorded so the caller can abort the hosting server.
474    */
475   class DaughterOpener extends HasThread {
476     private final Server server;
477     private final HRegion r;
478     private Throwable t = null;
479 
480     DaughterOpener(final Server s, final HRegion r) {
481       super((s == null? "null-services": s.getServerName()) +
482         "-daughterOpener=" + r.getRegionInfo().getEncodedName());
483       setDaemon(true);
484       this.server = s;
485       this.r = r;
486     }
487 
488     /**
489      * @return Null if the open succeeded, else the exception that caused the open to fail.
490      * Call it after this thread exits, else you may get the wrong view of the result.
491      */
492     Throwable getException() {
493       return this.t;
494     }
495 
496     @Override
497     public void run() {
498       try {
499         openDaughterRegion(this.server, r);
500       } catch (Throwable t) {
501         this.t = t;
502       }
503     }
504   }
505 
506   /**
507    * Open the daughter region.
508    * @param server Hosting server instance (may be null when testing).
509    * @param daughter the daughter region to open
510    * @throws IOException
511    * @throws KeeperException
512    */
513   void openDaughterRegion(final Server server, final HRegion daughter)
514   throws IOException, KeeperException {
515     HRegionInfo hri = daughter.getRegionInfo();
516     LoggingProgressable reporter = server == null ? null
517         : new LoggingProgressable(hri, server.getConfiguration().getLong(
518             "hbase.regionserver.split.daughter.open.log.interval", 10000));
519     daughter.openHRegion(reporter);
520   }
521 
522   static class LoggingProgressable implements CancelableProgressable {
523     private final HRegionInfo hri;
524     private long lastLog = -1;
525     private final long interval;
526 
527     LoggingProgressable(final HRegionInfo hri, final long interval) {
528       this.hri = hri;
529       this.interval = interval;
530     }
531 
532     @Override
533     public boolean progress() {
534       long now = System.currentTimeMillis();
535       if (now - lastLog > this.interval) {
536         LOG.info("Opening " + this.hri.getRegionNameAsString());
537         this.lastLog = now;
538       }
539       return true;
540     }
541   }
542 
543   private void splitStoreFiles(final Map<byte[], List<StoreFile>> hstoreFilesToSplit)
544       throws IOException {
545     if (hstoreFilesToSplit == null) {
546       // Could be null because close didn't succeed -- for now consider it fatal
547       throw new IOException("Close returned empty list of StoreFiles");
548     }
549     // The following code sets up a thread pool executor with as many slots as
550    // there are files to split. It then fires up everything, waits for
551     // completion and finally checks for any exception
552     int nbFiles = hstoreFilesToSplit.size();
553     if (nbFiles == 0) {
554      // no file needs to be split.
555       return;
556     }
557     ThreadFactoryBuilder builder = new ThreadFactoryBuilder();
558     builder.setNameFormat("StoreFileSplitter-%1$d");
559     ThreadFactory factory = builder.build();
560     ThreadPoolExecutor threadPool =
561       (ThreadPoolExecutor) Executors.newFixedThreadPool(nbFiles, factory);
562     List<Future<Void>> futures = new ArrayList<Future<Void>>(nbFiles);
563 
564     // Split each store file.
565     for (Map.Entry<byte[], List<StoreFile>> entry: hstoreFilesToSplit.entrySet()) {
566       for (StoreFile sf: entry.getValue()) {
567         StoreFileSplitter sfs = new StoreFileSplitter(entry.getKey(), sf);
568         futures.add(threadPool.submit(sfs));
569       }
570     }
571     // Shutdown the pool
572     threadPool.shutdown();
573 
574     // Wait for all the tasks to finish
575     try {
576       boolean stillRunning = !threadPool.awaitTermination(
577           this.fileSplitTimeout, TimeUnit.MILLISECONDS);
578       if (stillRunning) {
579         threadPool.shutdownNow();
580        // wait for the thread pool to shut down completely.
581         while (!threadPool.isTerminated()) {
582           Thread.sleep(50);
583         }
584         throw new IOException("Took too long to split the" +
585             " files and create the references, aborting split");
586       }
587     } catch (InterruptedException e) {
588       Thread.currentThread().interrupt();
589       throw new IOException("Interrupted while waiting for file splitters", e);
590     }
591 
592     // Look for any exception
593     for (Future<Void> future: futures) {
594       try {
595         future.get();
596       } catch (InterruptedException e) {
597         Thread.currentThread().interrupt();
598         throw new IOException(
599             "Interrupted while trying to get the results of file splitters", e);
600       } catch (ExecutionException e) {
601         throw new IOException(e);
602       }
603     }
604   }
605 
606   private void splitStoreFile(final byte[] family, final StoreFile sf) throws IOException {
607     HRegionFileSystem fs = this.parent.getRegionFileSystem();
608     String familyName = Bytes.toString(family);
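         // Daughter A covers [startKey, splitrow) and daughter B covers [splitrow, endKey);
         // the trailing boolean selects the bottom-half (false) or top-half (true) reference
         // written against the split row.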
609     fs.splitStoreFile(this.hri_a, familyName, sf, this.splitrow, false);
610     fs.splitStoreFile(this.hri_b, familyName, sf, this.splitrow, true);
611   }
612 
613   /**
614    * Utility class used to do the file splitting / reference writing
615    * in parallel instead of sequentially.
616    */
617   class StoreFileSplitter implements Callable<Void> {
618     private final byte[] family;
619     private final StoreFile sf;
620 
621     /**
622      * Constructor that takes what it needs to split
623      * @param family Family that contains the store file
624      * @param sf the store file to split
625      */
626     public StoreFileSplitter(final byte[] family, final StoreFile sf) {
627       this.sf = sf;
628       this.family = family;
629     }
630 
631     public Void call() throws IOException {
632       splitStoreFile(family, sf);
633       return null;
634     }
635   }
636 
637   /**
638    * @param server Hosting server instance (May be null when testing).
639    * @param services Used to online/offline regions.
640    * @throws IOException If thrown, rollback failed.  Take drastic action.
641    * @return True if we successfully rolled back, false if we got to the point
642    * of no return and so now need to abort the server to minimize damage.
643    */
644   public boolean rollback(final Server server, final RegionServerServices services)
645   throws IOException {
646     // Coprocessor callback
647     if (this.parent.getCoprocessorHost() != null) {
648       this.parent.getCoprocessorHost().preRollBackSplit();
649     }
650 
651     boolean result = true;
652     ListIterator<JournalEntry> iterator =
653       this.journal.listIterator(this.journal.size());
654     // Iterate in reverse.
655     while (iterator.hasPrevious()) {
656       JournalEntry je = iterator.previous();
657       switch(je) {
658 
659       case SET_SPLITTING_IN_ZK:
660         if (server != null && server.getZooKeeper() != null) {
661           cleanZK(server, this.parent.getRegionInfo());
662         }
663         break;
664 
665       case CREATE_SPLIT_DIR:
666         this.parent.writestate.writesEnabled = true;
667         this.parent.getRegionFileSystem().cleanupSplitsDir();
668         break;
669 
670       case CLOSED_PARENT_REGION:
671         try {
672           // So, this returns a seqid but if we just closed and then reopened, we
673           // should be ok. On close, we flushed using sequenceid obtained from
674           // hosting regionserver so no need to propagate the sequenceid returned
675           // out of initialize below up into regionserver as we normally do.
676           // TODO: Verify.
677           this.parent.initialize();
678         } catch (IOException e) {
679          LOG.error("Failed rolling back CLOSED_PARENT_REGION of region " +
680             this.parent.getRegionNameAsString(), e);
681           throw new RuntimeException(e);
682         }
683         break;
684 
685       case STARTED_REGION_A_CREATION:
686         this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_a);
687         break;
688 
689       case STARTED_REGION_B_CREATION:
690         this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_b);
691         break;
692 
693       case OFFLINED_PARENT:
694         if (services != null) services.addToOnlineRegions(this.parent);
695         break;
696 
697       case PONR:
698         // We got to the point-of-no-return so we need to just abort. Return
699         // immediately.  Do not clean up created daughter regions.  They need
700         // to be in place so we don't delete the parent region mistakenly.
701         // See HBASE-3872.
702         return false;
703 
704       default:
705         throw new RuntimeException("Unhandled journal entry: " + je);
706       }
707     }
708     // Coprocessor callback
709     if (this.parent.getCoprocessorHost() != null) {
710       this.parent.getCoprocessorHost().postRollBackSplit();
711     }
712     return result;
713   }
714 
715   HRegionInfo getFirstDaughter() {
716     return hri_a;
717   }
718 
719   HRegionInfo getSecondDaughter() {
720     return hri_b;
721   }
722 
723   private static void cleanZK(final Server server, final HRegionInfo hri) {
724     try {
725      // Only delete if it's in the expected state; could have been hijacked.
726       ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
727         EventType.RS_ZK_REGION_SPLITTING);
728     } catch (KeeperException e) {
729       server.abort("Failed cleanup of " + hri.getRegionNameAsString(), e);
730     }
731   }
732 
733   /**
734    * Creates a new ephemeral node in the SPLITTING state for the specified region.
735    * Create it ephemeral in case regionserver dies mid-split.
736    *
737    * <p>Does not transition nodes from other states.  If a node already exists
738    * for this region, a {@link NodeExistsException} will be thrown.
739    *
740    * @param zkw zk reference
741    * @param region region whose znode is to be created in the SPLITTING state
742    * @param serverName server event originates from
743    * @return Version of znode created.
744    * @throws KeeperException
745    * @throws IOException
746    */
747   int createNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo region,
748       final ServerName serverName) throws KeeperException, IOException {
749     LOG.debug(zkw.prefix("Creating ephemeral node for " +
750       region.getEncodedName() + " in SPLITTING state"));
751     RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
752         region.getRegionName(), serverName);
753     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
754     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
755       throw new IOException("Failed create of ephemeral " + node);
756     }
757     // Transition node from SPLITTING to SPLITTING and pick up version so we
758      // can be sure this znode is ours; the version is needed when deleting.
759     return transitionNodeSplitting(zkw, region, serverName, -1);
760   }
761 
762   /**
763    * Transitions an existing node for the specified region which is
764    * currently in the SPLITTING state to be in the SPLIT state.  Converts the
765    * ephemeral SPLITTING znode to an ephemeral SPLIT node.  Master cleans up
766    * SPLIT znode when it reads it (or if we crash, zk will clean it up).
767    *
768    * <p>Does not transition nodes from other states.  If for some reason the
769    * node could not be transitioned, the method returns -1.  If the transition
770    * is successful, the version of the node after transition is returned.
771    *
772    * <p>This method can fail and return -1 for three different reasons:
773    * <ul><li>Node for this region does not exist</li>
774    * <li>Node for this region is not in SPLITTING state</li>
775    * <li>After verifying SPLITTING state, update fails because of wrong version
776    * (this should never actually happen since an RS only does this transition
777    * following a transition to SPLITTING.  If two RS are conflicting, one would
778    * fail the original transition to SPLITTING and not this transition)</li>
779    * </ul>
780    *
781    * <p>Does not set any watches.
782    *
783    * <p>This method should only be used by a RegionServer when completing the
784    * split of a region.
785    *
786    * @param zkw zk reference
787    * @param parent region to be transitioned to SPLIT
788    * @param a Daughter a of split
789    * @param b Daughter b of split
790    * @param serverName server event originates from
791    * @return version of node after transition, -1 if unsuccessful transition
792    * @throws KeeperException if unexpected zookeeper exception
793    * @throws IOException
794    */
795   private static int transitionNodeSplit(ZooKeeperWatcher zkw,
796       HRegionInfo parent, HRegionInfo a, HRegionInfo b, ServerName serverName,
797       final int znodeVersion)
798   throws KeeperException, IOException {
799     byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
800     return ZKAssign.transitionNode(zkw, parent, serverName,
801       EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLIT,
802       znodeVersion, payload);
803   }
804 
805   /**
806    * Transitions the parent's znode from SPLITTING to SPLITTING to confirm ownership and pick up the current znode version.
807    * @param zkw zk reference
808    * @param parent region to be transitioned to splitting
809    * @param serverName server event originates from
810    * @param version znode version
811    * @return version of node after transition, -1 if unsuccessful transition
812    * @throws KeeperException
813    * @throws IOException
814    */
815   int transitionNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo parent,
816       final ServerName serverName, final int version) throws KeeperException, IOException {
817     return ZKAssign.transitionNode(zkw, parent, serverName,
818       EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
819   }
820 
821   private static int tickleNodeSplit(ZooKeeperWatcher zkw,
822       HRegionInfo parent, HRegionInfo a, HRegionInfo b, ServerName serverName,
823       final int znodeVersion)
824   throws KeeperException, IOException {
825     byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
826     return ZKAssign.transitionNode(zkw, parent, serverName,
827       EventType.RS_ZK_REGION_SPLIT, EventType.RS_ZK_REGION_SPLIT,
828       znodeVersion, payload);
829   }
830 }