View Javadoc

1   /**
2    * Copyright The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements. See the NOTICE file distributed with this
6    * work for additional information regarding copyright ownership. The ASF
7    * licenses this file to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance with the License.
9    * You may obtain a copy of the License at
10   *
11   * http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
16   * License for the specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.List;
24  import java.util.ListIterator;
25  import java.util.Map;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.classification.InterfaceAudience;
30  import org.apache.hadoop.fs.Path;
31  import org.apache.hadoop.hbase.HRegionInfo;
32  import org.apache.hadoop.hbase.RegionTransition;
33  import org.apache.hadoop.hbase.Server;
34  import org.apache.hadoop.hbase.ServerName;
35  import org.apache.hadoop.hbase.catalog.MetaEditor;
36  import org.apache.hadoop.hbase.catalog.MetaReader;
37  import org.apache.hadoop.hbase.executor.EventType;
38  import org.apache.hadoop.hbase.regionserver.SplitTransaction.LoggingProgressable;
39  import org.apache.hadoop.hbase.util.Bytes;
40  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
41  import org.apache.hadoop.hbase.util.Pair;
42  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
43  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
44  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
45  import org.apache.zookeeper.KeeperException;
46  import org.apache.zookeeper.KeeperException.NodeExistsException;
47  
48  /**
49   * Executes region merge as a "transaction". It is similar with
50   * SplitTransaction. Call {@link #prepare(RegionServerServices)} to setup the
51   * transaction, {@link #execute(Server, RegionServerServices)} to run the
52   * transaction and {@link #rollback(Server, RegionServerServices)} to cleanup if
53   * execute fails.
54   * 
55   * <p>
56   * Here is an example of how you would use this class:
57   * 
58   * <pre>
59   *  RegionMergeTransaction mt = new RegionMergeTransaction(this.conf, parent, midKey)
60   *  if (!mt.prepare(services)) return;
61   *  try {
62   *    mt.execute(server, services);
63   *  } catch (IOException ioe) {
64   *    try {
65   *      mt.rollback(server, services);
66   *      return;
67   *    } catch (RuntimeException e) {
68   *      myAbortable.abort("Failed merge, abort");
69   *    }
70   *  }
71   * </Pre>
72   * <p>
73   * This class is not thread safe. Caller needs ensure merge is run by one thread
74   * only.
75   */
76  @InterfaceAudience.Private
77  public class RegionMergeTransaction {
78    private static final Log LOG = LogFactory.getLog(RegionMergeTransaction.class);
79  
80    // Merged region info
81    private HRegionInfo mergedRegionInfo;
82    // region_a sorts before region_b
83    private final HRegion region_a;
84    private final HRegion region_b;
85    // merges dir is under region_a
86    private final Path mergesdir;
87    private int znodeVersion = -1;
88    // We only merge adjacent regions if forcible is false
89    private final boolean forcible;
90  
91    /**
92     * Types to add to the transaction journal. Each enum is a step in the merge
93     * transaction. Used to figure how much we need to rollback.
94     */
95    enum JournalEntry {
96      /**
97       * Set region as in transition, set it into MERGING state.
98       */
99      SET_MERGING_IN_ZK,
100     /**
101      * We created the temporary merge data directory.
102      */
103     CREATED_MERGE_DIR,
104     /**
105      * Closed the merging region A.
106      */
107     CLOSED_REGION_A,
108     /**
109      * The merging region A has been taken out of the server's online regions list.
110      */
111     OFFLINED_REGION_A,
112     /**
113      * Closed the merging region B.
114      */
115     CLOSED_REGION_B,
116     /**
117      * The merging region B has been taken out of the server's online regions list.
118      */
119     OFFLINED_REGION_B,
120     /**
121      * Started in on creation of the merged region.
122      */
123     STARTED_MERGED_REGION_CREATION,
124     /**
125      * Point of no return. If we got here, then transaction is not recoverable
126      * other than by crashing out the regionserver.
127      */
128     PONR
129   }
130 
131   /*
132    * Journal of how far the merge transaction has progressed.
133    */
134   private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
135 
136   private static IOException closedByOtherException = new IOException(
137       "Failed to close region: already closed by another thread");
138 
139   /**
140    * Constructor
141    * @param a region a to merge
142    * @param b region b to merge
143    * @param forcible if false, we will only merge adjacent regions
144    */
145   public RegionMergeTransaction(final HRegion a, final HRegion b,
146       final boolean forcible) {
147     if (a.getRegionInfo().compareTo(b.getRegionInfo()) <= 0) {
148       this.region_a = a;
149       this.region_b = b;
150     } else {
151       this.region_a = b;
152       this.region_b = a;
153     }
154     this.forcible = forcible;
155     this.mergesdir = region_a.getRegionFileSystem().getMergesDir();
156   }
157 
158   /**
159    * Does checks on merge inputs.
160    * @param services
161    * @return <code>true</code> if the regions are mergeable else
162    *         <code>false</code> if they are not (e.g. its already closed, etc.).
163    */
164   public boolean prepare(final RegionServerServices services) {
165     if (!region_a.getTableDesc().getNameAsString()
166         .equals(region_b.getTableDesc().getNameAsString())) {
167       LOG.info("Can't merge regions " + region_a + "," + region_b
168           + " because they do not belong to the same table");
169       return false;
170     }
171     if (region_a.getRegionInfo().equals(region_b.getRegionInfo())) {
172       LOG.info("Can't merge the same region " + region_a);
173       return false;
174     }
175     if (!forcible && !HRegionInfo.areAdjacent(region_a.getRegionInfo(),
176             region_b.getRegionInfo())) {
177       String msg = "Skip merging " + this.region_a.getRegionNameAsString()
178           + " and " + this.region_b.getRegionNameAsString()
179           + ", because they are not adjacent.";
180       LOG.info(msg);
181       return false;
182     }
183     if (!this.region_a.isMergeable() || !this.region_b.isMergeable()) {
184       return false;
185     }
186     try {
187       boolean regionAHasMergeQualifier = hasMergeQualifierInMeta(services,
188           region_a.getRegionName());
189       if (regionAHasMergeQualifier ||
190           hasMergeQualifierInMeta(services, region_b.getRegionName())) {
191         LOG.debug("Region " + (regionAHasMergeQualifier ? region_a.getRegionNameAsString()
192                 : region_b.getRegionNameAsString())
193             + " is not mergeable because it has merge qualifier in META");
194         return false;
195       }
196     } catch (IOException e) {
197       LOG.warn("Failed judging whether merge transaction is available for "
198               + region_a.getRegionNameAsString() + " and "
199               + region_b.getRegionNameAsString(), e);
200       return false;
201     }
202 
203     // WARN: make sure there is no parent region of the two merging regions in
204     // .META. If exists, fixing up daughters would cause daughter regions(we
205     // have merged one) online again when we restart master, so we should clear
206     // the parent region to prevent the above case
207     // Since HBASE-7721, we don't need fix up daughters any more. so here do
208     // nothing
209 
210     this.mergedRegionInfo = getMergedRegionInfo(region_a.getRegionInfo(),
211         region_b.getRegionInfo());
212     return true;
213   }
214 
215   /**
216    * Run the transaction.
217    * @param server Hosting server instance. Can be null when testing (won't try
218    *          and update in zk if a null server)
219    * @param services Used to online/offline regions.
220    * @throws IOException If thrown, transaction failed. Call
221    *           {@link #rollback(Server, RegionServerServices)}
222    * @return merged region
223    * @throws IOException
224    * @see #rollback(Server, RegionServerServices)
225    */
226   public HRegion execute(final Server server,
227       final RegionServerServices services) throws IOException {
228     HRegion mergedRegion = createMergedRegion(server, services);
229     openMergedRegion(server, services, mergedRegion);
230     transitionZKNode(server, services);
231     return mergedRegion;
232   }
233 
234   /**
235    * Prepare the merged region and region files.
236    * @param server Hosting server instance. Can be null when testing (won't try
237    *          and update in zk if a null server)
238    * @param services Used to online/offline regions.
239    * @return merged region
240    * @throws IOException If thrown, transaction failed. Call
241    *           {@link #rollback(Server, RegionServerServices)}
242    */
243   HRegion createMergedRegion(final Server server,
244       final RegionServerServices services) throws IOException {
245     LOG.info("Starting merge of " + region_a + " and "
246         + region_b.getRegionNameAsString() + ", forcible=" + forcible);
247     if ((server != null && server.isStopped())
248         || (services != null && services.isStopping())) {
249       throw new IOException("Server is stopped or stopping");
250     }
251 
252     // If true, no cluster to write meta edits to or to update znodes in.
253     boolean testing = server == null ? true : server.getConfiguration()
254         .getBoolean("hbase.testing.nocluster", false);
255 
256     // Set ephemeral MERGING znode up in zk. Mocked servers sometimes don't
257     // have zookeeper so don't do zk stuff if server or zookeeper is null
258     if (server != null && server.getZooKeeper() != null) {
259       try {
260         createNodeMerging(server.getZooKeeper(), this.mergedRegionInfo,
261             server.getServerName());
262       } catch (KeeperException e) {
263         throw new IOException("Failed creating MERGING znode on "
264             + this.mergedRegionInfo.getRegionNameAsString(), e);
265       }
266     }
267     this.journal.add(JournalEntry.SET_MERGING_IN_ZK);
268     if (server != null && server.getZooKeeper() != null) {
269       try {
270         // Transition node from MERGING to MERGING after creating the merge
271         // node. Master will get the callback for node change only if the
272         // transition is successful.
273         // Note that if the transition fails then the rollback will delete the
274         // created znode as the journal entry SET_MERGING_IN_ZK is added.
275         this.znodeVersion = transitionNodeMerging(server.getZooKeeper(),
276             this.mergedRegionInfo, server.getServerName(), -1);
277       } catch (KeeperException e) {
278         throw new IOException("Failed setting MERGING znode on "
279             + this.mergedRegionInfo.getRegionNameAsString(), e);
280       }
281     }
282 
283     this.region_a.getRegionFileSystem().createMergesDir();
284     this.journal.add(JournalEntry.CREATED_MERGE_DIR);
285 
286     Map<byte[], List<StoreFile>> hstoreFilesOfRegionA = closeAndOfflineRegion(
287         services, this.region_a, true, testing);
288     Map<byte[], List<StoreFile>> hstoreFilesOfRegionB = closeAndOfflineRegion(
289         services, this.region_b, false, testing);
290 
291     assert hstoreFilesOfRegionA != null && hstoreFilesOfRegionB != null;
292 
293 
294     //
295     // mergeStoreFiles creates merged region dirs under the region_a merges dir
296     // Nothing to unroll here if failure -- clean up of CREATE_MERGE_DIR will
297     // clean this up.
298     mergeStoreFiles(hstoreFilesOfRegionA, hstoreFilesOfRegionB);
299 
300     // Log to the journal that we are creating merged region. We could fail
301     // halfway through. If we do, we could have left
302     // stuff in fs that needs cleanup -- a storefile or two. Thats why we
303     // add entry to journal BEFORE rather than AFTER the change.
304     this.journal.add(JournalEntry.STARTED_MERGED_REGION_CREATION);
305     HRegion mergedRegion = createMergedRegionFromMerges(this.region_a,
306         this.region_b, this.mergedRegionInfo);
307 
308 
309     // This is the point of no return. Similar with SplitTransaction.
310     // IF we reach the PONR then subsequent failures need to crash out this
311     // regionserver
312     this.journal.add(JournalEntry.PONR);
313 
314     // Add merged region and delete region_a and region_b
315     // as an atomic update. See HBASE-7721. This update to META makes the region
316     // will determine whether the region is merged or not in case of failures.
317     // If it is successful, master will roll-forward, if not, master will
318     // rollback
319     if (!testing) {
320       MetaEditor.mergeRegions(server.getCatalogTracker(),
321           mergedRegion.getRegionInfo(), region_a.getRegionInfo(),
322           region_b.getRegionInfo(), server.getServerName());
323     }
324     return mergedRegion;
325   }
326 
327   /**
328    * Create a merged region from the merges directory under region a. In order
329    * to mock it for tests, place it with a new method.
330    * @param a hri of region a
331    * @param b hri of region b
332    * @param mergedRegion hri of merged region
333    * @return merged HRegion.
334    * @throws IOException
335    */
336   HRegion createMergedRegionFromMerges(final HRegion a, final HRegion b,
337       final HRegionInfo mergedRegion) throws IOException {
338     return a.createMergedRegionFromMerges(mergedRegion, b);
339   }
340 
341   /**
342    * Close the merging region and offline it in regionserver
343    * @param services
344    * @param region
345    * @param isRegionA true if it is merging region a, false if it is region b
346    * @param testing true if it is testing
347    * @return a map of family name to list of store files
348    * @throws IOException
349    */
350   private Map<byte[], List<StoreFile>> closeAndOfflineRegion(
351       final RegionServerServices services, final HRegion region,
352       final boolean isRegionA, final boolean testing) throws IOException {
353     Map<byte[], List<StoreFile>> hstoreFilesToMerge = null;
354     Exception exceptionToThrow = null;
355     try {
356       hstoreFilesToMerge = region.close(false);
357     } catch (Exception e) {
358       exceptionToThrow = e;
359     }
360     if (exceptionToThrow == null && hstoreFilesToMerge == null) {
361       // The region was closed by a concurrent thread. We can't continue
362       // with the merge, instead we must just abandon the merge. If we
363       // reopen or merge this could cause problems because the region has
364       // probably already been moved to a different server, or is in the
365       // process of moving to a different server.
366       exceptionToThrow = closedByOtherException;
367     }
368     if (exceptionToThrow != closedByOtherException) {
369       this.journal.add(isRegionA ? JournalEntry.CLOSED_REGION_A
370           : JournalEntry.CLOSED_REGION_B);
371     }
372     if (exceptionToThrow != null) {
373       if (exceptionToThrow instanceof IOException)
374         throw (IOException) exceptionToThrow;
375       throw new IOException(exceptionToThrow);
376     }
377 
378     if (!testing) {
379       services.removeFromOnlineRegions(region, null);
380     }
381     this.journal.add(isRegionA ? JournalEntry.OFFLINED_REGION_A
382         : JournalEntry.OFFLINED_REGION_B);
383     return hstoreFilesToMerge;
384   }
385 
386   /**
387    * Get merged region info through the specified two regions
388    * @param a merging region A
389    * @param b merging region B
390    * @return the merged region info
391    */
392   public static HRegionInfo getMergedRegionInfo(final HRegionInfo a,
393       final HRegionInfo b) {
394     long rid = EnvironmentEdgeManager.currentTimeMillis();
395     // Regionid is timestamp. Merged region's id can't be less than that of
396     // merging regions else will insert at wrong location in .META.
397     if (rid < a.getRegionId() || rid < b.getRegionId()) {
398       LOG.warn("Clock skew; merging regions id are " + a.getRegionId()
399           + " and " + b.getRegionId() + ", but current time here is " + rid);
400       rid = Math.max(a.getRegionId(), b.getRegionId()) + 1;
401     }
402 
403     byte[] startKey = null;
404     byte[] endKey = null;
405     if (a.compareTo(b) <= 0) {
406       startKey = a.getStartKey();
407       endKey = b.getEndKey();
408     } else {
409       startKey = b.getStartKey();
410       endKey = a.getEndKey();
411     }
412 
413     // Merged region is sorted between two merging regions in META
414     HRegionInfo mergedRegionInfo = new HRegionInfo(a.getTableName(), startKey,
415         endKey, false, rid);
416     return mergedRegionInfo;
417   }
418 
419   /**
420    * Perform time consuming opening of the merged region.
421    * @param server Hosting server instance. Can be null when testing (won't try
422    *          and update in zk if a null server)
423    * @param services Used to online/offline regions.
424    * @param merged the merged region
425    * @throws IOException If thrown, transaction failed. Call
426    *           {@link #rollback(Server, RegionServerServices)}
427    */
428   void openMergedRegion(final Server server,
429       final RegionServerServices services, HRegion merged) throws IOException {
430     boolean stopped = server != null && server.isStopped();
431     boolean stopping = services != null && services.isStopping();
432     if (stopped || stopping) {
433       LOG.info("Not opening merged region  " + merged.getRegionNameAsString()
434           + " because stopping=" + stopping + ", stopped=" + stopped);
435       return;
436     }
437     HRegionInfo hri = merged.getRegionInfo();
438     LoggingProgressable reporter = server == null ? null
439         : new LoggingProgressable(hri, server.getConfiguration().getLong(
440             "hbase.regionserver.regionmerge.open.log.interval", 10000));
441     merged.openHRegion(reporter);
442 
443     if (services != null) {
444       try {
445         services.postOpenDeployTasks(merged, server.getCatalogTracker());
446         services.addToOnlineRegions(merged);
447       } catch (KeeperException ke) {
448         throw new IOException(ke);
449       }
450     }
451 
452   }
453 
454   /**
455    * Finish off merge transaction, transition the zknode
456    * @param server Hosting server instance. Can be null when testing (won't try
457    *          and update in zk if a null server)
458    * @param services Used to online/offline regions.
459    * @throws IOException If thrown, transaction failed. Call
460    *           {@link #rollback(Server, RegionServerServices)}
461    */
462   void transitionZKNode(final Server server, final RegionServerServices services)
463       throws IOException {
464     if (server == null || server.getZooKeeper() == null) {
465       return;
466     }
467 
468     // Tell master about merge by updating zk. If we fail, abort.
469     try {
470       this.znodeVersion = transitionNodeMerge(server.getZooKeeper(),
471           this.mergedRegionInfo, region_a.getRegionInfo(),
472           region_b.getRegionInfo(), server.getServerName(), this.znodeVersion);
473 
474       long startTime = EnvironmentEdgeManager.currentTimeMillis();
475       int spins = 0;
476       // Now wait for the master to process the merge. We know it's done
477       // when the znode is deleted. The reason we keep tickling the znode is
478       // that it's possible for the master to miss an event.
479       do {
480         if (spins % 10 == 0) {
481           LOG.debug("Still waiting on the master to process the merge for "
482               + this.mergedRegionInfo.getEncodedName() + ", waited "
483               + (EnvironmentEdgeManager.currentTimeMillis() - startTime) + "ms");
484         }
485         Thread.sleep(100);
486         // When this returns -1 it means the znode doesn't exist
487         this.znodeVersion = tickleNodeMerge(server.getZooKeeper(),
488             this.mergedRegionInfo, region_a.getRegionInfo(),
489             region_b.getRegionInfo(), server.getServerName(), this.znodeVersion);
490         spins++;
491       } while (this.znodeVersion != -1 && !server.isStopped()
492           && !services.isStopping());
493     } catch (Exception e) {
494       if (e instanceof InterruptedException) {
495         Thread.currentThread().interrupt();
496       }
497       throw new IOException("Failed telling master about merge "
498           + mergedRegionInfo.getEncodedName(), e);
499     }
500 
501 
502     // Leaving here, the mergedir with its dross will be in place but since the
503     // merge was successful, just leave it; it'll be cleaned when region_a is
504     // cleaned up by CatalogJanitor on master
505   }
506 
507   /**
508    * Create reference file(s) of merging regions under the region_a merges dir
509    * @param hstoreFilesOfRegionA
510    * @param hstoreFilesOfRegionB
511    * @throws IOException
512    */
513   private void mergeStoreFiles(
514       Map<byte[], List<StoreFile>> hstoreFilesOfRegionA,
515       Map<byte[], List<StoreFile>> hstoreFilesOfRegionB)
516       throws IOException {
517     // Create reference file(s) of region A in mergdir
518     HRegionFileSystem fs_a = this.region_a.getRegionFileSystem();
519     for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionA
520         .entrySet()) {
521       String familyName = Bytes.toString(entry.getKey());
522       for (StoreFile storeFile : entry.getValue()) {
523         fs_a.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
524             this.mergesdir);
525       }
526     }
527     // Create reference file(s) of region B in mergedir
528     HRegionFileSystem fs_b = this.region_b.getRegionFileSystem();
529     for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionB
530         .entrySet()) {
531       String familyName = Bytes.toString(entry.getKey());
532       for (StoreFile storeFile : entry.getValue()) {
533         fs_b.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
534             this.mergesdir);
535       }
536     }
537   }
538 
539   /**
540    * @param server Hosting server instance (May be null when testing).
541    * @param services Services of regionserver, used to online regions.
542    * @throws IOException If thrown, rollback failed. Take drastic action.
543    * @return True if we successfully rolled back, false if we got to the point
544    *         of no return and so now need to abort the server to minimize
545    *         damage.
546    */
547   public boolean rollback(final Server server,
548       final RegionServerServices services) throws IOException {
549     assert this.mergedRegionInfo != null;
550     boolean result = true;
551     ListIterator<JournalEntry> iterator = this.journal
552         .listIterator(this.journal.size());
553     // Iterate in reverse.
554     while (iterator.hasPrevious()) {
555       JournalEntry je = iterator.previous();
556       switch (je) {
557 
558         case SET_MERGING_IN_ZK:
559           if (server != null && server.getZooKeeper() != null) {
560             cleanZK(server, this.mergedRegionInfo);
561           }
562           break;
563 
564         case CREATED_MERGE_DIR:
565           this.region_a.writestate.writesEnabled = true;
566           this.region_b.writestate.writesEnabled = true;
567           this.region_a.getRegionFileSystem().cleanupMergesDir();
568           break;
569 
570         case CLOSED_REGION_A:
571           try {
572             // So, this returns a seqid but if we just closed and then reopened,
573             // we should be ok. On close, we flushed using sequenceid obtained
574             // from hosting regionserver so no need to propagate the sequenceid
575             // returned out of initialize below up into regionserver as we
576             // normally do.
577             this.region_a.initialize();
578           } catch (IOException e) {
579             LOG.error("Failed rollbacking CLOSED_REGION_A of region "
580                 + this.region_a.getRegionNameAsString(), e);
581             throw new RuntimeException(e);
582           }
583           break;
584 
585         case OFFLINED_REGION_A:
586           if (services != null)
587             services.addToOnlineRegions(this.region_a);
588           break;
589 
590         case CLOSED_REGION_B:
591           try {
592             this.region_b.initialize();
593           } catch (IOException e) {
594             LOG.error("Failed rollbacking CLOSED_REGION_A of region "
595                 + this.region_b.getRegionNameAsString(), e);
596             throw new RuntimeException(e);
597           }
598           break;
599 
600         case OFFLINED_REGION_B:
601           if (services != null)
602             services.addToOnlineRegions(this.region_b);
603           break;
604 
605         case STARTED_MERGED_REGION_CREATION:
606           this.region_a.getRegionFileSystem().cleanupMergedRegion(
607               this.mergedRegionInfo);
608           break;
609 
610         case PONR:
611           // We got to the point-of-no-return so we need to just abort. Return
612           // immediately. Do not clean up created merged regions.
613           return false;
614 
615         default:
616           throw new RuntimeException("Unhandled journal entry: " + je);
617       }
618     }
619     return result;
620   }
621 
622   HRegionInfo getMergedRegionInfo() {
623     return this.mergedRegionInfo;
624   }
625 
626   // For unit testing.
627   Path getMergesDir() {
628     return this.mergesdir;
629   }
630 
631   private static void cleanZK(final Server server, final HRegionInfo hri) {
632     try {
633       // Only delete if its in expected state; could have been hijacked.
634       ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
635           EventType.RS_ZK_REGION_MERGING);
636     } catch (KeeperException.NoNodeException e) {
637       LOG.warn("Failed cleanup zk node of " + hri.getRegionNameAsString(), e);
638     } catch (KeeperException e) {
639       server.abort("Failed cleanup zk node of " + hri.getRegionNameAsString(),e);
640     }
641 
642   }
643 
644   /**
645    * Creates a new ephemeral node in the MERGING state for the merged region.
646    * Create it ephemeral in case regionserver dies mid-merge.
647    * 
648    * <p>
649    * Does not transition nodes from other states. If a node already exists for
650    * this region, a {@link NodeExistsException} will be thrown.
651    *
652    * @param zkw zk reference
653    * @param region region to be created as offline
654    * @param serverName server event originates from
655    * @return Version of znode created.
656    * @throws KeeperException
657    * @throws IOException
658    */
659   int createNodeMerging(final ZooKeeperWatcher zkw, final HRegionInfo region,
660       final ServerName serverName) throws KeeperException, IOException {
661     LOG.debug(zkw.prefix("Creating ephemeral node for "
662         + region.getEncodedName() + " in MERGING state"));
663     RegionTransition rt = RegionTransition.createRegionTransition(
664         EventType.RS_ZK_REGION_MERGING, region.getRegionName(), serverName);
665     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
666     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
667       throw new IOException("Failed create of ephemeral " + node);
668     }
669     // Transition node from MERGING to MERGING and pick up version so we
670     // can be sure this znode is ours; version is needed deleting.
671     return transitionNodeMerging(zkw, region, serverName, -1);
672   }
673 
674   /**
675    * Transitions an existing node for the specified region which is currently in
676    * the MERGING state to be in the MERGE state. Converts the ephemeral MERGING
677    * znode to an ephemeral MERGE node. Master cleans up MERGE znode when it
678    * reads it (or if we crash, zk will clean it up).
679    *
680    * <p>
681    * Does not transition nodes from other states. If for some reason the node
682    * could not be transitioned, the method returns -1. If the transition is
683    * successful, the version of the node after transition is returned.
684    *
685    * <p>
686    * This method can fail and return false for three different reasons:
687    * <ul>
688    * <li>Node for this region does not exist</li>
689    * <li>Node for this region is not in MERGING state</li>
690    * <li>After verifying MERGING state, update fails because of wrong version
691    * (this should never actually happen since an RS only does this transition
692    * following a transition to MERGING. if two RS are conflicting, one would
693    * fail the original transition to MERGING and not this transition)</li>
694    * </ul>
695    *
696    * <p>
697    * Does not set any watches.
698    *
699    * <p>
700    * This method should only be used by a RegionServer when completing the open
701    * of merged region.
702    *
703    * @param zkw zk reference
704    * @param merged region to be transitioned to opened
705    * @param a merging region A
706    * @param b merging region B
707    * @param serverName server event originates from
708    * @param znodeVersion expected version of data before modification
709    * @return version of node after transition, -1 if unsuccessful transition
710    * @throws KeeperException if unexpected zookeeper exception
711    * @throws IOException
712    */
713   private static int transitionNodeMerge(ZooKeeperWatcher zkw,
714       HRegionInfo merged, HRegionInfo a, HRegionInfo b, ServerName serverName,
715       final int znodeVersion) throws KeeperException, IOException {
716     byte[] payload = HRegionInfo.toDelimitedByteArray(merged, a, b);
717     return ZKAssign.transitionNode(zkw, merged, serverName,
718         EventType.RS_ZK_REGION_MERGING, EventType.RS_ZK_REGION_MERGE,
719         znodeVersion, payload);
720   }
721 
722   /**
723    *
724    * @param zkw zk reference
725    * @param parent region to be transitioned to merging
726    * @param serverName server event originates from
727    * @param version znode version
728    * @return version of node after transition, -1 if unsuccessful transition
729    * @throws KeeperException
730    * @throws IOException
731    */
732   int transitionNodeMerging(final ZooKeeperWatcher zkw,
733       final HRegionInfo parent, final ServerName serverName, final int version)
734       throws KeeperException, IOException {
735     return ZKAssign.transitionNode(zkw, parent, serverName,
736             EventType.RS_ZK_REGION_MERGING, EventType.RS_ZK_REGION_MERGING,
737         version);
738   }
739 
740   private static int tickleNodeMerge(ZooKeeperWatcher zkw, HRegionInfo merged,
741       HRegionInfo a, HRegionInfo b, ServerName serverName,
742       final int znodeVersion) throws KeeperException, IOException {
743     byte[] payload = HRegionInfo.toDelimitedByteArray(a, b);
744     return ZKAssign.transitionNode(zkw, merged, serverName,
745         EventType.RS_ZK_REGION_MERGE, EventType.RS_ZK_REGION_MERGE,
746         znodeVersion, payload);
747   }
748 
749   /**
750    * Checks if the given region has merge qualifier in .META.
751    * @param services
752    * @param regionName name of specified region
753    * @return true if the given region has merge qualifier in META.(It will be
754    *         cleaned by CatalogJanitor)
755    * @throws IOException
756    */
757   boolean hasMergeQualifierInMeta(final RegionServerServices services,
758       final byte[] regionName) throws IOException {
759     // Get merge regions if it is a merged region and already has merge
760     // qualifier
761     Pair<HRegionInfo, HRegionInfo> mergeRegions = MetaReader
762         .getRegionsFromMergeQualifier(services.getCatalogTracker(), regionName);
763     if (mergeRegions != null &&
764         (mergeRegions.getFirst() != null || mergeRegions.getSecond() != null)) {
765       // It has merge qualifier
766       return true;
767     }
768     return false;
769   }
770 }
771