View Javadoc

1   /**
2    * Copyright The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements. See the NOTICE file distributed with this
6    * work for additional information regarding copyright ownership. The ASF
7    * licenses this file to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance with the License.
9    * You may obtain a copy of the License at
10   *
11   * http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
16   * License for the specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_MERGED;
22  import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_MERGING;
23  import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REQUEST_REGION_MERGE;
24  
25  import java.io.IOException;
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.ListIterator;
29  import java.util.Map;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.classification.InterfaceAudience;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.HRegionInfo;
37  import org.apache.hadoop.hbase.MetaMutationAnnotation;
38  import org.apache.hadoop.hbase.RegionTransition;
39  import org.apache.hadoop.hbase.Server;
40  import org.apache.hadoop.hbase.ServerName;
41  import org.apache.hadoop.hbase.catalog.CatalogTracker;
42  import org.apache.hadoop.hbase.catalog.MetaEditor;
43  import org.apache.hadoop.hbase.catalog.MetaReader;
44  import org.apache.hadoop.hbase.client.Delete;
45  import org.apache.hadoop.hbase.client.Mutation;
46  import org.apache.hadoop.hbase.client.Put;
47  import org.apache.hadoop.hbase.executor.EventType;
48  import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
49  import org.apache.hadoop.hbase.regionserver.SplitTransaction.LoggingProgressable;
50  import org.apache.hadoop.hbase.util.Bytes;
51  import org.apache.hadoop.hbase.util.ConfigUtil;
52  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
53  import org.apache.hadoop.hbase.util.Pair;
54  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
55  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
56  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
57  import org.apache.zookeeper.KeeperException;
58  import org.apache.zookeeper.KeeperException.NodeExistsException;
59  import org.apache.zookeeper.data.Stat;
60  
/**
 * Executes region merge as a "transaction". It is similar to
 * SplitTransaction. Call {@link #prepare(RegionServerServices)} to set up the
 * transaction, {@link #execute(Server, RegionServerServices)} to run the
 * transaction and {@link #rollback(Server, RegionServerServices)} to clean up if
 * execute fails.
 *
 * <p>
 * Here is an example of how you would use this class:
 *
 * <pre>
 *  RegionMergeTransaction mt = new RegionMergeTransaction(regionA, regionB, forcible);
 *  if (!mt.prepare(services)) return;
 *  try {
 *    mt.execute(server, services);
 *  } catch (IOException ioe) {
 *    try {
 *      mt.rollback(server, services);
 *      return;
 *    } catch (RuntimeException e) {
 *      myAbortable.abort("Failed merge, abort");
 *    }
 *  }
 * </pre>
 * <p>
 * This class is not thread safe. The caller needs to ensure that the merge is
 * run by one thread only.
 */
89  @InterfaceAudience.Private
90  public class RegionMergeTransaction {
  // Class-wide logger.
  private static final Log LOG = LogFactory.getLog(RegionMergeTransaction.class);

  // Merged region info; assigned by prepare() once all checks have passed.
  private HRegionInfo mergedRegionInfo;
  // The two regions being merged. The constructor orders them so that
  // region_a sorts before region_b.
  private final HRegion region_a;
  private final HRegion region_b;
  // merges dir is under region_a (taken from region_a's region file system
  // in the constructor)
  private final Path mergesdir;
  // Version of the merging znode as last returned by the ZK helper calls;
  // starts out as -1.
  private int znodeVersion = -1;
  // We only merge adjacent regions if forcible is false
  private final boolean forcible;
  // Whether ZK is used for assignment; recomputed at the start of execute().
  private boolean useZKForAssignment;
  // Time handed in by the caller as "the time at the master side"; the
  // three-argument constructor substitutes the local current time.
  private final long masterSystemTime;
105 
  /**
   * Types to add to the transaction journal. Each enum is a step in the merge
   * transaction. Used to figure out how much we need to roll back.
   */
  enum JournalEntry {
    /**
     * Set region as in transition, set it into MERGING state.
     */
    SET_MERGING_IN_ZK,
    /**
     * We created the temporary merge data directory.
     */
    CREATED_MERGE_DIR,
    /**
     * Closed the merging region A.
     */
    CLOSED_REGION_A,
    /**
     * The merging region A has been taken out of the server's online regions list.
     */
    OFFLINED_REGION_A,
    /**
     * Closed the merging region B.
     */
    CLOSED_REGION_B,
    /**
     * The merging region B has been taken out of the server's online regions list.
     */
    OFFLINED_REGION_B,
    /**
     * Started on the creation of the merged region.
     */
    STARTED_MERGED_REGION_CREATION,
    /**
     * Point of no return. If we got here, then the transaction is not
     * recoverable other than by crashing out the regionserver.
     */
    PONR
  }
145 
146   /*
147    * Journal of how far the merge transaction has progressed.
148    */
149   private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
150 
151   private static IOException closedByOtherException = new IOException(
152       "Failed to close region: already closed by another thread");
153 
154   private RegionServerCoprocessorHost rsCoprocessorHost = null;
155 
  /**
   * Constructor. Delegates to the four-argument constructor, passing the
   * current time from {@link EnvironmentEdgeManager} as the master system time.
   * @param a region a to merge
   * @param b region b to merge
   * @param forcible if false, we will only merge adjacent regions
   */
  public RegionMergeTransaction(final HRegion a, final HRegion b,
      final boolean forcible) {
    this(a, b, forcible, EnvironmentEdgeManager.currentTimeMillis());
  }
166 
167   /**
168    * Constructor
169    * @param a region a to merge
170    * @param b region b to merge
171    * @param forcible if false, we will only merge adjacent regions
172    * @param masterSystemTime the time at the master side
173    */
174   public RegionMergeTransaction(final HRegion a, final HRegion b,
175       final boolean forcible, long masterSystemTime) {
176     if (a.getRegionInfo().compareTo(b.getRegionInfo()) <= 0) {
177       this.region_a = a;
178       this.region_b = b;
179     } else {
180       this.region_a = b;
181       this.region_b = a;
182     }
183     this.forcible = forcible;
184     this.masterSystemTime = masterSystemTime;
185     this.mergesdir = region_a.getRegionFileSystem().getMergesDir();
186   }
187 
188   /**
189    * Does checks on merge inputs.
190    * @param services
191    * @return <code>true</code> if the regions are mergeable else
192    *         <code>false</code> if they are not (e.g. its already closed, etc.).
193    */
194   public boolean prepare(final RegionServerServices services) {
195     if (!region_a.getTableDesc().getTableName()
196         .equals(region_b.getTableDesc().getTableName())) {
197       LOG.info("Can't merge regions " + region_a + "," + region_b
198           + " because they do not belong to the same table");
199       return false;
200     }
201     if (region_a.getRegionInfo().equals(region_b.getRegionInfo())) {
202       LOG.info("Can't merge the same region " + region_a);
203       return false;
204     }
205     if (!forcible && !HRegionInfo.areAdjacent(region_a.getRegionInfo(),
206             region_b.getRegionInfo())) {
207       String msg = "Skip merging " + this.region_a.getRegionNameAsString()
208           + " and " + this.region_b.getRegionNameAsString()
209           + ", because they are not adjacent.";
210       LOG.info(msg);
211       return false;
212     }
213     if (!this.region_a.isMergeable() || !this.region_b.isMergeable()) {
214       return false;
215     }
216     try {
217       boolean regionAHasMergeQualifier = hasMergeQualifierInMeta(services,
218           region_a.getRegionName());
219       if (regionAHasMergeQualifier ||
220           hasMergeQualifierInMeta(services, region_b.getRegionName())) {
221         LOG.debug("Region " + (regionAHasMergeQualifier ? region_a.getRegionNameAsString()
222                 : region_b.getRegionNameAsString())
223             + " is not mergeable because it has merge qualifier in META");
224         return false;
225       }
226     } catch (IOException e) {
227       LOG.warn("Failed judging whether merge transaction is available for "
228               + region_a.getRegionNameAsString() + " and "
229               + region_b.getRegionNameAsString(), e);
230       return false;
231     }
232 
233     // WARN: make sure there is no parent region of the two merging regions in
234     // hbase:meta If exists, fixing up daughters would cause daughter regions(we
235     // have merged one) online again when we restart master, so we should clear
236     // the parent region to prevent the above case
237     // Since HBASE-7721, we don't need fix up daughters any more. so here do
238     // nothing
239 
240     this.mergedRegionInfo = getMergedRegionInfo(region_a.getRegionInfo(),
241         region_b.getRegionInfo());
242     return true;
243   }
244 
245   /**
246    * Run the transaction.
247    * @param server Hosting server instance. Can be null when testing (won't try
248    *          and update in zk if a null server)
249    * @param services Used to online/offline regions.
250    * @throws IOException If thrown, transaction failed. Call
251    *           {@link #rollback(Server, RegionServerServices)}
252    * @return merged region
253    * @throws IOException
254    * @see #rollback(Server, RegionServerServices)
255    */
256   public HRegion execute(final Server server,
257       final RegionServerServices services) throws IOException {
258     useZKForAssignment = server == null ? true :
259       ConfigUtil.useZKForAssignment(server.getConfiguration());
260     if (rsCoprocessorHost == null) {
261       rsCoprocessorHost = server != null ? ((HRegionServer) server).getCoprocessorHost() : null;
262     }
263     HRegion mergedRegion = createMergedRegion(server, services);
264     if (rsCoprocessorHost != null) {
265       rsCoprocessorHost.postMergeCommit(this.region_a, this.region_b, mergedRegion);
266     }
267     return stepsAfterPONR(server, services, mergedRegion);
268   }
269 
  /**
   * Runs the steps that follow the point of no return: opens the merged
   * region and then transitions the znode.
   * @param server Hosting server instance; passed through to both steps
   * @param services Used to online/offline regions; passed through to both steps
   * @param mergedRegion the merged region to open
   * @return the same <code>mergedRegion</code> that was passed in
   * @throws IOException if either step fails
   */
  public HRegion stepsAfterPONR(final Server server, final RegionServerServices services,
      HRegion mergedRegion) throws IOException {
    openMergedRegion(server, services, mergedRegion);
    transitionZKNode(server, services, mergedRegion);
    return mergedRegion;
  }
276 
277   /**
278    * Prepare the merged region and region files.
279    * @param server Hosting server instance. Can be null when testing (won't try
280    *          and update in zk if a null server)
281    * @param services Used to online/offline regions.
282    * @return merged region
283    * @throws IOException If thrown, transaction failed. Call
284    *           {@link #rollback(Server, RegionServerServices)}
285    */
286   HRegion createMergedRegion(final Server server,
287       final RegionServerServices services) throws IOException {
288     LOG.info("Starting merge of " + region_a + " and "
289         + region_b.getRegionNameAsString() + ", forcible=" + forcible);
290     if ((server != null && server.isStopped())
291         || (services != null && services.isStopping())) {
292       throw new IOException("Server is stopped or stopping");
293     }
294 
295     if (rsCoprocessorHost != null) {
296       if (rsCoprocessorHost.preMerge(this.region_a, this.region_b)) {
297         throw new IOException("Coprocessor bypassing regions " + this.region_a + " "
298             + this.region_b + " merge.");
299       }
300     }
301 
302     // If true, no cluster to write meta edits to or to update znodes in.
303     boolean testing = server == null ? true : server.getConfiguration()
304         .getBoolean("hbase.testing.nocluster", false);
305 
306     HRegion mergedRegion = stepsBeforePONR(server, services, testing);
307 
308     @MetaMutationAnnotation
309     List<Mutation> metaEntries = new ArrayList<Mutation>();
310     if (rsCoprocessorHost != null) {
311       if (rsCoprocessorHost.preMergeCommit(this.region_a, this.region_b, metaEntries)) {
312         throw new IOException("Coprocessor bypassing regions " + this.region_a + " "
313             + this.region_b + " merge.");
314       }
315       try {
316         for (Mutation p : metaEntries) {
317           HRegionInfo.parseRegionName(p.getRow());
318         }
319       } catch (IOException e) {
320         LOG.error("Row key of mutation from coprocessor is not parsable as region name."
321             + "Mutations from coprocessor should only be for hbase:meta table.", e);
322         throw e;
323       }
324     }
325 
326     // This is the point of no return. Similar with SplitTransaction.
327     // IF we reach the PONR then subsequent failures need to crash out this
328     // regionserver
329     this.journal.add(JournalEntry.PONR);
330 
331     // Add merged region and delete region_a and region_b
332     // as an atomic update. See HBASE-7721. This update to hbase:meta makes the region
333     // will determine whether the region is merged or not in case of failures.
334     // If it is successful, master will roll-forward, if not, master will
335     // rollback
336     if (!testing && useZKForAssignment) {
337       if (metaEntries.isEmpty()) {
338         MetaEditor.mergeRegions(server.getCatalogTracker(), mergedRegion.getRegionInfo(), region_a
339             .getRegionInfo(), region_b.getRegionInfo(), server.getServerName(), masterSystemTime);
340       } else {
341         mergeRegionsAndPutMetaEntries(server.getCatalogTracker(), mergedRegion.getRegionInfo(),
342           region_a.getRegionInfo(), region_b.getRegionInfo(), server.getServerName(), metaEntries);
343       }
344     } else if (services != null && !useZKForAssignment) {
345       if (!services.reportRegionStateTransition(TransitionCode.MERGE_PONR,
346           mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo())) {
347         // Passed PONR, let SSH clean it up
348         throw new IOException("Failed to notify master that merge passed PONR: "
349           + region_a.getRegionInfo().getRegionNameAsString() + " and "
350           + region_b.getRegionInfo().getRegionNameAsString());
351       }
352     }
353     return mergedRegion;
354   }
355 
  /**
   * Appends the merge mutations (see
   * {@link #prepareMutationsForMerge(HRegionInfo, HRegionInfo, HRegionInfo, ServerName, List)})
   * to the given list and then applies the whole list to the meta table.
   * @param catalogTracker handed to {@link MetaEditor#mutateMetaTable}
   * @param mergedRegion info of the merged region
   * @param regionA info of merging region A
   * @param regionB info of merging region B
   * @param serverName server recorded as the location of the merged region
   * @param metaEntries mutations to apply; the merge mutations are appended
   *          to this list in place
   * @throws IOException if preparing or applying the mutations fails
   */
  private void mergeRegionsAndPutMetaEntries(CatalogTracker catalogTracker,
      HRegionInfo mergedRegion, HRegionInfo regionA, HRegionInfo regionB, ServerName serverName,
      List<Mutation> metaEntries) throws IOException {
    prepareMutationsForMerge(mergedRegion, regionA, regionB, serverName, metaEntries);
    MetaEditor.mutateMetaTable(catalogTracker, metaEntries);
  }
362 
363   public void prepareMutationsForMerge(HRegionInfo mergedRegion, HRegionInfo regionA,
364       HRegionInfo regionB, ServerName serverName, List<Mutation> mutations) throws IOException {
365     HRegionInfo copyOfMerged = new HRegionInfo(mergedRegion);
366 
367     // use the maximum of what master passed us vs local time.
368     long time = Math.max(EnvironmentEdgeManager.currentTimeMillis(), masterSystemTime);
369 
370     // Put for parent
371     Put putOfMerged = MetaEditor.makePutFromRegionInfo(copyOfMerged, time);
372     putOfMerged.add(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER, regionA.toByteArray());
373     putOfMerged.add(HConstants.CATALOG_FAMILY, HConstants.MERGEB_QUALIFIER, regionB.toByteArray());
374     mutations.add(putOfMerged);
375     // Deletes for merging regions
376     Delete deleteA = MetaEditor.makeDeleteFromRegionInfo(regionA, time);
377     Delete deleteB = MetaEditor.makeDeleteFromRegionInfo(regionB, time);
378     mutations.add(deleteA);
379     mutations.add(deleteB);
380     // The merged is a new region, openSeqNum = 1 is fine.
381     addLocation(putOfMerged, serverName, 1);
382   }
383 
384   @SuppressWarnings("deprecation")
385   public Put addLocation(final Put p, final ServerName sn, long openSeqNum) {
386     p.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes
387         .toBytes(sn.getHostAndPort()));
388     p.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(sn
389         .getStartcode()));
390     p.add(HConstants.CATALOG_FAMILY, HConstants.SEQNUM_QUALIFIER, Bytes.toBytes(openSeqNum));
391     return p;
392   }
393 
  /**
   * Runs every step of the merge that precedes the point of no return,
   * journaling progress as it goes so that a rollback knows how far to unwind.
   * In order: announce the merge (znode, or a READY_TO_MERGE report to the
   * master when ZK is not used for assignment), wait for the znode to be
   * processed, create the merges dir, close and offline both regions, merge
   * their store files, re-check the merging znode, and finally create the
   * merged region from the merges dir.
   * @param server Hosting server instance; its ZooKeeper and server name are
   *          used when <code>useZKAndZKIsSet(server)</code> holds
   * @param services Used to report to master and to offline regions.
   * @param testing passed on to the close-and-offline step
   * @return the merged region
   * @throws IOException if any step fails; KeeperExceptions are wrapped
   */
  public HRegion stepsBeforePONR(final Server server, final RegionServerServices services,
      boolean testing) throws IOException {
    // Set ephemeral MERGING znode up in zk. Mocked servers sometimes don't
    // have zookeeper so don't do zk stuff if server or zookeeper is null
    if (useZKAndZKIsSet(server)) {
      try {
        createNodeMerging(server.getZooKeeper(), this.mergedRegionInfo,
          server.getServerName(), region_a.getRegionInfo(), region_b.getRegionInfo());
      } catch (KeeperException e) {
        throw new IOException("Failed creating PENDING_MERGE znode on "
            + this.mergedRegionInfo.getRegionNameAsString(), e);
      }
    } else if (services != null && !useZKForAssignment) {
      // No ZK for assignment: ask the master directly for the go-ahead.
      if (!services.reportRegionStateTransition(TransitionCode.READY_TO_MERGE,
          mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo())) {
        throw new IOException("Failed to get ok from master to merge "
          + region_a.getRegionInfo().getRegionNameAsString() + " and "
          + region_b.getRegionInfo().getRegionNameAsString());
      }
    }
    // Journaled regardless of which of the branches above (if any) ran.
    this.journal.add(JournalEntry.SET_MERGING_IN_ZK);
    if (useZKAndZKIsSet(server)) {
      // After creating the merge node, wait for master to transition it
      // from PENDING_MERGE to MERGING so that we can move on. We want master
      // to know about it so that it won't transition any region which is merging.
      znodeVersion = getZKNode(server, services);
    }

    this.region_a.getRegionFileSystem().createMergesDir();
    this.journal.add(JournalEntry.CREATED_MERGE_DIR);

    // Region A first, then region B; each call journals its own progress.
    Map<byte[], List<StoreFile>> hstoreFilesOfRegionA = closeAndOfflineRegion(
        services, this.region_a, true, testing);
    Map<byte[], List<StoreFile>> hstoreFilesOfRegionB = closeAndOfflineRegion(
        services, this.region_b, false, testing);

    assert hstoreFilesOfRegionA != null && hstoreFilesOfRegionB != null;


    //
    // mergeStoreFiles creates merged region dirs under the region_a merges dir
    // Nothing to unroll here if failure -- clean up of CREATED_MERGE_DIR will
    // clean this up.
    mergeStoreFiles(hstoreFilesOfRegionA, hstoreFilesOfRegionB);

    if (server != null && useZKAndZKIsSet(server)) {
      try {
        // Do one more check on the merging znode (before it is too late) in case
        // any merging region is moved somehow. If so, the znode transition will fail.
        this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
          this.mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo(),
          server.getServerName(), this.znodeVersion,
          RS_ZK_REGION_MERGING, RS_ZK_REGION_MERGING);
      } catch (KeeperException e) {
        throw new IOException("Failed setting MERGING znode on "
            + this.mergedRegionInfo.getRegionNameAsString(), e);
      }
    }

    // Log to the journal that we are creating merged region. We could fail
    // halfway through. If we do, we could have left
    // stuff in fs that needs cleanup -- a storefile or two. Thats why we
    // add entry to journal BEFORE rather than AFTER the change.
    this.journal.add(JournalEntry.STARTED_MERGED_REGION_CREATION);
    HRegion mergedRegion = createMergedRegionFromMerges(this.region_a,
        this.region_b, this.mergedRegionInfo);
    return mergedRegion;
  }
462 
  /**
   * Create a merged region from the merges directory under region a. This is
   * a thin delegate to {@link HRegion#createMergedRegionFromMerges}, kept as
   * its own method so that tests can mock it.
   * @param a merging region a; the call is made on this region
   * @param b merging region b
   * @param mergedRegion hri of merged region
   * @return merged HRegion.
   * @throws IOException if the underlying call fails
   */
  HRegion createMergedRegionFromMerges(final HRegion a, final HRegion b,
      final HRegionInfo mergedRegion) throws IOException {
    return a.createMergedRegionFromMerges(mergedRegion, b);
  }
476 
477   /**
478    * Close the merging region and offline it in regionserver
479    * @param services
480    * @param region
481    * @param isRegionA true if it is merging region a, false if it is region b
482    * @param testing true if it is testing
483    * @return a map of family name to list of store files
484    * @throws IOException
485    */
486   private Map<byte[], List<StoreFile>> closeAndOfflineRegion(
487       final RegionServerServices services, final HRegion region,
488       final boolean isRegionA, final boolean testing) throws IOException {
489     Map<byte[], List<StoreFile>> hstoreFilesToMerge = null;
490     Exception exceptionToThrow = null;
491     try {
492       hstoreFilesToMerge = region.close(false);
493     } catch (Exception e) {
494       exceptionToThrow = e;
495     }
496     if (exceptionToThrow == null && hstoreFilesToMerge == null) {
497       // The region was closed by a concurrent thread. We can't continue
498       // with the merge, instead we must just abandon the merge. If we
499       // reopen or merge this could cause problems because the region has
500       // probably already been moved to a different server, or is in the
501       // process of moving to a different server.
502       exceptionToThrow = closedByOtherException;
503     }
504     if (exceptionToThrow != closedByOtherException) {
505       this.journal.add(isRegionA ? JournalEntry.CLOSED_REGION_A
506           : JournalEntry.CLOSED_REGION_B);
507     }
508     if (exceptionToThrow != null) {
509       if (exceptionToThrow instanceof IOException)
510         throw (IOException) exceptionToThrow;
511       throw new IOException(exceptionToThrow);
512     }
513 
514     if (!testing) {
515       services.removeFromOnlineRegions(region, null);
516     }
517     this.journal.add(isRegionA ? JournalEntry.OFFLINED_REGION_A
518         : JournalEntry.OFFLINED_REGION_B);
519     return hstoreFilesToMerge;
520   }
521 
522   /**
523    * Get merged region info through the specified two regions
524    * @param a merging region A
525    * @param b merging region B
526    * @return the merged region info
527    */
528   public static HRegionInfo getMergedRegionInfo(final HRegionInfo a,
529       final HRegionInfo b) {
530     long rid = EnvironmentEdgeManager.currentTimeMillis();
531     // Regionid is timestamp. Merged region's id can't be less than that of
532     // merging regions else will insert at wrong location in hbase:meta
533     if (rid < a.getRegionId() || rid < b.getRegionId()) {
534       LOG.warn("Clock skew; merging regions id are " + a.getRegionId()
535           + " and " + b.getRegionId() + ", but current time here is " + rid);
536       rid = Math.max(a.getRegionId(), b.getRegionId()) + 1;
537     }
538 
539     byte[] startKey = null;
540     byte[] endKey = null;
541     // Choose the smaller as start key
542     if (a.compareTo(b) <= 0) {
543       startKey = a.getStartKey();
544     } else {
545       startKey = b.getStartKey();
546     }
547     // Choose the bigger as end key
548     if (Bytes.equals(a.getEndKey(), HConstants.EMPTY_BYTE_ARRAY)
549         || (!Bytes.equals(b.getEndKey(), HConstants.EMPTY_BYTE_ARRAY)
550             && Bytes.compareTo(a.getEndKey(), b.getEndKey()) > 0)) {
551       endKey = a.getEndKey();
552     } else {
553       endKey = b.getEndKey();
554     }
555 
556     // Merged region is sorted between two merging regions in META
557     HRegionInfo mergedRegionInfo = new HRegionInfo(a.getTable(), startKey,
558         endKey, false, rid);
559     return mergedRegionInfo;
560   }
561 
562   /**
563    * Perform time consuming opening of the merged region.
564    * @param server Hosting server instance. Can be null when testing (won't try
565    *          and update in zk if a null server)
566    * @param services Used to online/offline regions.
567    * @param merged the merged region
568    * @throws IOException If thrown, transaction failed. Call
569    *           {@link #rollback(Server, RegionServerServices)}
570    */
571   void openMergedRegion(final Server server,
572       final RegionServerServices services, HRegion merged) throws IOException {
573     boolean stopped = server != null && server.isStopped();
574     boolean stopping = services != null && services.isStopping();
575     if (stopped || stopping) {
576       LOG.info("Not opening merged region  " + merged.getRegionNameAsString()
577           + " because stopping=" + stopping + ", stopped=" + stopped);
578       return;
579     }
580     HRegionInfo hri = merged.getRegionInfo();
581     LoggingProgressable reporter = server == null ? null
582         : new LoggingProgressable(hri, server.getConfiguration().getLong(
583             "hbase.regionserver.regionmerge.open.log.interval", 10000));
584     merged.openHRegion(reporter);
585 
586     if (services != null) {
587       try {
588         if (useZKForAssignment) {
589           services.postOpenDeployTasks(merged, server.getCatalogTracker());
590         } else if (!services.reportRegionStateTransition(TransitionCode.MERGED,
591             mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo())) {
592           throw new IOException("Failed to report merged region to master: "
593             + mergedRegionInfo.getShortNameToLog());
594         }
595         services.addToOnlineRegions(merged);
596       } catch (KeeperException ke) {
597         throw new IOException(ke);
598       }
599     }
600 
601   }
602 
  /**
   * Finish off merge transaction, transition the zknode. When ZK is in use
   * for this server, moves the znode from MERGING to MERGED and then keeps
   * re-transitioning it every 100ms until the returned version is -1 or the
   * server is stopped/stopping. Afterwards the coprocessor host's
   * <code>postMerge</code> hook is invoked, if a host is set.
   * @param server Hosting server instance. Can be null when testing (won't try
   *          and update in zk if a null server)
   * @param services Used to online/offline regions.
   * @param mergedRegion the merged region, handed to the postMerge hook
   * @throws IOException If thrown, transaction failed. Call
   *           {@link #rollback(Server, RegionServerServices)}. Any exception
   *           raised while talking to ZK, including interruption, is wrapped.
   */
  void transitionZKNode(final Server server, final RegionServerServices services,
      HRegion mergedRegion) throws IOException {
    if (useZKAndZKIsSet(server)) {
      // Tell master about merge by updating zk. If we fail, abort.
      try {
        this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
          this.mergedRegionInfo, region_a.getRegionInfo(),
          region_b.getRegionInfo(), server.getServerName(), this.znodeVersion,
          RS_ZK_REGION_MERGING, RS_ZK_REGION_MERGED);

        long startTime = EnvironmentEdgeManager.currentTimeMillis();
        int spins = 0;
        // Now wait for the master to process the merge. We know it's done
        // when the znode is deleted. The reason we keep tickling the znode is
        // that it's possible for the master to miss an event.
        do {
          // Log progress on every tenth spin only.
          if (spins % 10 == 0) {
            LOG.debug("Still waiting on the master to process the merge for "
                + this.mergedRegionInfo.getEncodedName() + ", waited "
                + (EnvironmentEdgeManager.currentTimeMillis() - startTime) + "ms");
          }
          Thread.sleep(100);
          // When this returns -1 it means the znode doesn't exist
          this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
            this.mergedRegionInfo, region_a.getRegionInfo(),
            region_b.getRegionInfo(), server.getServerName(), this.znodeVersion,
            RS_ZK_REGION_MERGED, RS_ZK_REGION_MERGED);
          spins++;
        } while (this.znodeVersion != -1 && !server.isStopped()
            && !services.isStopping());
      } catch (Exception e) {
        // Restore the interrupt flag before wrapping, so callers can see it.
        if (e instanceof InterruptedException) {
          Thread.currentThread().interrupt();
        }
        throw new IOException("Failed telling master about merge "
            + mergedRegionInfo.getEncodedName(), e);
      }
    }

    if (rsCoprocessorHost != null) {
      rsCoprocessorHost.postMerge(this.region_a, this.region_b, mergedRegion);
    }

    // Leaving here, the mergedir with its dross will be in place but since the
    // merge was successful, just leave it; it'll be cleaned when region_a is
    // cleaned up by CatalogJanitor on master
  }
658 
  /**
   * Wait for the merging node to be transitioned from pending_merge
   * to merging by master. That's how we are sure master has processed
   * the event and is good with us to move on. If we don't get any update,
   * we periodically transition the node so that master gets the callback.
   * If the node is removed or is not in pending_merge state any more,
   * we abort the merge.
   * @param server supplies the ZooKeeper watcher, the expected server name
   *          and the stopped flag
   * @param services supplies the stopping flag
   * @return the version of the znode once it is in the merging state and
   *         names this server and the two expected regions
   * @throws IOException on every failure: node gone, node owned by another
   *           server, node for other regions, node in an unexpected state,
   *           server stopping/stopped, interruption, or a ZK error. Each is
   *           wrapped in a "Failed getting MERGING znode" IOException.
   */
  private int getZKNode(final Server server,
      final RegionServerServices services) throws IOException {
    // Wait for the master to process the pending_merge.
    try {
      int spins = 0;
      Stat stat = new Stat();
      ZooKeeperWatcher zkw = server.getZooKeeper();
      ServerName expectedServer = server.getServerName();
      String node = mergedRegionInfo.getEncodedName();
      while (!(server.isStopped() || services.isStopping())) {
        // Every fifth spin (including the first), re-transition the node to
        // the same state so that master gets another callback.
        if (spins % 5 == 0) {
          LOG.debug("Still waiting for master to process "
            + "the pending_merge for " + node);
          transitionMergingNode(zkw, mergedRegionInfo, region_a.getRegionInfo(),
            region_b.getRegionInfo(), expectedServer, -1, RS_ZK_REQUEST_REGION_MERGE,
            RS_ZK_REQUEST_REGION_MERGE);
        }
        Thread.sleep(100);
        spins++;
        byte [] data = ZKAssign.getDataNoWatch(zkw, node, stat);
        if (data == null) {
          throw new IOException("Data is null, merging node "
            + node + " no longer exists");
        }
        RegionTransition rt = RegionTransition.parseFrom(data);
        EventType et = rt.getEventType();
        if (et == RS_ZK_REGION_MERGING) {
          // Master moved the node on; verify it is really ours.
          ServerName serverName = rt.getServerName();
          if (!serverName.equals(expectedServer)) {
            throw new IOException("Merging node " + node + " is for "
              + serverName + ", not us " + expectedServer);
          }
          byte [] payloadOfMerging = rt.getPayload();
          List<HRegionInfo> mergingRegions = HRegionInfo.parseDelimitedFrom(
            payloadOfMerging, 0, payloadOfMerging.length);
          // Entries 1 and 2 are compared against region a and region b below;
          // entry 0 is not inspected here.
          assert mergingRegions.size() == 3;
          HRegionInfo a = mergingRegions.get(1);
          HRegionInfo b = mergingRegions.get(2);
          HRegionInfo hri_a = region_a.getRegionInfo();
          HRegionInfo hri_b = region_b.getRegionInfo();
          if (!(hri_a.equals(a) && hri_b.equals(b))) {
            throw new IOException("Merging node " + node + " is for " + a + ", "
              + b + ", not expected regions: " + hri_a + ", " + hri_b);
          }
          // Master has processed it.
          return stat.getVersion();
        }
        // Still pending is fine (keep spinning); anything else is fatal.
        if (et != RS_ZK_REQUEST_REGION_MERGE) {
          throw new IOException("Merging node " + node
            + " moved out of merging to " + et);
        }
      }
      // Server is stopping/stopped
      throw new IOException("Server is "
        + (services.isStopping() ? "stopping" : "stopped"));
    } catch (Exception e) {
      // This also catches the IOExceptions thrown above; they end up as the
      // cause of the IOException thrown below.
      if (e instanceof InterruptedException) {
        Thread.currentThread().interrupt();
      }
      throw new IOException("Failed getting MERGING znode on "
        + mergedRegionInfo.getRegionNameAsString(), e);
    }
  }
730 
731   /**
732    * Create reference file(s) of merging regions under the region_a merges dir
733    * @param hstoreFilesOfRegionA
734    * @param hstoreFilesOfRegionB
735    * @throws IOException
736    */
737   private void mergeStoreFiles(
738       Map<byte[], List<StoreFile>> hstoreFilesOfRegionA,
739       Map<byte[], List<StoreFile>> hstoreFilesOfRegionB)
740       throws IOException {
741     // Create reference file(s) of region A in mergdir
742     HRegionFileSystem fs_a = this.region_a.getRegionFileSystem();
743     for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionA
744         .entrySet()) {
745       String familyName = Bytes.toString(entry.getKey());
746       for (StoreFile storeFile : entry.getValue()) {
747         fs_a.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
748             this.mergesdir);
749       }
750     }
751     // Create reference file(s) of region B in mergedir
752     HRegionFileSystem fs_b = this.region_b.getRegionFileSystem();
753     for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionB
754         .entrySet()) {
755       String familyName = Bytes.toString(entry.getKey());
756       for (StoreFile storeFile : entry.getValue()) {
757         fs_b.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
758             this.mergesdir);
759       }
760     }
761   }
762 
763   /**
764    * @param server Hosting server instance (May be null when testing).
765    * @param services Services of regionserver, used to online regions.
766    * @throws IOException If thrown, rollback failed. Take drastic action.
767    * @return True if we successfully rolled back, false if we got to the point
768    *         of no return and so now need to abort the server to minimize
769    *         damage.
770    */
771   @SuppressWarnings("deprecation")
772   public boolean rollback(final Server server,
773       final RegionServerServices services) throws IOException {
774     assert this.mergedRegionInfo != null;
775     // Coprocessor callback
776     if (rsCoprocessorHost != null) {
777       rsCoprocessorHost.preRollBackMerge(this.region_a, this.region_b);
778     }
779 
780     boolean result = true;
781     ListIterator<JournalEntry> iterator = this.journal
782         .listIterator(this.journal.size());
783     // Iterate in reverse.
784     while (iterator.hasPrevious()) {
785       JournalEntry je = iterator.previous();
786       switch (je) {
787 
788         case SET_MERGING_IN_ZK:
789           if (useZKAndZKIsSet(server)) {
790             cleanZK(server, this.mergedRegionInfo);
791           } else if (services != null && !useZKForAssignment
792               && !services.reportRegionStateTransition(TransitionCode.MERGE_REVERTED,
793                   mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo())) {
794             return false;
795           }
796           break;
797 
798         case CREATED_MERGE_DIR:
799           this.region_a.writestate.writesEnabled = true;
800           this.region_b.writestate.writesEnabled = true;
801           this.region_a.getRegionFileSystem().cleanupMergesDir();
802           break;
803 
804         case CLOSED_REGION_A:
805           try {
806             // So, this returns a seqid but if we just closed and then reopened,
807             // we should be ok. On close, we flushed using sequenceid obtained
808             // from hosting regionserver so no need to propagate the sequenceid
809             // returned out of initialize below up into regionserver as we
810             // normally do.
811             this.region_a.initialize();
812           } catch (IOException e) {
813             LOG.error("Failed rollbacking CLOSED_REGION_A of region "
814                 + this.region_a.getRegionNameAsString(), e);
815             throw new RuntimeException(e);
816           }
817           break;
818 
819         case OFFLINED_REGION_A:
820           if (services != null)
821             services.addToOnlineRegions(this.region_a);
822           break;
823 
824         case CLOSED_REGION_B:
825           try {
826             this.region_b.initialize();
827           } catch (IOException e) {
828             LOG.error("Failed rollbacking CLOSED_REGION_A of region "
829                 + this.region_b.getRegionNameAsString(), e);
830             throw new RuntimeException(e);
831           }
832           break;
833 
834         case OFFLINED_REGION_B:
835           if (services != null)
836             services.addToOnlineRegions(this.region_b);
837           break;
838 
839         case STARTED_MERGED_REGION_CREATION:
840           this.region_a.getRegionFileSystem().cleanupMergedRegion(
841               this.mergedRegionInfo);
842           break;
843 
844         case PONR:
845           // We got to the point-of-no-return so we need to just abort. Return
846           // immediately. Do not clean up created merged regions.
847           return false;
848 
849         default:
850           throw new RuntimeException("Unhandled journal entry: " + je);
851       }
852     }
853     // Coprocessor callback
854     if (rsCoprocessorHost != null) {
855       rsCoprocessorHost.postRollBackMerge(this.region_a, this.region_b);
856     }
857 
858     return result;
859   }
860 
861   HRegionInfo getMergedRegionInfo() {
862     return this.mergedRegionInfo;
863   }
864 
865   // For unit testing.
866   Path getMergesDir() {
867     return this.mergesdir;
868   }
869 
870   private boolean useZKAndZKIsSet(final Server server) {
871     return server != null && useZKForAssignment && server.getZooKeeper() != null;
872   }
873 
874   private static void cleanZK(final Server server, final HRegionInfo hri) {
875     try {
876       // Only delete if its in expected state; could have been hijacked.
877       if (!ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
878           RS_ZK_REQUEST_REGION_MERGE, server.getServerName())) {
879         ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
880           RS_ZK_REGION_MERGING, server.getServerName());
881       }
882     } catch (KeeperException.NoNodeException e) {
883       LOG.info("Failed cleanup zk node of " + hri.getRegionNameAsString(), e);
884     } catch (KeeperException e) {
885       server.abort("Failed cleanup zk node of " + hri.getRegionNameAsString(),e);
886     }
887   }
888 
889   /**
890    * Creates a new ephemeral node in the PENDING_MERGE state for the merged region.
891    * Create it ephemeral in case regionserver dies mid-merge.
892    *
893    * <p>
894    * Does not transition nodes from other states. If a node already exists for
895    * this region, a {@link NodeExistsException} will be thrown.
896    *
897    * @param zkw zk reference
898    * @param region region to be created as offline
899    * @param serverName server event originates from
900    * @throws KeeperException
901    * @throws IOException
902    */
903   public static void createNodeMerging(final ZooKeeperWatcher zkw, final HRegionInfo region,
904       final ServerName serverName, final HRegionInfo a,
905       final HRegionInfo b) throws KeeperException, IOException {
906     LOG.debug(zkw.prefix("Creating ephemeral node for "
907       + region.getEncodedName() + " in PENDING_MERGE state"));
908     byte [] payload = HRegionInfo.toDelimitedByteArray(region, a, b);
909     RegionTransition rt = RegionTransition.createRegionTransition(
910       RS_ZK_REQUEST_REGION_MERGE, region.getRegionName(), serverName, payload);
911     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
912     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
913       throw new IOException("Failed create of ephemeral " + node);
914     }
915   }
916 
917   /**
918    * Transitions an existing ephemeral node for the specified region which is
919    * currently in the begin state to be in the end state. Master cleans up the
920    * final MERGE znode when it reads it (or if we crash, zk will clean it up).
921    *
922    * <p>
923    * Does not transition nodes from other states. If for some reason the node
924    * could not be transitioned, the method returns -1. If the transition is
925    * successful, the version of the node after transition is returned.
926    *
927    * <p>
928    * This method can fail and return false for three different reasons:
929    * <ul>
930    * <li>Node for this region does not exist</li>
931    * <li>Node for this region is not in the begin state</li>
932    * <li>After verifying the begin state, update fails because of wrong version
933    * (this should never actually happen since an RS only does this transition
934    * following a transition to the begin state. If two RS are conflicting, one would
935    * fail the original transition to the begin state and not this transition)</li>
936    * </ul>
937    *
938    * <p>
939    * Does not set any watches.
940    *
941    * <p>
942    * This method should only be used by a RegionServer when merging two regions.
943    *
944    * @param zkw zk reference
945    * @param merged region to be transitioned to opened
946    * @param a merging region A
947    * @param b merging region B
948    * @param serverName server event originates from
949    * @param znodeVersion expected version of data before modification
950    * @param beginState the expected current state the znode should be
951    * @param endState the state to be transition to
952    * @return version of node after transition, -1 if unsuccessful transition
953    * @throws KeeperException if unexpected zookeeper exception
954    * @throws IOException
955    */
956   public static int transitionMergingNode(ZooKeeperWatcher zkw,
957       HRegionInfo merged, HRegionInfo a, HRegionInfo b, ServerName serverName,
958       final int znodeVersion, final EventType beginState,
959       final EventType endState) throws KeeperException, IOException {
960     byte[] payload = HRegionInfo.toDelimitedByteArray(merged, a, b);
961     return ZKAssign.transitionNode(zkw, merged, serverName,
962       beginState, endState, znodeVersion, payload);
963   }
964 
965   /**
966    * Checks if the given region has merge qualifier in hbase:meta
967    * @param services
968    * @param regionName name of specified region
969    * @return true if the given region has merge qualifier in META.(It will be
970    *         cleaned by CatalogJanitor)
971    * @throws IOException
972    */
973   boolean hasMergeQualifierInMeta(final RegionServerServices services,
974       final byte[] regionName) throws IOException {
975     if (services == null) return false;
976     // Get merge regions if it is a merged region and already has merge
977     // qualifier
978     Pair<HRegionInfo, HRegionInfo> mergeRegions = MetaReader
979         .getRegionsFromMergeQualifier(services.getCatalogTracker(), regionName);
980     if (mergeRegions != null &&
981         (mergeRegions.getFirst() != null || mergeRegions.getSecond() != null)) {
982       // It has merge qualifier
983       return true;
984     }
985     return false;
986   }
987 }