View Javadoc

1   /**
2    * Copyright The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements. See the NOTICE file distributed with this
6    * work for additional information regarding copyright ownership. The ASF
7    * licenses this file to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance with the License.
9    * You may obtain a copy of the License at
10   *
11   * http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
16   * License for the specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_MERGED;
22  import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_MERGING;
23  import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REQUEST_REGION_MERGE;
24  
25  import java.io.IOException;
26  import java.io.InterruptedIOException;
27  import java.security.PrivilegedExceptionAction;
28  import java.util.ArrayList;
29  import java.util.List;
30  import java.util.ListIterator;
31  import java.util.Map;
32  
33  import org.apache.commons.logging.Log;
34  import org.apache.commons.logging.LogFactory;
35  import org.apache.hadoop.hbase.classification.InterfaceAudience;
36  import org.apache.hadoop.fs.Path;
37  import org.apache.hadoop.hbase.HConstants;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.MetaMutationAnnotation;
40  import org.apache.hadoop.hbase.RegionTransition;
41  import org.apache.hadoop.hbase.Server;
42  import org.apache.hadoop.hbase.ServerName;
43  import org.apache.hadoop.hbase.catalog.CatalogTracker;
44  import org.apache.hadoop.hbase.catalog.MetaEditor;
45  import org.apache.hadoop.hbase.catalog.MetaReader;
46  import org.apache.hadoop.hbase.client.Delete;
47  import org.apache.hadoop.hbase.client.Mutation;
48  import org.apache.hadoop.hbase.client.Put;
49  import org.apache.hadoop.hbase.executor.EventType;
50  import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
51  import org.apache.hadoop.hbase.regionserver.SplitTransaction.LoggingProgressable;
52  import org.apache.hadoop.hbase.security.User;
53  import org.apache.hadoop.hbase.util.Bytes;
54  import org.apache.hadoop.hbase.util.ConfigUtil;
55  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
56  import org.apache.hadoop.hbase.util.Pair;
57  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
58  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
59  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
60  import org.apache.zookeeper.KeeperException;
61  import org.apache.zookeeper.KeeperException.NodeExistsException;
62  import org.apache.zookeeper.data.Stat;
63  
64  /**
65   * Executes region merge as a "transaction". It is similar to
66   * SplitTransaction. Call {@link #prepare(RegionServerServices)} to setup the
67   * transaction, {@link #execute(Server, RegionServerServices)} to run the
68   * transaction and {@link #rollback(Server, RegionServerServices)} to cleanup if
69   * execute fails.
70   * 
71   * <p>
72   * Here is an example of how you would use this class:
73   * 
74   * <pre>
75   *  RegionMergeTransaction mt = new RegionMergeTransaction(regionA, regionB, forcible);
76   *  if (!mt.prepare(services)) return;
77   *  try {
78   *    mt.execute(server, services);
79   *  } catch (IOException ioe) {
80   *    try {
81   *      mt.rollback(server, services);
82   *      return;
83   *    } catch (RuntimeException e) {
84   *      myAbortable.abort("Failed merge, abort");
85   *    }
86   *  }
87   * </pre>
88   * <p>
89   * This class is not thread safe. Callers need to ensure the merge is run by one
90   * thread only.
91   */
92  @InterfaceAudience.Private
93  public class RegionMergeTransaction {
94    private static final Log LOG = LogFactory.getLog(RegionMergeTransaction.class);
95  
96    // Merged region info
97    private HRegionInfo mergedRegionInfo;
98    // region_a sorts before region_b
99    private final HRegion region_a;
100   private final HRegion region_b;
101   // merges dir is under region_a
102   private final Path mergesdir;
103   private int znodeVersion = -1;
104   // We only merge adjacent regions if forcible is false
105   private final boolean forcible;
106   private boolean useZKForAssignment;
107   private final long masterSystemTime;
108 
/**
 * Steps recorded in the transaction journal. One entry is appended per
 * completed (or started) step so a rollback knows how much to undo.
 */
enum JournalEntry {
  /** The regions were marked as in transition (MERGING state). */
  SET_MERGING_IN_ZK,
  /** The temporary merges directory was created. */
  CREATED_MERGE_DIR,
  /** Merging region A was closed. */
  CLOSED_REGION_A,
  /** Merging region A was removed from the server's online regions list. */
  OFFLINED_REGION_A,
  /** Merging region B was closed. */
  CLOSED_REGION_B,
  /** Merging region B was removed from the server's online regions list. */
  OFFLINED_REGION_B,
  /** Creation of the merged region has begun. */
  STARTED_MERGED_REGION_CREATION,
  /**
   * Point of no return. Past this entry the transaction cannot be recovered
   * other than by crashing out the regionserver.
   */
  PONR
}
148 
149   /*
150    * Journal of how far the merge transaction has progressed.
151    */
152   private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
153 
154   private static IOException closedByOtherException = new IOException(
155       "Failed to close region: already closed by another thread");
156 
157   private RegionServerCoprocessorHost rsCoprocessorHost = null;
158 
/**
 * Creates a merge transaction that uses the current time, as reported by
 * {@link EnvironmentEdgeManager}, in place of a master-supplied time.
 * @param a one of the two regions to merge
 * @param b the other region to merge
 * @param forcible if false, only adjacent regions will be merged
 */
public RegionMergeTransaction(final HRegion a, final HRegion b,
    final boolean forcible) {
  this(a, b, forcible, EnvironmentEdgeManager.currentTimeMillis());
}
169 
170   /**
171    * Constructor
172    * @param a region a to merge
173    * @param b region b to merge
174    * @param forcible if false, we will only merge adjacent regions
175    * @param masterSystemTime the time at the master side
176    */
177   public RegionMergeTransaction(final HRegion a, final HRegion b,
178       final boolean forcible, long masterSystemTime) {
179     if (a.getRegionInfo().compareTo(b.getRegionInfo()) <= 0) {
180       this.region_a = a;
181       this.region_b = b;
182     } else {
183       this.region_a = b;
184       this.region_b = a;
185     }
186     this.forcible = forcible;
187     this.masterSystemTime = masterSystemTime;
188     this.mergesdir = region_a.getRegionFileSystem().getMergesDir();
189   }
190 
191   /**
192    * Does checks on merge inputs.
193    * @param services
194    * @return <code>true</code> if the regions are mergeable else
195    *         <code>false</code> if they are not (e.g. its already closed, etc.).
196    */
197   public boolean prepare(final RegionServerServices services) {
198     if (!region_a.getTableDesc().getTableName()
199         .equals(region_b.getTableDesc().getTableName())) {
200       LOG.info("Can't merge regions " + region_a + "," + region_b
201           + " because they do not belong to the same table");
202       return false;
203     }
204     if (region_a.getRegionInfo().equals(region_b.getRegionInfo())) {
205       LOG.info("Can't merge the same region " + region_a);
206       return false;
207     }
208     if (!forcible && !HRegionInfo.areAdjacent(region_a.getRegionInfo(),
209             region_b.getRegionInfo())) {
210       String msg = "Skip merging " + this.region_a.getRegionNameAsString()
211           + " and " + this.region_b.getRegionNameAsString()
212           + ", because they are not adjacent.";
213       LOG.info(msg);
214       return false;
215     }
216     if (!this.region_a.isMergeable() || !this.region_b.isMergeable()) {
217       return false;
218     }
219     try {
220       boolean regionAHasMergeQualifier = hasMergeQualifierInMeta(services,
221           region_a.getRegionName());
222       if (regionAHasMergeQualifier ||
223           hasMergeQualifierInMeta(services, region_b.getRegionName())) {
224         LOG.debug("Region " + (regionAHasMergeQualifier ? region_a.getRegionNameAsString()
225                 : region_b.getRegionNameAsString())
226             + " is not mergeable because it has merge qualifier in META");
227         return false;
228       }
229     } catch (IOException e) {
230       LOG.warn("Failed judging whether merge transaction is available for "
231               + region_a.getRegionNameAsString() + " and "
232               + region_b.getRegionNameAsString(), e);
233       return false;
234     }
235 
236     // WARN: make sure there is no parent region of the two merging regions in
237     // hbase:meta If exists, fixing up daughters would cause daughter regions(we
238     // have merged one) online again when we restart master, so we should clear
239     // the parent region to prevent the above case
240     // Since HBASE-7721, we don't need fix up daughters any more. so here do
241     // nothing
242 
243     this.mergedRegionInfo = getMergedRegionInfo(region_a.getRegionInfo(),
244         region_b.getRegionInfo());
245     return true;
246   }
247 
248   /**
249    * Run the transaction.
250    * @param server Hosting server instance. Can be null when testing (won't try
251    *          and update in zk if a null server)
252    * @param services Used to online/offline regions.
253    * @throws IOException If thrown, transaction failed. Call
254    *           {@link #rollback(Server, RegionServerServices)}
255    * @return merged region
256    * @throws IOException
257    * @see #rollback(Server, RegionServerServices)
258    */
259   public HRegion execute(final Server server,
260       final RegionServerServices services) throws IOException {
261     if (User.isHBaseSecurityEnabled(region_a.getBaseConf())) {
262       LOG.warn("Should use execute(Server, RegionServerServices, User)");
263     }
264     return execute(server, services, null);
265   }
266 
267   public HRegion execute(final Server server, final RegionServerServices services, User user)
268       throws IOException {
269     useZKForAssignment = server == null ? true :
270       ConfigUtil.useZKForAssignment(server.getConfiguration());
271     if (rsCoprocessorHost == null) {
272       rsCoprocessorHost = server != null ? ((HRegionServer) server).getCoprocessorHost() : null;
273     }
274     final HRegion mergedRegion = createMergedRegion(server, services, user);
275     if (rsCoprocessorHost != null) {
276       if (user == null) {
277         rsCoprocessorHost.postMergeCommit(this.region_a, this.region_b, mergedRegion);
278       } else {
279         try {
280           user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
281             @Override
282             public Void run() throws Exception {
283               rsCoprocessorHost.postMergeCommit(region_a, region_b, mergedRegion);
284               return null;
285             }
286           });
287         } catch (InterruptedException ie) {
288           InterruptedIOException iioe = new InterruptedIOException();
289           iioe.initCause(ie);
290           throw iioe;
291         }
292       }
293     }
294     stepsAfterPONR(server, services, mergedRegion, user);
295     return mergedRegion;
296   }
297 
298   @Deprecated
299   public void stepsAfterPONR(final Server server, final RegionServerServices services,
300       final HRegion mergedRegion) throws IOException {
301     stepsAfterPONR(server, services, mergedRegion, null);
302   }
303 
304   public void stepsAfterPONR(final Server server, final RegionServerServices services,
305       final HRegion mergedRegion, User user) throws IOException {
306     openMergedRegion(server, services, mergedRegion);
307     transitionZKNode(server, services, mergedRegion, user);
308   }
309 
310   /**
311    * Prepare the merged region and region files.
312    * @param server Hosting server instance. Can be null when testing (won't try
313    *          and update in zk if a null server)
314    * @param services Used to online/offline regions.
315    * @return merged region
316    * @throws IOException If thrown, transaction failed. Call
317    *           {@link #rollback(Server, RegionServerServices)}
318    */
319   HRegion createMergedRegion(final Server server,
320       final RegionServerServices services, User user) throws IOException {
321     LOG.info("Starting merge of " + region_a + " and "
322         + region_b.getRegionNameAsString() + ", forcible=" + forcible);
323     if ((server != null && server.isStopped())
324         || (services != null && services.isStopping())) {
325       throw new IOException("Server is stopped or stopping");
326     }
327 
328     if (rsCoprocessorHost != null) {
329       boolean ret = false;
330       if (user == null) {
331         ret = rsCoprocessorHost.preMerge(region_a, region_b);
332       } else {
333         try {
334           ret = user.getUGI().doAs(new PrivilegedExceptionAction<Boolean>() {
335             @Override
336             public Boolean run() throws Exception {
337               return rsCoprocessorHost.preMerge(region_a, region_b);
338             }
339           });
340         } catch (InterruptedException ie) {
341           InterruptedIOException iioe = new InterruptedIOException();
342           iioe.initCause(ie);
343           throw iioe;
344         }
345       }
346       if (ret) {
347         throw new IOException("Coprocessor bypassing regions " + this.region_a + " "
348             + this.region_b + " merge.");
349       }
350     }
351 
352     // If true, no cluster to write meta edits to or to update znodes in.
353     boolean testing = server == null ? true : server.getConfiguration()
354         .getBoolean("hbase.testing.nocluster", false);
355 
356     HRegion mergedRegion = stepsBeforePONR(server, services, testing);
357 
358     @MetaMutationAnnotation
359     final List<Mutation> metaEntries = new ArrayList<Mutation>();
360     if (rsCoprocessorHost != null) {
361       boolean ret = false;
362       if (user == null) {
363         ret = rsCoprocessorHost.preMergeCommit(region_a, region_b, metaEntries);
364       } else {
365         try {
366           ret = user.getUGI().doAs(new PrivilegedExceptionAction<Boolean>() {
367             @Override
368             public Boolean run() throws Exception {
369               return rsCoprocessorHost.preMergeCommit(region_a, region_b, metaEntries);
370             }
371           });
372         } catch (InterruptedException ie) {
373           InterruptedIOException iioe = new InterruptedIOException();
374           iioe.initCause(ie);
375           throw iioe;
376         }
377       }
378 
379       if (ret) {
380         throw new IOException("Coprocessor bypassing regions " + this.region_a + " "
381             + this.region_b + " merge.");
382       }
383       try {
384         for (Mutation p : metaEntries) {
385           HRegionInfo.parseRegionName(p.getRow());
386         }
387       } catch (IOException e) {
388         LOG.error("Row key of mutation from coprocessor is not parsable as region name."
389             + "Mutations from coprocessor should only be for hbase:meta table.", e);
390         throw e;
391       }
392     }
393 
394     // This is the point of no return. Similar with SplitTransaction.
395     // IF we reach the PONR then subsequent failures need to crash out this
396     // regionserver
397     this.journal.add(JournalEntry.PONR);
398 
399     // Add merged region and delete region_a and region_b
400     // as an atomic update. See HBASE-7721. This update to hbase:meta makes the region
401     // will determine whether the region is merged or not in case of failures.
402     // If it is successful, master will roll-forward, if not, master will
403     // rollback
404     if (!testing && useZKForAssignment) {
405       if (metaEntries.isEmpty()) {
406         MetaEditor.mergeRegions(server.getCatalogTracker(), mergedRegion.getRegionInfo(), region_a
407             .getRegionInfo(), region_b.getRegionInfo(), server.getServerName(), masterSystemTime);
408       } else {
409         mergeRegionsAndPutMetaEntries(server.getCatalogTracker(), mergedRegion.getRegionInfo(),
410           region_a.getRegionInfo(), region_b.getRegionInfo(), server.getServerName(), metaEntries);
411       }
412     } else if (services != null && !useZKForAssignment) {
413       if (!services.reportRegionStateTransition(TransitionCode.MERGE_PONR,
414           mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo())) {
415         // Passed PONR, let SSH clean it up
416         throw new IOException("Failed to notify master that merge passed PONR: "
417           + region_a.getRegionInfo().getRegionNameAsString() + " and "
418           + region_b.getRegionInfo().getRegionNameAsString());
419       }
420     }
421     return mergedRegion;
422   }
423 
424   private void mergeRegionsAndPutMetaEntries(CatalogTracker catalogTracker,
425       HRegionInfo mergedRegion, HRegionInfo regionA, HRegionInfo regionB, ServerName serverName,
426       List<Mutation> metaEntries) throws IOException {
427     prepareMutationsForMerge(mergedRegion, regionA, regionB, serverName, metaEntries);
428     MetaEditor.mutateMetaTable(catalogTracker, metaEntries);
429   }
430 
431   public void prepareMutationsForMerge(HRegionInfo mergedRegion, HRegionInfo regionA,
432       HRegionInfo regionB, ServerName serverName, List<Mutation> mutations) throws IOException {
433     HRegionInfo copyOfMerged = new HRegionInfo(mergedRegion);
434 
435     // use the maximum of what master passed us vs local time.
436     long time = Math.max(EnvironmentEdgeManager.currentTimeMillis(), masterSystemTime);
437 
438     // Put for parent
439     Put putOfMerged = MetaEditor.makePutFromRegionInfo(copyOfMerged, time);
440     putOfMerged.add(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER, regionA.toByteArray());
441     putOfMerged.add(HConstants.CATALOG_FAMILY, HConstants.MERGEB_QUALIFIER, regionB.toByteArray());
442     mutations.add(putOfMerged);
443     // Deletes for merging regions
444     Delete deleteA = MetaEditor.makeDeleteFromRegionInfo(regionA, time);
445     Delete deleteB = MetaEditor.makeDeleteFromRegionInfo(regionB, time);
446     mutations.add(deleteA);
447     mutations.add(deleteB);
448     // The merged is a new region, openSeqNum = 1 is fine.
449     addLocation(putOfMerged, serverName, 1);
450   }
451 
452   @SuppressWarnings("deprecation")
453   public Put addLocation(final Put p, final ServerName sn, long openSeqNum) {
454     p.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes
455         .toBytes(sn.getHostAndPort()));
456     p.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(sn
457         .getStartcode()));
458     p.add(HConstants.CATALOG_FAMILY, HConstants.SEQNUM_QUALIFIER, Bytes.toBytes(openSeqNum));
459     return p;
460   }
461 
462   public HRegion stepsBeforePONR(final Server server, final RegionServerServices services,
463       boolean testing) throws IOException {
464     // Set ephemeral MERGING znode up in zk. Mocked servers sometimes don't
465     // have zookeeper so don't do zk stuff if server or zookeeper is null
466     if (useZKAndZKIsSet(server)) {
467       try {
468         createNodeMerging(server.getZooKeeper(), this.mergedRegionInfo,
469           server.getServerName(), region_a.getRegionInfo(), region_b.getRegionInfo());
470       } catch (KeeperException e) {
471         throw new IOException("Failed creating PENDING_MERGE znode on "
472             + this.mergedRegionInfo.getRegionNameAsString(), e);
473       }
474     } else if (services != null && !useZKForAssignment) {
475       if (!services.reportRegionStateTransition(TransitionCode.READY_TO_MERGE,
476           mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo())) {
477         throw new IOException("Failed to get ok from master to merge "
478           + region_a.getRegionInfo().getRegionNameAsString() + " and "
479           + region_b.getRegionInfo().getRegionNameAsString());
480       }
481     }
482     this.journal.add(JournalEntry.SET_MERGING_IN_ZK);
483     if (useZKAndZKIsSet(server)) {
484       // After creating the merge node, wait for master to transition it
485       // from PENDING_MERGE to MERGING so that we can move on. We want master
486       // knows about it and won't transition any region which is merging.
487       znodeVersion = getZKNode(server, services);
488     }
489 
490     this.region_a.getRegionFileSystem().createMergesDir();
491     this.journal.add(JournalEntry.CREATED_MERGE_DIR);
492 
493     Map<byte[], List<StoreFile>> hstoreFilesOfRegionA = closeAndOfflineRegion(
494         services, this.region_a, true, testing);
495     Map<byte[], List<StoreFile>> hstoreFilesOfRegionB = closeAndOfflineRegion(
496         services, this.region_b, false, testing);
497 
498     assert hstoreFilesOfRegionA != null && hstoreFilesOfRegionB != null;
499 
500 
501     //
502     // mergeStoreFiles creates merged region dirs under the region_a merges dir
503     // Nothing to unroll here if failure -- clean up of CREATE_MERGE_DIR will
504     // clean this up.
505     mergeStoreFiles(hstoreFilesOfRegionA, hstoreFilesOfRegionB);
506 
507     if (server != null && useZKAndZKIsSet(server)) {
508       try {
509         // Do one more check on the merging znode (before it is too late) in case
510         // any merging region is moved somehow. If so, the znode transition will fail.
511         this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
512           this.mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo(),
513           server.getServerName(), this.znodeVersion,
514           RS_ZK_REGION_MERGING, RS_ZK_REGION_MERGING);
515       } catch (KeeperException e) {
516         throw new IOException("Failed setting MERGING znode on "
517             + this.mergedRegionInfo.getRegionNameAsString(), e);
518       }
519     }
520 
521     // Log to the journal that we are creating merged region. We could fail
522     // halfway through. If we do, we could have left
523     // stuff in fs that needs cleanup -- a storefile or two. Thats why we
524     // add entry to journal BEFORE rather than AFTER the change.
525     this.journal.add(JournalEntry.STARTED_MERGED_REGION_CREATION);
526     HRegion mergedRegion = createMergedRegionFromMerges(this.region_a,
527         this.region_b, this.mergedRegionInfo);
528     return mergedRegion;
529   }
530 
531   /**
532    * Create a merged region from the merges directory under region a. In order
533    * to mock it for tests, place it with a new method.
534    * @param a hri of region a
535    * @param b hri of region b
536    * @param mergedRegion hri of merged region
537    * @return merged HRegion.
538    * @throws IOException
539    */
540   HRegion createMergedRegionFromMerges(final HRegion a, final HRegion b,
541       final HRegionInfo mergedRegion) throws IOException {
542     return a.createMergedRegionFromMerges(mergedRegion, b);
543   }
544 
545   /**
546    * Close the merging region and offline it in regionserver
547    * @param services
548    * @param region
549    * @param isRegionA true if it is merging region a, false if it is region b
550    * @param testing true if it is testing
551    * @return a map of family name to list of store files
552    * @throws IOException
553    */
554   private Map<byte[], List<StoreFile>> closeAndOfflineRegion(
555       final RegionServerServices services, final HRegion region,
556       final boolean isRegionA, final boolean testing) throws IOException {
557     Map<byte[], List<StoreFile>> hstoreFilesToMerge = null;
558     Exception exceptionToThrow = null;
559     try {
560       hstoreFilesToMerge = region.close(false);
561     } catch (Exception e) {
562       exceptionToThrow = e;
563     }
564     if (exceptionToThrow == null && hstoreFilesToMerge == null) {
565       // The region was closed by a concurrent thread. We can't continue
566       // with the merge, instead we must just abandon the merge. If we
567       // reopen or merge this could cause problems because the region has
568       // probably already been moved to a different server, or is in the
569       // process of moving to a different server.
570       exceptionToThrow = closedByOtherException;
571     }
572     if (exceptionToThrow != closedByOtherException) {
573       this.journal.add(isRegionA ? JournalEntry.CLOSED_REGION_A
574           : JournalEntry.CLOSED_REGION_B);
575     }
576     if (exceptionToThrow != null) {
577       if (exceptionToThrow instanceof IOException)
578         throw (IOException) exceptionToThrow;
579       throw new IOException(exceptionToThrow);
580     }
581 
582     if (!testing) {
583       services.removeFromOnlineRegions(region, null);
584     }
585     this.journal.add(isRegionA ? JournalEntry.OFFLINED_REGION_A
586         : JournalEntry.OFFLINED_REGION_B);
587     return hstoreFilesToMerge;
588   }
589 
590   /**
591    * Get merged region info through the specified two regions
592    * @param a merging region A
593    * @param b merging region B
594    * @return the merged region info
595    */
596   public static HRegionInfo getMergedRegionInfo(final HRegionInfo a,
597       final HRegionInfo b) {
598     long rid = EnvironmentEdgeManager.currentTimeMillis();
599     // Regionid is timestamp. Merged region's id can't be less than that of
600     // merging regions else will insert at wrong location in hbase:meta
601     if (rid < a.getRegionId() || rid < b.getRegionId()) {
602       LOG.warn("Clock skew; merging regions id are " + a.getRegionId()
603           + " and " + b.getRegionId() + ", but current time here is " + rid);
604       rid = Math.max(a.getRegionId(), b.getRegionId()) + 1;
605     }
606 
607     byte[] startKey = null;
608     byte[] endKey = null;
609     // Choose the smaller as start key
610     if (a.compareTo(b) <= 0) {
611       startKey = a.getStartKey();
612     } else {
613       startKey = b.getStartKey();
614     }
615     // Choose the bigger as end key
616     if (Bytes.equals(a.getEndKey(), HConstants.EMPTY_BYTE_ARRAY)
617         || (!Bytes.equals(b.getEndKey(), HConstants.EMPTY_BYTE_ARRAY)
618             && Bytes.compareTo(a.getEndKey(), b.getEndKey()) > 0)) {
619       endKey = a.getEndKey();
620     } else {
621       endKey = b.getEndKey();
622     }
623 
624     // Merged region is sorted between two merging regions in META
625     HRegionInfo mergedRegionInfo = new HRegionInfo(a.getTable(), startKey,
626         endKey, false, rid);
627     return mergedRegionInfo;
628   }
629 
630   /**
631    * Perform time consuming opening of the merged region.
632    * @param server Hosting server instance. Can be null when testing (won't try
633    *          and update in zk if a null server)
634    * @param services Used to online/offline regions.
635    * @param merged the merged region
636    * @throws IOException If thrown, transaction failed. Call
637    *           {@link #rollback(Server, RegionServerServices)}
638    */
639   void openMergedRegion(final Server server,
640       final RegionServerServices services, HRegion merged) throws IOException {
641     boolean stopped = server != null && server.isStopped();
642     boolean stopping = services != null && services.isStopping();
643     if (stopped || stopping) {
644       LOG.info("Not opening merged region  " + merged.getRegionNameAsString()
645           + " because stopping=" + stopping + ", stopped=" + stopped);
646       return;
647     }
648     HRegionInfo hri = merged.getRegionInfo();
649     LoggingProgressable reporter = server == null ? null
650         : new LoggingProgressable(hri, server.getConfiguration().getLong(
651             "hbase.regionserver.regionmerge.open.log.interval", 10000));
652     merged.openHRegion(reporter);
653 
654     if (services != null) {
655       try {
656         if (useZKForAssignment) {
657           services.postOpenDeployTasks(merged, server.getCatalogTracker());
658         } else if (!services.reportRegionStateTransition(TransitionCode.MERGED,
659             mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo())) {
660           throw new IOException("Failed to report merged region to master: "
661             + mergedRegionInfo.getShortNameToLog());
662         }
663         services.addToOnlineRegions(merged);
664       } catch (KeeperException ke) {
665         throw new IOException(ke);
666       }
667     }
668 
669   }
670 
671   /**
672    * Finish off merge transaction, transition the zknode
673    * @param server Hosting server instance. Can be null when testing (won't try
674    *          and update in zk if a null server)
675    * @param services Used to online/offline regions.
676    * @throws IOException If thrown, transaction failed. Call
677    *           {@link #rollback(Server, RegionServerServices)}
678    */
679   void transitionZKNode(final Server server, final RegionServerServices services,
680       final HRegion mergedRegion, User user) throws IOException {
681     if (useZKAndZKIsSet(server)) {
682       // Tell master about merge by updating zk. If we fail, abort.
683       try {
684         this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
685           this.mergedRegionInfo, region_a.getRegionInfo(),
686           region_b.getRegionInfo(), server.getServerName(), this.znodeVersion,
687           RS_ZK_REGION_MERGING, RS_ZK_REGION_MERGED);
688   
689         long startTime = EnvironmentEdgeManager.currentTimeMillis();
690         int spins = 0;
691         // Now wait for the master to process the merge. We know it's done
692         // when the znode is deleted. The reason we keep tickling the znode is
693         // that it's possible for the master to miss an event.
694         do {
695           if (spins % 10 == 0) {
696             LOG.debug("Still waiting on the master to process the merge for "
697                 + this.mergedRegionInfo.getEncodedName() + ", waited "
698                 + (EnvironmentEdgeManager.currentTimeMillis() - startTime) + "ms");
699           }
700           Thread.sleep(100);
701           // When this returns -1 it means the znode doesn't exist
702           this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
703             this.mergedRegionInfo, region_a.getRegionInfo(),
704             region_b.getRegionInfo(), server.getServerName(), this.znodeVersion,
705             RS_ZK_REGION_MERGED, RS_ZK_REGION_MERGED);
706           spins++;
707         } while (this.znodeVersion != -1 && !server.isStopped()
708             && !services.isStopping());
709       } catch (Exception e) {
710         if (e instanceof InterruptedException) {
711           Thread.currentThread().interrupt();
712         }
713         throw new IOException("Failed telling master about merge "
714             + mergedRegionInfo.getEncodedName(), e);
715       }
716     }
717 
718     if (rsCoprocessorHost != null) {
719       if (user == null) {
720         rsCoprocessorHost.postMerge(region_a, region_b, mergedRegion);
721       } else {
722         try {
723           user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
724             @Override
725             public Void run() throws Exception {
726               rsCoprocessorHost.postMerge(region_a, region_b, mergedRegion);
727               return null;
728             }
729           });
730         } catch (InterruptedException ie) {
731           InterruptedIOException iioe = new InterruptedIOException();
732           iioe.initCause(ie);
733           throw iioe;
734         }
735       }
736     }
737 
738     // Leaving here, the mergedir with its dross will be in place but since the
739     // merge was successful, just leave it; it'll be cleaned when region_a is
740     // cleaned up by CatalogJanitor on master
741   }
742 
743   /**
744    * Wait for the merging node to be transitioned from pending_merge
745    * to merging by master. That's how we are sure master has processed
746    * the event and is good with us to move on. If we don't get any update,
747    * we periodically transition the node so that master gets the callback.
748    * If the node is removed or is not in pending_merge state any more,
749    * we abort the merge.
750    */
751   private int getZKNode(final Server server,
752       final RegionServerServices services) throws IOException {
753     // Wait for the master to process the pending_merge.
754     try {
755       int spins = 0;
756       Stat stat = new Stat();
757       ZooKeeperWatcher zkw = server.getZooKeeper();
758       ServerName expectedServer = server.getServerName();
759       String node = mergedRegionInfo.getEncodedName();
760       while (!(server.isStopped() || services.isStopping())) {
761         if (spins % 5 == 0) {
762           LOG.debug("Still waiting for master to process "
763             + "the pending_merge for " + node);
764           transitionMergingNode(zkw, mergedRegionInfo, region_a.getRegionInfo(),
765             region_b.getRegionInfo(), expectedServer, -1, RS_ZK_REQUEST_REGION_MERGE,
766             RS_ZK_REQUEST_REGION_MERGE);
767         }
768         Thread.sleep(100);
769         spins++;
770         byte [] data = ZKAssign.getDataNoWatch(zkw, node, stat);
771         if (data == null) {
772           throw new IOException("Data is null, merging node "
773             + node + " no longer exists");
774         }
775         RegionTransition rt = RegionTransition.parseFrom(data);
776         EventType et = rt.getEventType();
777         if (et == RS_ZK_REGION_MERGING) {
778           ServerName serverName = rt.getServerName();
779           if (!serverName.equals(expectedServer)) {
780             throw new IOException("Merging node " + node + " is for "
781               + serverName + ", not us " + expectedServer);
782           }
783           byte [] payloadOfMerging = rt.getPayload();
784           List<HRegionInfo> mergingRegions = HRegionInfo.parseDelimitedFrom(
785             payloadOfMerging, 0, payloadOfMerging.length);
786           assert mergingRegions.size() == 3;
787           HRegionInfo a = mergingRegions.get(1);
788           HRegionInfo b = mergingRegions.get(2);
789           HRegionInfo hri_a = region_a.getRegionInfo();
790           HRegionInfo hri_b = region_b.getRegionInfo();
791           if (!(hri_a.equals(a) && hri_b.equals(b))) {
792             throw new IOException("Merging node " + node + " is for " + a + ", "
793               + b + ", not expected regions: " + hri_a + ", " + hri_b);
794           }
795           // Master has processed it.
796           return stat.getVersion();
797         }
798         if (et != RS_ZK_REQUEST_REGION_MERGE) {
799           throw new IOException("Merging node " + node
800             + " moved out of merging to " + et);
801         }
802       }
803       // Server is stopping/stopped
804       throw new IOException("Server is "
805         + (services.isStopping() ? "stopping" : "stopped"));
806     } catch (Exception e) {
807       if (e instanceof InterruptedException) {
808         Thread.currentThread().interrupt();
809       }
810       throw new IOException("Failed getting MERGING znode on "
811         + mergedRegionInfo.getRegionNameAsString(), e);
812     }
813   }
814 
815   /**
816    * Create reference file(s) of merging regions under the region_a merges dir
817    * @param hstoreFilesOfRegionA
818    * @param hstoreFilesOfRegionB
819    * @throws IOException
820    */
821   private void mergeStoreFiles(
822       Map<byte[], List<StoreFile>> hstoreFilesOfRegionA,
823       Map<byte[], List<StoreFile>> hstoreFilesOfRegionB)
824       throws IOException {
825     // Create reference file(s) of region A in mergdir
826     HRegionFileSystem fs_a = this.region_a.getRegionFileSystem();
827     for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionA
828         .entrySet()) {
829       String familyName = Bytes.toString(entry.getKey());
830       for (StoreFile storeFile : entry.getValue()) {
831         fs_a.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
832             this.mergesdir);
833       }
834     }
835     // Create reference file(s) of region B in mergedir
836     HRegionFileSystem fs_b = this.region_b.getRegionFileSystem();
837     for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionB
838         .entrySet()) {
839       String familyName = Bytes.toString(entry.getKey());
840       for (StoreFile storeFile : entry.getValue()) {
841         fs_b.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
842             this.mergesdir);
843       }
844     }
845   }
846 
847   /**
848    * @param server Hosting server instance (May be null when testing).
849    * @param services Services of regionserver, used to online regions.
850    * @throws IOException If thrown, rollback failed. Take drastic action.
851    * @return True if we successfully rolled back, false if we got to the point
852    *         of no return and so now need to abort the server to minimize
853    *         damage.
854    */
855   @SuppressWarnings("deprecation")
856   public boolean rollback(final Server server,
857       final RegionServerServices services) throws IOException {
858     if (User.isHBaseSecurityEnabled(region_a.getBaseConf())) {
859       LOG.warn("Should use execute(Server, RegionServerServices, User)");
860     }
861     return rollback(server, services, null);
862   }
863 
864   public boolean rollback(final Server server,
865       final RegionServerServices services, User user) throws IOException {
866     assert this.mergedRegionInfo != null;
867     // Coprocessor callback
868     if (rsCoprocessorHost != null) {
869       if (user == null) {
870         rsCoprocessorHost.preRollBackMerge(region_a, region_b);
871       } else {
872         try {
873           user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
874             @Override
875             public Void run() throws Exception {
876               rsCoprocessorHost.preRollBackMerge(region_a, region_b);
877               return null;
878             }
879           });
880         } catch (InterruptedException ie) {
881           InterruptedIOException iioe = new InterruptedIOException();
882           iioe.initCause(ie);
883           throw iioe;
884         }
885       }
886     }
887 
888     boolean result = true;
889     ListIterator<JournalEntry> iterator = this.journal
890         .listIterator(this.journal.size());
891     // Iterate in reverse.
892     while (iterator.hasPrevious()) {
893       JournalEntry je = iterator.previous();
894       switch (je) {
895 
896         case SET_MERGING_IN_ZK:
897           if (useZKAndZKIsSet(server)) {
898             cleanZK(server, this.mergedRegionInfo);
899           } else if (services != null && !useZKForAssignment
900               && !services.reportRegionStateTransition(TransitionCode.MERGE_REVERTED,
901                   mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo())) {
902             return false;
903           }
904           break;
905 
906         case CREATED_MERGE_DIR:
907           this.region_a.writestate.writesEnabled = true;
908           this.region_b.writestate.writesEnabled = true;
909           this.region_a.getRegionFileSystem().cleanupMergesDir();
910           break;
911 
912         case CLOSED_REGION_A:
913           try {
914             // So, this returns a seqid but if we just closed and then reopened,
915             // we should be ok. On close, we flushed using sequenceid obtained
916             // from hosting regionserver so no need to propagate the sequenceid
917             // returned out of initialize below up into regionserver as we
918             // normally do.
919             this.region_a.initialize();
920           } catch (IOException e) {
921             LOG.error("Failed rollbacking CLOSED_REGION_A of region "
922                 + this.region_a.getRegionNameAsString(), e);
923             throw new RuntimeException(e);
924           }
925           break;
926 
927         case OFFLINED_REGION_A:
928           if (services != null)
929             services.addToOnlineRegions(this.region_a);
930           break;
931 
932         case CLOSED_REGION_B:
933           try {
934             this.region_b.initialize();
935           } catch (IOException e) {
936             LOG.error("Failed rollbacking CLOSED_REGION_A of region "
937                 + this.region_b.getRegionNameAsString(), e);
938             throw new RuntimeException(e);
939           }
940           break;
941 
942         case OFFLINED_REGION_B:
943           if (services != null)
944             services.addToOnlineRegions(this.region_b);
945           break;
946 
947         case STARTED_MERGED_REGION_CREATION:
948           this.region_a.getRegionFileSystem().cleanupMergedRegion(
949               this.mergedRegionInfo);
950           break;
951 
952         case PONR:
953           // We got to the point-of-no-return so we need to just abort. Return
954           // immediately. Do not clean up created merged regions.
955           return false;
956 
957         default:
958           throw new RuntimeException("Unhandled journal entry: " + je);
959       }
960     }
961     // Coprocessor callback
962     if (rsCoprocessorHost != null) {
963       if (user == null) {
964         rsCoprocessorHost.postRollBackMerge(region_a, region_b);
965       } else {
966         try {
967           user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
968             @Override
969             public Void run() throws Exception {
970               rsCoprocessorHost.postRollBackMerge(region_a, region_b);
971               return null;
972             }
973           });
974         } catch (InterruptedException ie) {
975           InterruptedIOException iioe = new InterruptedIOException();
976           iioe.initCause(ie);
977           throw iioe;
978         }
979       }
980     }
981 
982     return result;
983   }
984 
985   HRegionInfo getMergedRegionInfo() {
986     return this.mergedRegionInfo;
987   }
988 
989   // For unit testing.
990   Path getMergesDir() {
991     return this.mergesdir;
992   }
993 
994   private boolean useZKAndZKIsSet(final Server server) {
995     return server != null && useZKForAssignment && server.getZooKeeper() != null;
996   }
997 
998   private static void cleanZK(final Server server, final HRegionInfo hri) {
999     try {
1000       // Only delete if its in expected state; could have been hijacked.
1001       if (!ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
1002           RS_ZK_REQUEST_REGION_MERGE, server.getServerName())) {
1003         ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
1004           RS_ZK_REGION_MERGING, server.getServerName());
1005       }
1006     } catch (KeeperException.NoNodeException e) {
1007       LOG.info("Failed cleanup zk node of " + hri.getRegionNameAsString(), e);
1008     } catch (KeeperException e) {
1009       server.abort("Failed cleanup zk node of " + hri.getRegionNameAsString(),e);
1010     }
1011   }
1012 
1013   /**
1014    * Creates a new ephemeral node in the PENDING_MERGE state for the merged region.
1015    * Create it ephemeral in case regionserver dies mid-merge.
1016    *
1017    * <p>
1018    * Does not transition nodes from other states. If a node already exists for
1019    * this region, a {@link NodeExistsException} will be thrown.
1020    *
1021    * @param zkw zk reference
1022    * @param region region to be created as offline
1023    * @param serverName server event originates from
1024    * @throws KeeperException
1025    * @throws IOException
1026    */
1027   public static void createNodeMerging(final ZooKeeperWatcher zkw, final HRegionInfo region,
1028       final ServerName serverName, final HRegionInfo a,
1029       final HRegionInfo b) throws KeeperException, IOException {
1030     LOG.debug(zkw.prefix("Creating ephemeral node for "
1031       + region.getEncodedName() + " in PENDING_MERGE state"));
1032     byte [] payload = HRegionInfo.toDelimitedByteArray(region, a, b);
1033     RegionTransition rt = RegionTransition.createRegionTransition(
1034       RS_ZK_REQUEST_REGION_MERGE, region.getRegionName(), serverName, payload);
1035     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
1036     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
1037       throw new IOException("Failed create of ephemeral " + node);
1038     }
1039   }
1040 
1041   /**
1042    * Transitions an existing ephemeral node for the specified region which is
1043    * currently in the begin state to be in the end state. Master cleans up the
1044    * final MERGE znode when it reads it (or if we crash, zk will clean it up).
1045    *
1046    * <p>
1047    * Does not transition nodes from other states. If for some reason the node
1048    * could not be transitioned, the method returns -1. If the transition is
1049    * successful, the version of the node after transition is returned.
1050    *
1051    * <p>
1052    * This method can fail and return false for three different reasons:
1053    * <ul>
1054    * <li>Node for this region does not exist</li>
1055    * <li>Node for this region is not in the begin state</li>
1056    * <li>After verifying the begin state, update fails because of wrong version
1057    * (this should never actually happen since an RS only does this transition
1058    * following a transition to the begin state. If two RS are conflicting, one would
1059    * fail the original transition to the begin state and not this transition)</li>
1060    * </ul>
1061    *
1062    * <p>
1063    * Does not set any watches.
1064    *
1065    * <p>
1066    * This method should only be used by a RegionServer when merging two regions.
1067    *
1068    * @param zkw zk reference
1069    * @param merged region to be transitioned to opened
1070    * @param a merging region A
1071    * @param b merging region B
1072    * @param serverName server event originates from
1073    * @param znodeVersion expected version of data before modification
1074    * @param beginState the expected current state the znode should be
1075    * @param endState the state to be transition to
1076    * @return version of node after transition, -1 if unsuccessful transition
1077    * @throws KeeperException if unexpected zookeeper exception
1078    * @throws IOException
1079    */
1080   public static int transitionMergingNode(ZooKeeperWatcher zkw,
1081       HRegionInfo merged, HRegionInfo a, HRegionInfo b, ServerName serverName,
1082       final int znodeVersion, final EventType beginState,
1083       final EventType endState) throws KeeperException, IOException {
1084     byte[] payload = HRegionInfo.toDelimitedByteArray(merged, a, b);
1085     return ZKAssign.transitionNode(zkw, merged, serverName,
1086       beginState, endState, znodeVersion, payload);
1087   }
1088 
1089   /**
1090    * Checks if the given region has merge qualifier in hbase:meta
1091    * @param services
1092    * @param regionName name of specified region
1093    * @return true if the given region has merge qualifier in META.(It will be
1094    *         cleaned by CatalogJanitor)
1095    * @throws IOException
1096    */
1097   boolean hasMergeQualifierInMeta(final RegionServerServices services,
1098       final byte[] regionName) throws IOException {
1099     if (services == null) return false;
1100     // Get merge regions if it is a merged region and already has merge
1101     // qualifier
1102     Pair<HRegionInfo, HRegionInfo> mergeRegions = MetaReader
1103         .getRegionsFromMergeQualifier(services.getCatalogTracker(), regionName);
1104     if (mergeRegions != null &&
1105         (mergeRegions.getFirst() != null || mergeRegions.getSecond() != null)) {
1106       // It has merge qualifier
1107       return true;
1108     }
1109     return false;
1110   }
1111 }