View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.util.Comparator;
24  import java.util.HashSet;
25  import java.util.Map;
26  import java.util.TreeMap;
27  import java.util.concurrent.atomic.AtomicBoolean;
28  import java.util.concurrent.atomic.AtomicInteger;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.classification.InterfaceAudience;
33  import org.apache.hadoop.fs.FileSystem;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.hbase.Chore;
36  import org.apache.hadoop.hbase.HColumnDescriptor;
37  import org.apache.hadoop.hbase.HConstants;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.HTableDescriptor;
40  import org.apache.hadoop.hbase.Server;
41  import org.apache.hadoop.hbase.backup.HFileArchiver;
42  import org.apache.hadoop.hbase.catalog.MetaEditor;
43  import org.apache.hadoop.hbase.catalog.MetaReader;
44  import org.apache.hadoop.hbase.client.Result;
45  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
46  import org.apache.hadoop.hbase.util.Bytes;
47  import org.apache.hadoop.hbase.util.Pair;
48  import org.apache.hadoop.hbase.util.PairOfSameType;
49  import org.apache.hadoop.hbase.util.Triple;
50  
51  /**
52   * A janitor for the catalog tables.  Scans the <code>.META.</code> catalog
53   * table on a period looking for unused regions to garbage collect.
54   */
55  @InterfaceAudience.Private
56  public class CatalogJanitor extends Chore {
57    private static final Log LOG = LogFactory.getLog(CatalogJanitor.class.getName());
58    private final Server server;
59    private final MasterServices services;
60    private AtomicBoolean enabled = new AtomicBoolean(true);
61    private AtomicBoolean alreadyRunning = new AtomicBoolean(false);
62  
63    CatalogJanitor(final Server server, final MasterServices services) {
64      super(server.getServerName() + "-CatalogJanitor",
65        server.getConfiguration().getInt("hbase.catalogjanitor.interval", 300000),
66        server);
67      this.server = server;
68      this.services = services;
69    }
70  
71    @Override
72    protected boolean initialChore() {
73      try {
74        if (this.enabled.get()) scan();
75      } catch (IOException e) {
76        LOG.warn("Failed initial scan of catalog table", e);
77        return false;
78      }
79      return true;
80    }
81  
82    /**
83     * @param enabled
84     */
85    public boolean setEnabled(final boolean enabled) {
86      return this.enabled.getAndSet(enabled);
87    }
88  
89    boolean getEnabled() {
90      return this.enabled.get();
91    }
92  
93    @Override
94    protected void chore() {
95      try {
96        if (this.enabled.get()) {
97          scan();
98        } else {
99          LOG.warn("CatalogJanitor disabled! Not running scan.");
100       }
101     } catch (IOException e) {
102       LOG.warn("Failed scan of catalog table", e);
103     }
104   }
105 
106   /**
107    * Scans META and returns a number of scanned rows, and a map of merged
108    * regions, and an ordered map of split parents.
109    * @return triple of scanned rows, map of merged regions and map of split
110    *         parent regioninfos
111    * @throws IOException
112    */
113   Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> getMergedRegionsAndSplitParents()
114       throws IOException {
115     return getMergedRegionsAndSplitParents(null);
116   }
117 
118   /**
119    * Scans META and returns a number of scanned rows, and a map of merged
120    * regions, and an ordered map of split parents. if the given table name is
121    * null, return merged regions and split parents of all tables, else only the
122    * specified table
123    * @param tableName null represents all tables
124    * @return triple of scanned rows, and map of merged regions, and map of split
125    *         parent regioninfos
126    * @throws IOException
127    */
128   Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> getMergedRegionsAndSplitParents(
129       final byte[] tableName) throws IOException {
130     final boolean isTableSpecified = (tableName != null && tableName.length != 0);
131     // TODO: Only works with single .META. region currently.  Fix.
132     final AtomicInteger count = new AtomicInteger(0);
133     // Keep Map of found split parents.  There are candidates for cleanup.
134     // Use a comparator that has split parents come before its daughters.
135     final Map<HRegionInfo, Result> splitParents =
136       new TreeMap<HRegionInfo, Result>(new SplitParentFirstComparator());
137     final Map<HRegionInfo, Result> mergedRegions = new TreeMap<HRegionInfo, Result>();
138     // This visitor collects split parents and counts rows in the .META. table
139     MetaReader.Visitor visitor = new MetaReader.Visitor() {
140       @Override
141       public boolean visit(Result r) throws IOException {
142         if (r == null || r.isEmpty()) return true;
143         count.incrementAndGet();
144         HRegionInfo info = HRegionInfo.getHRegionInfo(r);
145         if (info == null) return true; // Keep scanning
146         if (isTableSpecified
147             && Bytes.compareTo(info.getTableName(), tableName) > 0) {
148           // Another table, stop scanning
149           return false;
150         }
151         if (info.isSplitParent()) splitParents.put(info, r);
152         if (r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER) != null) {
153           mergedRegions.put(info, r);
154         }
155         // Returning true means "keep scanning"
156         return true;
157       }
158     };
159 
160     byte[] startRow = (!isTableSpecified) ? HConstants.EMPTY_START_ROW
161         : HRegionInfo.createRegionName(tableName, HConstants.EMPTY_START_ROW,
162             HConstants.ZEROES, false);
163     // Run full scan of .META. catalog table passing in our custom visitor with
164     // the start row
165     MetaReader.fullScan(this.server.getCatalogTracker(), visitor, startRow);
166 
167     return new Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>>(
168         count.get(), mergedRegions, splitParents);
169   }
170 
171   /**
172    * If merged region no longer holds reference to the merge regions, archive
173    * merge region on hdfs and perform deleting references in .META.
174    * @param mergedRegion
175    * @param regionA
176    * @param regionB
177    * @return true if we delete references in merged region on .META. and archive
178    *         the files on the file system
179    * @throws IOException
180    */
181   boolean cleanMergeRegion(final HRegionInfo mergedRegion,
182       final HRegionInfo regionA, final HRegionInfo regionB) throws IOException {
183     FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
184     Path rootdir = this.services.getMasterFileSystem().getRootDir();
185     Path tabledir = HTableDescriptor.getTableDir(rootdir,
186         mergedRegion.getTableName());
187     HTableDescriptor htd = getTableDescriptor(mergedRegion
188         .getTableNameAsString());
189     HRegionFileSystem regionFs = null;
190     try {
191       regionFs = HRegionFileSystem.openRegionFromFileSystem(
192           this.services.getConfiguration(), fs, tabledir, mergedRegion, true);
193     } catch (IOException e) {
194       LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName());
195     }
196     if (regionFs == null || !regionFs.hasReferences(htd)) {
197       LOG.debug("Deleting region " + regionA.getRegionNameAsString() + " and "
198           + regionB.getRegionNameAsString()
199           + " from fs because merged region no longer holds references");
200       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionA);
201       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionB);
202       MetaEditor.deleteMergeQualifiers(server.getCatalogTracker(), mergedRegion);
203       return true;
204     }
205     return false;
206   }
207 
208   /**
209    * Run janitorial scan of catalog <code>.META.</code> table looking for
210    * garbage to collect.
211    * @return number of cleaned regions
212    * @throws IOException
213    */
214   int scan() throws IOException {
215     try {
216       if (!alreadyRunning.compareAndSet(false, true)) {
217         return 0;
218       }
219       Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> scanTriple =
220         getMergedRegionsAndSplitParents();
221       int count = scanTriple.getFirst();
222       /**
223        * clean merge regions first
224        */
225       int mergeCleaned = 0;
226       Map<HRegionInfo, Result> mergedRegions = scanTriple.getSecond();
227       for (Map.Entry<HRegionInfo, Result> e : mergedRegions.entrySet()) {
228         HRegionInfo regionA = HRegionInfo.getHRegionInfo(e.getValue(),
229             HConstants.MERGEA_QUALIFIER);
230         HRegionInfo regionB = HRegionInfo.getHRegionInfo(e.getValue(),
231             HConstants.MERGEB_QUALIFIER);
232         if (regionA == null || regionB == null) {
233           LOG.warn("Unexpected references regionA="
234               + (regionA == null ? "null" : regionA.getRegionNameAsString())
235               + ",regionB="
236               + (regionB == null ? "null" : regionB.getRegionNameAsString())
237               + " in merged region " + e.getKey().getRegionNameAsString());
238         } else {
239           if (cleanMergeRegion(e.getKey(), regionA, regionB)) {
240             mergeCleaned++;
241           }
242         }
243       }
244       /**
245        * clean split parents
246        */
247       Map<HRegionInfo, Result> splitParents = scanTriple.getThird();
248 
249       // Now work on our list of found parents. See if any we can clean up.
250       int splitCleaned = 0;
251       // regions whose parents are still around
252       HashSet<String> parentNotCleaned = new HashSet<String>();
253       for (Map.Entry<HRegionInfo, Result> e : splitParents.entrySet()) {
254         if (!parentNotCleaned.contains(e.getKey().getEncodedName()) &&
255             cleanParent(e.getKey(), e.getValue())) {
256           splitCleaned++;
257         } else {
258           // We could not clean the parent, so it's daughters should not be cleaned either (HBASE-6160)
259           PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(e.getValue());
260           parentNotCleaned.add(daughters.getFirst().getEncodedName());
261           parentNotCleaned.add(daughters.getSecond().getEncodedName());
262         }
263       }
264       if ((mergeCleaned + splitCleaned) != 0) {
265         LOG.info("Scanned " + count + " catalog row(s), gc'd " + mergeCleaned
266             + " unreferenced merged region(s) and " + splitCleaned
267             + " unreferenced parent region(s)");
268       } else if (LOG.isDebugEnabled()) {
269         LOG.debug("Scanned " + count + " catalog row(s), gc'd " + mergeCleaned
270             + " unreferenced merged region(s) and " + splitCleaned
271             + " unreferenced parent region(s)");
272       }
273       return mergeCleaned + splitCleaned;
274     } finally {
275       alreadyRunning.set(false);
276     }
277   }
278 
279   /**
280    * Compare HRegionInfos in a way that has split parents sort BEFORE their
281    * daughters.
282    */
283   static class SplitParentFirstComparator implements Comparator<HRegionInfo> {
284     @Override
285     public int compare(HRegionInfo left, HRegionInfo right) {
286       // This comparator differs from the one HRegionInfo in that it sorts
287       // parent before daughters.
288       if (left == null) return -1;
289       if (right == null) return 1;
290       // Same table name.
291       int result = Bytes.compareTo(left.getTableName(),
292           right.getTableName());
293       if (result != 0) return result;
294       // Compare start keys.
295       result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
296       if (result != 0) return result;
297       // Compare end keys.
298       result = Bytes.compareTo(left.getEndKey(), right.getEndKey());
299       if (result != 0) {
300         if (left.getStartKey().length != 0
301                 && left.getEndKey().length == 0) {
302             return -1;  // left is last region
303         }
304         if (right.getStartKey().length != 0
305                 && right.getEndKey().length == 0) {
306             return 1;  // right is the last region
307         }
308         return -result; // Flip the result so parent comes first.
309       }
310       return result;
311     }
312   }
313 
314   /**
315    * If daughters no longer hold reference to the parents, delete the parent.
316    * @param parent HRegionInfo of split offlined parent
317    * @param rowContent Content of <code>parent</code> row in
318    * <code>metaRegionName</code>
319    * @return True if we removed <code>parent</code> from meta table and from
320    * the filesystem.
321    * @throws IOException
322    */
323   boolean cleanParent(final HRegionInfo parent, Result rowContent)
324   throws IOException {
325     boolean result = false;
326     // Check whether it is a merged region and not clean reference
327     // No necessary to check MERGEB_QUALIFIER because these two qualifiers will
328     // be inserted/deleted together
329     if (rowContent.getValue(HConstants.CATALOG_FAMILY,
330         HConstants.MERGEA_QUALIFIER) != null) {
331       // wait cleaning merge region first
332       return result;
333     }
334     // Run checks on each daughter split.
335     PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(rowContent);
336     Pair<Boolean, Boolean> a = checkDaughterInFs(parent, daughters.getFirst());
337     Pair<Boolean, Boolean> b = checkDaughterInFs(parent, daughters.getSecond());
338     if (hasNoReferences(a) && hasNoReferences(b)) {
339       LOG.debug("Deleting region " + parent.getRegionNameAsString() +
340         " because daughter splits no longer hold references");
341       // wipe out daughter references from parent region in meta
342       removeDaughtersFromParent(parent);
343 
344       // This latter regionOffline should not be necessary but is done for now
345       // until we let go of regionserver to master heartbeats.  See HBASE-3368.
346       if (this.services.getAssignmentManager() != null) {
347         // The mock used in testing catalogjanitor returns null for getAssignmnetManager.
348         // Allow for null result out of getAssignmentManager.
349         this.services.getAssignmentManager().regionOffline(parent);
350       }
351       FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
352       LOG.debug("Archiving parent region:" + parent);
353       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, parent);
354       MetaEditor.deleteRegion(this.server.getCatalogTracker(), parent);
355       result = true;
356     }
357     return result;
358   }
359 
360   /**
361    * @param p A pair where the first boolean says whether or not the daughter
362    * region directory exists in the filesystem and then the second boolean says
363    * whether the daughter has references to the parent.
364    * @return True the passed <code>p</code> signifies no references.
365    */
366   private boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
367     return !p.getFirst() || !p.getSecond();
368   }
369 
370   /**
371    * Remove mention of daughters from parent row.
372    * @param parent
373    * @throws IOException
374    */
375   private void removeDaughtersFromParent(final HRegionInfo parent)
376   throws IOException {
377     MetaEditor.deleteDaughtersReferencesInParent(this.server.getCatalogTracker(), parent);
378   }
379 
380   /**
381    * Checks if a daughter region -- either splitA or splitB -- still holds
382    * references to parent.
383    * @param parent Parent region
384    * @param daughter Daughter region
385    * @return A pair where the first boolean says whether or not the daughter
386    * region directory exists in the filesystem and then the second boolean says
387    * whether the daughter has references to the parent.
388    * @throws IOException
389    */
390   Pair<Boolean, Boolean> checkDaughterInFs(final HRegionInfo parent, final HRegionInfo daughter)
391   throws IOException {
392     if (daughter == null)  {
393       return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
394     }
395 
396     FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
397     Path rootdir = this.services.getMasterFileSystem().getRootDir();
398     Path tabledir = HTableDescriptor.getTableDir(rootdir, daughter.getTableName());
399 
400     HRegionFileSystem regionFs = null;
401     try {
402       regionFs = HRegionFileSystem.openRegionFromFileSystem(
403           this.services.getConfiguration(), fs, tabledir, daughter, true);
404     } catch (IOException e) {
405       LOG.warn("Daughter region does not exist: " + daughter.getEncodedName());
406       return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
407     }
408 
409     boolean references = false;
410     HTableDescriptor parentDescriptor = getTableDescriptor(parent.getTableNameAsString());
411     for (HColumnDescriptor family: parentDescriptor.getFamilies()) {
412       if ((references = regionFs.hasReferences(family.getNameAsString()))) {
413         break;
414       }
415     }
416     return new Pair<Boolean, Boolean>(Boolean.TRUE, Boolean.valueOf(references));
417   }
418 
419   private HTableDescriptor getTableDescriptor(final String tableName)
420       throws FileNotFoundException, IOException {
421     return this.services.getTableDescriptors().get(tableName);
422   }
423 
424   /**
425    * Checks if the specified region has merge qualifiers, if so, try to clean
426    * them
427    * @param region
428    * @return true if the specified region doesn't have merge qualifier now
429    * @throws IOException
430    */
431   public boolean cleanMergeQualifier(final HRegionInfo region)
432       throws IOException {
433     // Get merge regions if it is a merged region and already has merge
434     // qualifier
435     Pair<HRegionInfo, HRegionInfo> mergeRegions = MetaReader
436         .getRegionsFromMergeQualifier(this.services.getCatalogTracker(),
437             region.getRegionName());
438     if (mergeRegions == null
439         || (mergeRegions.getFirst() == null && mergeRegions.getSecond() == null)) {
440       // It doesn't have merge qualifier, no need to clean
441       return true;
442     }
443     // It shouldn't happen, we must insert/delete these two qualifiers together
444     if (mergeRegions.getFirst() == null || mergeRegions.getSecond() == null) {
445       LOG.error("Merged region " + region.getRegionNameAsString()
446           + " has only one merge qualifier in META.");
447       return false;
448     }
449     return cleanMergeRegion(region, mergeRegions.getFirst(),
450         mergeRegions.getSecond());
451   }
452 }