View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.util.Comparator;
24  import java.util.HashSet;
25  import java.util.Map;
26  import java.util.TreeMap;
27  import java.util.concurrent.atomic.AtomicBoolean;
28  import java.util.concurrent.atomic.AtomicInteger;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.hbase.classification.InterfaceAudience;
33  import org.apache.hadoop.fs.FileSystem;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.hbase.Chore;
36  import org.apache.hadoop.hbase.TableName;
37  import org.apache.hadoop.hbase.HColumnDescriptor;
38  import org.apache.hadoop.hbase.HConstants;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.HTableDescriptor;
41  import org.apache.hadoop.hbase.Server;
42  import org.apache.hadoop.hbase.backup.HFileArchiver;
43  import org.apache.hadoop.hbase.catalog.MetaEditor;
44  import org.apache.hadoop.hbase.catalog.MetaReader;
45  import org.apache.hadoop.hbase.client.MetaScanner;
46  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
47  import org.apache.hadoop.hbase.client.Result;
48  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.apache.hadoop.hbase.util.FSUtils;
51  import org.apache.hadoop.hbase.util.Pair;
52  import org.apache.hadoop.hbase.util.PairOfSameType;
53  import org.apache.hadoop.hbase.util.Triple;
54  
55  /**
56   * A janitor for the catalog tables.  Scans the <code>hbase:meta</code> catalog
57   * table on a period looking for unused regions to garbage collect.
58   */
59  @InterfaceAudience.Private
60  public class CatalogJanitor extends Chore {
61    private static final Log LOG = LogFactory.getLog(CatalogJanitor.class.getName());
62    private final Server server;
63    private final MasterServices services;
64    private AtomicBoolean enabled = new AtomicBoolean(true);
65    private AtomicBoolean alreadyRunning = new AtomicBoolean(false);
66  
67    CatalogJanitor(final Server server, final MasterServices services) {
68      super("CatalogJanitor-" + server.getServerName().toShortString(),
69        server.getConfiguration().getInt("hbase.catalogjanitor.interval", 300000),
70        server);
71      this.server = server;
72      this.services = services;
73    }
74  
75    @Override
76    protected boolean initialChore() {
77      try {
78        if (this.enabled.get()) scan();
79      } catch (IOException e) {
80        LOG.warn("Failed initial scan of catalog table", e);
81        return false;
82      }
83      return true;
84    }
85  
86    /**
87     * @param enabled
88     */
89    public boolean setEnabled(final boolean enabled) {
90      return this.enabled.getAndSet(enabled);
91    }
92  
93    boolean getEnabled() {
94      return this.enabled.get();
95    }
96  
97    @Override
98    protected void chore() {
99      try {
100       AssignmentManager am = this.services.getAssignmentManager();
101       if (this.enabled.get()
102           && am != null
103           && am.isFailoverCleanupDone()
104           && am.getRegionStates().getRegionsInTransition().size() == 0) {
105         scan();
106       } else {
107         LOG.warn("CatalogJanitor disabled! Not running scan.");
108       }
109     } catch (IOException e) {
110       LOG.warn("Failed scan of catalog table", e);
111     }
112   }
113 
114   /**
115    * Scans hbase:meta and returns a number of scanned rows, and a map of merged
116    * regions, and an ordered map of split parents.
117    * @return triple of scanned rows, map of merged regions and map of split
118    *         parent regioninfos
119    * @throws IOException
120    */
121   Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> getMergedRegionsAndSplitParents()
122       throws IOException {
123     return getMergedRegionsAndSplitParents(null);
124   }
125 
126   /**
127    * Scans hbase:meta and returns a number of scanned rows, and a map of merged
128    * regions, and an ordered map of split parents. if the given table name is
129    * null, return merged regions and split parents of all tables, else only the
130    * specified table
131    * @param tableName null represents all tables
132    * @return triple of scanned rows, and map of merged regions, and map of split
133    *         parent regioninfos
134    * @throws IOException
135    */
136   Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> getMergedRegionsAndSplitParents(
137       final TableName tableName) throws IOException {
138     final boolean isTableSpecified = (tableName != null);
139     // TODO: Only works with single hbase:meta region currently.  Fix.
140     final AtomicInteger count = new AtomicInteger(0);
141     // Keep Map of found split parents.  There are candidates for cleanup.
142     // Use a comparator that has split parents come before its daughters.
143     final Map<HRegionInfo, Result> splitParents =
144       new TreeMap<HRegionInfo, Result>(new SplitParentFirstComparator());
145     final Map<HRegionInfo, Result> mergedRegions = new TreeMap<HRegionInfo, Result>();
146     // This visitor collects split parents and counts rows in the hbase:meta table
147 
148     MetaScannerVisitor visitor = new MetaScanner.MetaScannerVisitorBase() {
149       @Override
150       public boolean processRow(Result r) throws IOException {
151         if (r == null || r.isEmpty()) return true;
152         count.incrementAndGet();
153         HRegionInfo info = HRegionInfo.getHRegionInfo(r);
154         if (info == null) return true; // Keep scanning
155         if (isTableSpecified
156             && info.getTable().compareTo(tableName) > 0) {
157           // Another table, stop scanning
158           return false;
159         }
160         if (info.isSplitParent()) splitParents.put(info, r);
161         if (r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER) != null) {
162           mergedRegions.put(info, r);
163         }
164         // Returning true means "keep scanning"
165         return true;
166       }
167     };
168 
169     // Run full scan of hbase:meta catalog table passing in our custom visitor with
170     // the start row
171     MetaScanner.metaScan(server.getConfiguration(), null, visitor, tableName);
172 
173     return new Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>>(
174         count.get(), mergedRegions, splitParents);
175   }
176 
177   /**
178    * If merged region no longer holds reference to the merge regions, archive
179    * merge region on hdfs and perform deleting references in hbase:meta
180    * @param mergedRegion
181    * @param regionA
182    * @param regionB
183    * @return true if we delete references in merged region on hbase:meta and archive
184    *         the files on the file system
185    * @throws IOException
186    */
187   boolean cleanMergeRegion(final HRegionInfo mergedRegion,
188       final HRegionInfo regionA, final HRegionInfo regionB) throws IOException {
189     FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
190     Path rootdir = this.services.getMasterFileSystem().getRootDir();
191     Path tabledir = FSUtils.getTableDir(rootdir, mergedRegion.getTable());
192     HTableDescriptor htd = getTableDescriptor(mergedRegion.getTable());
193     HRegionFileSystem regionFs = null;
194     try {
195       regionFs = HRegionFileSystem.openRegionFromFileSystem(
196           this.services.getConfiguration(), fs, tabledir, mergedRegion, true);
197     } catch (IOException e) {
198       LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName());
199     }
200     if (regionFs == null || !regionFs.hasReferences(htd)) {
201       LOG.debug("Deleting region " + regionA.getRegionNameAsString() + " and "
202           + regionB.getRegionNameAsString()
203           + " from fs because merged region no longer holds references");
204       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionA);
205       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionB);
206       MetaEditor.deleteMergeQualifiers(server.getCatalogTracker(), mergedRegion);
207       return true;
208     }
209     return false;
210   }
211 
212   /**
213    * Run janitorial scan of catalog <code>hbase:meta</code> table looking for
214    * garbage to collect.
215    * @return number of cleaned regions
216    * @throws IOException
217    */
218   int scan() throws IOException {
219     try {
220       if (!alreadyRunning.compareAndSet(false, true)) {
221         return 0;
222       }
223       Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> scanTriple =
224         getMergedRegionsAndSplitParents();
225       int count = scanTriple.getFirst();
226       /**
227        * clean merge regions first
228        */
229       int mergeCleaned = 0;
230       Map<HRegionInfo, Result> mergedRegions = scanTriple.getSecond();
231       for (Map.Entry<HRegionInfo, Result> e : mergedRegions.entrySet()) {
232         HRegionInfo regionA = HRegionInfo.getHRegionInfo(e.getValue(),
233             HConstants.MERGEA_QUALIFIER);
234         HRegionInfo regionB = HRegionInfo.getHRegionInfo(e.getValue(),
235             HConstants.MERGEB_QUALIFIER);
236         if (regionA == null || regionB == null) {
237           LOG.warn("Unexpected references regionA="
238               + (regionA == null ? "null" : regionA.getRegionNameAsString())
239               + ",regionB="
240               + (regionB == null ? "null" : regionB.getRegionNameAsString())
241               + " in merged region " + e.getKey().getRegionNameAsString());
242         } else {
243           if (cleanMergeRegion(e.getKey(), regionA, regionB)) {
244             mergeCleaned++;
245           }
246         }
247       }
248       /**
249        * clean split parents
250        */
251       Map<HRegionInfo, Result> splitParents = scanTriple.getThird();
252 
253       // Now work on our list of found parents. See if any we can clean up.
254       int splitCleaned = 0;
255       // regions whose parents are still around
256       HashSet<String> parentNotCleaned = new HashSet<String>();
257       for (Map.Entry<HRegionInfo, Result> e : splitParents.entrySet()) {
258         if (!parentNotCleaned.contains(e.getKey().getEncodedName()) &&
259             cleanParent(e.getKey(), e.getValue())) {
260           splitCleaned++;
261         } else {
262           // We could not clean the parent, so it's daughters should not be cleaned either (HBASE-6160)
263           PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(e.getValue());
264           parentNotCleaned.add(daughters.getFirst().getEncodedName());
265           parentNotCleaned.add(daughters.getSecond().getEncodedName());
266         }
267       }
268       if ((mergeCleaned + splitCleaned) != 0) {
269         LOG.info("Scanned " + count + " catalog row(s), gc'd " + mergeCleaned
270             + " unreferenced merged region(s) and " + splitCleaned
271             + " unreferenced parent region(s)");
272       } else if (LOG.isTraceEnabled()) {
273         LOG.trace("Scanned " + count + " catalog row(s), gc'd " + mergeCleaned
274             + " unreferenced merged region(s) and " + splitCleaned
275             + " unreferenced parent region(s)");
276       }
277       return mergeCleaned + splitCleaned;
278     } finally {
279       alreadyRunning.set(false);
280     }
281   }
282 
283   /**
284    * Compare HRegionInfos in a way that has split parents sort BEFORE their
285    * daughters.
286    */
287   static class SplitParentFirstComparator implements Comparator<HRegionInfo> {
288     Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator();
289     @Override
290     public int compare(HRegionInfo left, HRegionInfo right) {
291       // This comparator differs from the one HRegionInfo in that it sorts
292       // parent before daughters.
293       if (left == null) return -1;
294       if (right == null) return 1;
295       // Same table name.
296       int result = left.getTable().compareTo(right.getTable());
297       if (result != 0) return result;
298       // Compare start keys.
299       result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
300       if (result != 0) return result;
301       // Compare end keys, but flip the operands so parent comes first
302       result = rowEndKeyComparator.compare(right.getEndKey(), left.getEndKey());
303 
304       return result;
305     }
306   }
307 
308   /**
309    * If daughters no longer hold reference to the parents, delete the parent.
310    * @param parent HRegionInfo of split offlined parent
311    * @param rowContent Content of <code>parent</code> row in
312    * <code>metaRegionName</code>
313    * @return True if we removed <code>parent</code> from meta table and from
314    * the filesystem.
315    * @throws IOException
316    */
317   boolean cleanParent(final HRegionInfo parent, Result rowContent)
318   throws IOException {
319     boolean result = false;
320     // Check whether it is a merged region and not clean reference
321     // No necessary to check MERGEB_QUALIFIER because these two qualifiers will
322     // be inserted/deleted together
323     if (rowContent.getValue(HConstants.CATALOG_FAMILY,
324         HConstants.MERGEA_QUALIFIER) != null) {
325       // wait cleaning merge region first
326       return result;
327     }
328     // Run checks on each daughter split.
329     PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(rowContent);
330     Pair<Boolean, Boolean> a = checkDaughterInFs(parent, daughters.getFirst());
331     Pair<Boolean, Boolean> b = checkDaughterInFs(parent, daughters.getSecond());
332     if (hasNoReferences(a) && hasNoReferences(b)) {
333       LOG.debug("Deleting region " + parent.getRegionNameAsString() +
334         " because daughter splits no longer hold references");
335       FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
336       if (LOG.isTraceEnabled()) LOG.trace("Archiving parent region: " + parent);
337       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, parent);
338       MetaEditor.deleteRegion(this.server.getCatalogTracker(), parent);
339       result = true;
340     }
341     return result;
342   }
343 
344   /**
345    * @param p A pair where the first boolean says whether or not the daughter
346    * region directory exists in the filesystem and then the second boolean says
347    * whether the daughter has references to the parent.
348    * @return True the passed <code>p</code> signifies no references.
349    */
350   private boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
351     return !p.getFirst() || !p.getSecond();
352   }
353 
354   /**
355    * Checks if a daughter region -- either splitA or splitB -- still holds
356    * references to parent.
357    * @param parent Parent region
358    * @param daughter Daughter region
359    * @return A pair where the first boolean says whether or not the daughter
360    * region directory exists in the filesystem and then the second boolean says
361    * whether the daughter has references to the parent.
362    * @throws IOException
363    */
364   Pair<Boolean, Boolean> checkDaughterInFs(final HRegionInfo parent, final HRegionInfo daughter)
365   throws IOException {
366     if (daughter == null)  {
367       return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
368     }
369 
370     FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
371     Path rootdir = this.services.getMasterFileSystem().getRootDir();
372     Path tabledir = FSUtils.getTableDir(rootdir, daughter.getTable());
373 
374     Path daughterRegionDir = new Path(tabledir, daughter.getEncodedName());
375 
376     HRegionFileSystem regionFs = null;
377 
378     try {
379       if (!FSUtils.isExists(fs, daughterRegionDir)) {
380         return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
381       }
382     } catch (IOException ioe) {
383       LOG.warn("Error trying to determine if daughter region exists, " +
384                "assuming exists and has references", ioe);
385       return new Pair<Boolean, Boolean>(Boolean.TRUE, Boolean.TRUE);
386     }
387 
388     try {
389       regionFs = HRegionFileSystem.openRegionFromFileSystem(
390           this.services.getConfiguration(), fs, tabledir, daughter, true);
391     } catch (IOException e) {
392       LOG.warn("Error trying to determine referenced files from : " + daughter.getEncodedName()
393           + ", to: " + parent.getEncodedName() + " assuming has references", e);
394       return new Pair<Boolean, Boolean>(Boolean.TRUE, Boolean.TRUE);
395     }
396 
397     boolean references = false;
398     HTableDescriptor parentDescriptor = getTableDescriptor(parent.getTable());
399     for (HColumnDescriptor family: parentDescriptor.getFamilies()) {
400       if ((references = regionFs.hasReferences(family.getNameAsString()))) {
401         break;
402       }
403     }
404     return new Pair<Boolean, Boolean>(Boolean.TRUE, Boolean.valueOf(references));
405   }
406 
407   private HTableDescriptor getTableDescriptor(final TableName tableName)
408       throws FileNotFoundException, IOException {
409     return this.services.getTableDescriptors().get(tableName);
410   }
411 
412   /**
413    * Checks if the specified region has merge qualifiers, if so, try to clean
414    * them
415    * @param region
416    * @return true if the specified region doesn't have merge qualifier now
417    * @throws IOException
418    */
419   public boolean cleanMergeQualifier(final HRegionInfo region)
420       throws IOException {
421     // Get merge regions if it is a merged region and already has merge
422     // qualifier
423     Pair<HRegionInfo, HRegionInfo> mergeRegions = MetaReader
424         .getRegionsFromMergeQualifier(this.services.getCatalogTracker(),
425             region.getRegionName());
426     if (mergeRegions == null
427         || (mergeRegions.getFirst() == null && mergeRegions.getSecond() == null)) {
428       // It doesn't have merge qualifier, no need to clean
429       return true;
430     }
431     // It shouldn't happen, we must insert/delete these two qualifiers together
432     if (mergeRegions.getFirst() == null || mergeRegions.getSecond() == null) {
433       LOG.error("Merged region " + region.getRegionNameAsString()
434           + " has only one merge qualifier in META.");
435       return false;
436     }
437     return cleanMergeRegion(region, mergeRegions.getFirst(),
438         mergeRegions.getSecond());
439   }
440 }