View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import java.io.FileNotFoundException;
21  import java.io.IOException;
22  import java.util.Arrays;
23  import java.util.Comparator;
24  import java.util.List;
25  import java.util.Map;
26  import java.util.TreeMap;
27  import java.util.concurrent.ConcurrentHashMap;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  import org.apache.commons.lang.NotImplementedException;
32  import org.apache.commons.logging.Log;
33  import org.apache.commons.logging.LogFactory;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.fs.FSDataInputStream;
36  import org.apache.hadoop.fs.FSDataOutputStream;
37  import org.apache.hadoop.fs.FileStatus;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.fs.PathFilter;
41  import org.apache.hadoop.hbase.HBaseFileSystem;
42  import org.apache.hadoop.hbase.HConstants;
43  import org.apache.hadoop.hbase.HTableDescriptor;
44  import org.apache.hadoop.hbase.TableDescriptors;
45  import org.apache.hadoop.hbase.TableInfoMissingException;
46  
47  
/**
 * Implementation of {@link TableDescriptors} that reads descriptors from the
 * passed filesystem.  It expects descriptors to be in a file under the
 * table's directory in FS.  Can be read-only -- i.e. does not modify
 * the filesystem or can be read and write.
 *
 * <p>Also has utility for keeping up the table descriptors tableinfo file.
 * The table schema file is kept under the table directory in the filesystem.
 * It has a {@link #TABLEINFO_NAME} prefix and then a suffix that is the
 * edit sequenceid: e.g. <code>.tableinfo.0000000003</code>.  This sequenceid
 * is always increasing.  It starts at zero.  The table schema file with the
 * highest sequenceid has the most recent schema edit. Usually there is one file
 * only, the most recent but there may be short periods where there are more
 * than one file. Old files are eventually cleaned.  Presumption is that there
 * will not be lots of concurrent clients making table schema edits.  If so,
 * the below needs a bit of a reworking and perhaps some supporting api in hdfs.
 */
public class FSTableDescriptors implements TableDescriptors {
  private static final Log LOG = LogFactory.getLog(FSTableDescriptors.class);
  // Filesystem the table descriptors live on and the HBase root dir under it.
  private final FileSystem fs;
  private final Path rootdir;
  // When true, remove()/add() skip all filesystem mutations.
  private final boolean fsreadonly;
  // Rough metrics only: plain longs, not synchronized/atomic, so counts are
  // approximate under concurrent access.  Package-private for tests.
  long cachehits = 0;
  long invocations = 0;

  /** The file name used to store HTD in HDFS  */
  public static final String TABLEINFO_NAME = ".tableinfo";

  // This cache does not age out the old stuff.  Thinking is that the amount
  // of data we keep up in here is so small, no need to do occasional purge.
  // TODO.
  // Keyed by table name; staleness is detected by comparing the cached
  // modtime against the tableinfo file's current modification time.
  private final Map<String, TableDescriptorModtime> cache =
    new ConcurrentHashMap<String, TableDescriptorModtime>();
81  
82    /**
83     * Data structure to hold modification time and table descriptor.
84     */
85    static class TableDescriptorModtime {
86      private final HTableDescriptor descriptor;
87      private final long modtime;
88  
89      TableDescriptorModtime(final long modtime, final HTableDescriptor htd) {
90        this.descriptor = htd;
91        this.modtime = modtime;
92      }
93  
94      long getModtime() {
95        return this.modtime;
96      }
97  
98      HTableDescriptor getTableDescriptor() {
99        return this.descriptor;
100     }
101   }
102 
  /**
   * Construct a read/write instance over the given filesystem and HBase
   * root directory.
   * @param fs filesystem the table descriptors live on
   * @param rootdir HBase root directory
   */
  public FSTableDescriptors(final FileSystem fs, final Path rootdir) {
    this(fs, rootdir, false);
  }
106 
107   /**
108    * @param fs
109    * @param rootdir
110    * @param fsreadOnly True if we are read-only when it comes to filesystem
111    * operations; i.e. on remove, we do not do delete in fs.
112    */
113   public FSTableDescriptors(final FileSystem fs, final Path rootdir,
114       final boolean fsreadOnly) {
115     super();
116     this.fs = fs;
117     this.rootdir = rootdir;
118     this.fsreadonly = fsreadOnly;
119   }
120 
  /**
   * Get the current table descriptor for the given table name.
   * Delegates to {@link #get(String)} after converting the bytes.
   * @param tablename table name as bytes
   * @return descriptor, or null if none found
   * @throws IOException if the name maps to a non-user-table directory or
   * the filesystem lookup fails
   */
  @Override
  public HTableDescriptor get(final byte [] tablename)
  throws IOException {
    return get(Bytes.toString(tablename));
  }
129 
  /**
   * Get the current table descriptor for the named table.  -ROOT- and .META.
   * return their static descriptors; other names are served from the cache
   * when the tableinfo file's modification time has not advanced, otherwise
   * re-read from the filesystem.
   * @param tablename table name
   * @return descriptor, or null if no tableinfo file could be read
   * @throws IOException if tablename names a non-user-table directory
   */
  @Override
  public HTableDescriptor get(final String tablename)
  throws IOException {
    invocations++;
    // -ROOT- and .META. descriptors are fixed; never read from fs.
    if (HTableDescriptor.ROOT_TABLEDESC.getNameAsString().equals(tablename)) {
      cachehits++;
      return HTableDescriptor.ROOT_TABLEDESC;
    }
    if (HTableDescriptor.META_TABLEDESC.getNameAsString().equals(tablename)) {
      cachehits++;
      return HTableDescriptor.META_TABLEDESC;
    }
    // .META. and -ROOT- is already handled. If some one tries to get the descriptor for
    // .logs, .oldlogs or .corrupt throw an exception.
    if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tablename)) {
       throw new IOException("No descriptor found for table = " + tablename);
    }

    // Look in cache of descriptors.
    TableDescriptorModtime cachedtdm = this.cache.get(tablename);

    if (cachedtdm != null) {
      // Check mod time has not changed (this is trip to NN).
      if (getTableInfoModtime(this.fs, this.rootdir, tablename) <= cachedtdm.getModtime()) {
        cachehits++;
        return cachedtdm.getTableDescriptor();
      }
    }
    
    // Cache miss or stale entry: re-read the descriptor from the filesystem.
    TableDescriptorModtime tdmt = null;
    try {
      tdmt = getTableDescriptorModtime(this.fs, this.rootdir, tablename);
    } catch (NullPointerException e) {
      // getTableDescriptorModtime declares NPE for a null table dir; treat
      // same as a read failure and fall through with tdmt == null.
      LOG.debug("Exception during readTableDecriptor. Current table name = "
          + tablename, e);
    } catch (IOException ioe) {
      LOG.debug("Exception during readTableDecriptor. Current table name = "
          + tablename, ioe);
    }
    
    if (tdmt == null) {
      LOG.warn("The following folder is in HBase's root directory and " +
        "doesn't contain a table descriptor, " +
        "do consider deleting it: " + tablename);
    } else {
      this.cache.put(tablename, tdmt);
    }
    return tdmt == null ? null : tdmt.getTableDescriptor();
  }
182 
183   /* (non-Javadoc)
184    * @see org.apache.hadoop.hbase.TableDescriptors#getTableDescriptors(org.apache.hadoop.fs.FileSystem, org.apache.hadoop.fs.Path)
185    */
186   @Override
187   public Map<String, HTableDescriptor> getAll()
188   throws IOException {
189     Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
190     List<Path> tableDirs = FSUtils.getTableDirs(fs, rootdir);
191     for (Path d: tableDirs) {
192       HTableDescriptor htd = null;
193       try {
194 
195         htd = get(d.getName());
196       } catch (FileNotFoundException fnfe) {
197         // inability of retrieving one HTD shouldn't stop getting the remaining
198         LOG.warn("Trouble retrieving htd", fnfe);
199       }
200       if (htd == null) continue;
201       htds.put(d.getName(), htd);
202     }
203     return htds;
204   }
205 
206   @Override
207   public void add(HTableDescriptor htd) throws IOException {
208     if (Bytes.equals(HConstants.ROOT_TABLE_NAME, htd.getName())) {
209       throw new NotImplementedException();
210     }
211     if (Bytes.equals(HConstants.META_TABLE_NAME, htd.getName())) {
212       throw new NotImplementedException();
213     }
214     if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(htd.getNameAsString())) {
215       throw new NotImplementedException();
216     }
217     if (!this.fsreadonly) updateHTableDescriptor(this.fs, this.rootdir, htd);
218     long modtime = getTableInfoModtime(this.fs, this.rootdir, htd.getNameAsString());
219     this.cache.put(htd.getNameAsString(), new TableDescriptorModtime(modtime, htd));
220   }
221 
222   @Override
223   public HTableDescriptor remove(final String tablename)
224   throws IOException {
225     if (!this.fsreadonly) {
226       Path tabledir = FSUtils.getTablePath(this.rootdir, tablename);
227       if (this.fs.exists(tabledir)) {
228         if (!HBaseFileSystem.deleteDirFromFileSystem(fs, tabledir)) {
229           throw new IOException("Failed delete of " + tabledir.toString());
230         }
231       }
232     }
233     TableDescriptorModtime tdm = this.cache.remove(tablename);
234     return tdm == null ? null : tdm.getTableDescriptor();
235   }
236 
237   /**
238    * Checks if <code>.tableinfo<code> exists for given table
239    * 
240    * @param fs file system
241    * @param rootdir root directory of HBase installation
242    * @param tableName name of table
243    * @return true if exists
244    * @throws IOException
245    */
246   public static boolean isTableInfoExists(FileSystem fs, Path rootdir,
247       String tableName) throws IOException {
248     FileStatus status = getTableInfoPath(fs, rootdir, tableName);
249     return status == null? false: fs.exists(status.getPath());
250   }
251 
252   private static FileStatus getTableInfoPath(final FileSystem fs,
253       final Path rootdir, final String tableName)
254   throws IOException {
255     Path tabledir = FSUtils.getTablePath(rootdir, tableName);
256     return getTableInfoPath(fs, tabledir);
257   }
258 
259   /**
260    * Looks under the table directory in the filesystem for files with a
261    * {@link #TABLEINFO_NAME} prefix.  Returns reference to the 'latest' instance.
262    * @param fs
263    * @param tabledir
264    * @return The 'current' tableinfo file.
265    * @throws IOException
266    */
267   public static FileStatus getTableInfoPath(final FileSystem fs,
268       final Path tabledir)
269   throws IOException {
270     FileStatus [] status = FSUtils.listStatus(fs, tabledir, new PathFilter() {
271       @Override
272       public boolean accept(Path p) {
273         // Accept any file that starts with TABLEINFO_NAME
274         return p.getName().startsWith(TABLEINFO_NAME);
275       }
276     });
277     if (status == null || status.length < 1) return null;
278     Arrays.sort(status, new FileStatusFileNameComparator());
279     if (status.length > 1) {
280       // Clean away old versions of .tableinfo
281       for (int i = 1; i < status.length; i++) {
282         Path p = status[i].getPath();
283         // Clean up old versions
284         if (!HBaseFileSystem.deleteFileFromFileSystem(fs, p)) {
285           LOG.warn("Failed cleanup of " + status);
286         } else {
287           LOG.debug("Cleaned up old tableinfo file " + p);
288         }
289       }
290     }
291     return status[0];
292   }
293 
  /**
   * Orders {@link FileStatus} instances in reverse of their natural
   * (path-based) ordering so the tableinfo file with the highest sequenceid
   * suffix sorts first.  NOTE(review): the compare is on the full
   * {@link FileStatus}, not {@link Path#getName()}; callers compare files
   * from a single table directory, where the two orderings presumably
   * coincide — confirm before reusing across directories.
   */
  static class FileStatusFileNameComparator
  implements Comparator<FileStatus> {
    @Override
    public int compare(FileStatus left, FileStatus right) {
      return -left.compareTo(right);
    }
  }
305 
306   /**
307    * Width of the sequenceid that is a suffix on a tableinfo file.
308    */
309   static final int WIDTH_OF_SEQUENCE_ID = 10;
310 
311   /*
312    * @param number Number to use as suffix.
313    * @return Returns zero-prefixed 5-byte wide decimal version of passed
314    * number (Does absolute in case number is negative).
315    */
316   static String formatTableInfoSequenceId(final int number) {
317     byte [] b = new byte[WIDTH_OF_SEQUENCE_ID];
318     int d = Math.abs(number);
319     for (int i = b.length - 1; i >= 0; i--) {
320       b[i] = (byte)((d % 10) + '0');
321       d /= 10;
322     }
323     return Bytes.toString(b);
324   }
325 
326   /**
327    * Regex to eat up sequenceid suffix on a .tableinfo file.
328    * Use regex because may encounter oldstyle .tableinfos where there is no
329    * sequenceid on the end.
330    */
331   private static final Pattern SUFFIX =
332     Pattern.compile(TABLEINFO_NAME + "(\\.([0-9]{" + WIDTH_OF_SEQUENCE_ID + "}))?$");
333 
334 
335   /**
336    * @param p Path to a <code>.tableinfo</code> file.
337    * @return The current editid or 0 if none found.
338    */
339   static int getTableInfoSequenceid(final Path p) {
340     if (p == null) return 0;
341     Matcher m = SUFFIX.matcher(p.getName());
342     if (!m.matches()) throw new IllegalArgumentException(p.toString());
343     String suffix = m.group(2);
344     if (suffix == null || suffix.length() <= 0) return 0;
345     return Integer.parseInt(m.group(2));
346   }
347 
348   /**
349    * @param tabledir
350    * @param sequenceid
351    * @return Name of tableinfo file.
352    */
353   static Path getTableInfoFileName(final Path tabledir, final int sequenceid) {
354     return new Path(tabledir,
355       TABLEINFO_NAME + "." + formatTableInfoSequenceId(sequenceid));
356   }
357 
358   /**
359    * @param fs
360    * @param rootdir
361    * @param tableName
362    * @return Modification time for the table {@link #TABLEINFO_NAME} file
363    * or <code>0</code> if no tableinfo file found.
364    * @throws IOException
365    */
366   static long getTableInfoModtime(final FileSystem fs, final Path rootdir,
367       final String tableName)
368   throws IOException {
369     FileStatus status = getTableInfoPath(fs, rootdir, tableName);
370     return status == null? 0: status.getModificationTime();
371   }
372 
373   /**
374    * Get HTD from HDFS.
375    * @param fs
376    * @param hbaseRootDir
377    * @param tableName
378    * @return Descriptor or null if none found.
379    * @throws IOException
380    */
381   public static HTableDescriptor getTableDescriptor(FileSystem fs,
382       Path hbaseRootDir, byte[] tableName)
383   throws IOException {
384      HTableDescriptor htd = null;
385      try {
386        TableDescriptorModtime tdmt =
387          getTableDescriptorModtime(fs, hbaseRootDir, Bytes.toString(tableName));
388        htd = tdmt == null ? null : tdmt.getTableDescriptor();
389      } catch (NullPointerException e) {
390        LOG.debug("Exception during readTableDecriptor. Current table name = "
391            + Bytes.toString(tableName), e);
392      }
393      return htd;
394   }
395 
396   static HTableDescriptor getTableDescriptor(FileSystem fs,
397       Path hbaseRootDir, String tableName) throws NullPointerException, IOException {
398     TableDescriptorModtime tdmt = getTableDescriptorModtime(fs, hbaseRootDir, tableName);
399     return tdmt == null ? null : tdmt.getTableDescriptor();
400   }
401 
402   static TableDescriptorModtime getTableDescriptorModtime(FileSystem fs,
403       Path hbaseRootDir, String tableName) throws NullPointerException, IOException{
404     // ignore both -ROOT- and .META. tables
405     if (Bytes.compareTo(Bytes.toBytes(tableName), HConstants.ROOT_TABLE_NAME) == 0
406         || Bytes.compareTo(Bytes.toBytes(tableName), HConstants.META_TABLE_NAME) == 0) {
407       return null;
408     }
409     return getTableDescriptorModtime(fs, FSUtils.getTablePath(hbaseRootDir, tableName));
410   }
411 
412   static TableDescriptorModtime getTableDescriptorModtime(FileSystem fs, Path tableDir)
413   throws NullPointerException, IOException {
414     if (tableDir == null) throw new NullPointerException();
415     FileStatus status = getTableInfoPath(fs, tableDir);
416     if (status == null) {
417       throw new TableInfoMissingException("No .tableinfo file under "
418           + tableDir.toUri());
419     }
420     FSDataInputStream fsDataInputStream = fs.open(status.getPath());
421     HTableDescriptor hTableDescriptor = null;
422     try {
423       hTableDescriptor = new HTableDescriptor();
424       hTableDescriptor.readFields(fsDataInputStream);
425     } finally {
426       fsDataInputStream.close();
427     }
428     return new TableDescriptorModtime(status.getModificationTime(), hTableDescriptor);
429   }
430 
431   public static HTableDescriptor getTableDescriptor(FileSystem fs, Path tableDir)
432   throws IOException, NullPointerException {
433     TableDescriptorModtime tdmt = getTableDescriptorModtime(fs, tableDir);
434     return tdmt == null? null: tdmt.getTableDescriptor();
435   }
436  
437 
  /**
   * Update table descriptor: writes a new tableinfo file with the next
   * sequenceid into the table's directory.
   * @param fs filesystem to write to
   * @param rootdir HBase root directory (javadoc previously mis-documented
   * this parameter as "conf")
   * @param hTableDescriptor descriptor to persist
   * @return New tableinfo file path; never null.
   * @throws IOException Thrown if failed update.
   */
  static Path updateHTableDescriptor(FileSystem fs, Path rootdir,
      HTableDescriptor hTableDescriptor)
  throws IOException {
    Path tableDir = FSUtils.getTablePath(rootdir, hTableDescriptor.getName());
    Path p = writeTableDescriptor(fs, hTableDescriptor, tableDir,
      getTableInfoPath(fs, tableDir));
    if (p == null) throw new IOException("Failed update");
    LOG.info("Updated tableinfo=" + p);
    return p;
  }
456 
  /**
   * Deletes a table's current tableinfo file from the file system if it
   * exists (not the whole table directory, despite the method's history).
   * Used in unit tests.
   */
  public static void deleteTableDescriptorIfExists(String tableName,
      Configuration conf) throws IOException {
    FileSystem fs = FSUtils.getCurrentFileSystem(conf);
    FileStatus status = getTableInfoPath(fs, FSUtils.getRootDir(conf), tableName);
    // The below deleteDirectory works for either file or directory.
    if (status != null && fs.exists(status.getPath())) {
      FSUtils.deleteDirectory(fs, status.getPath());
    }
  }
470 
  /**
   * Write the descriptor into a new tableinfo file carrying the next
   * sequenceid: write to a <code>.tmp</code> sibling first, then rename into
   * place, retrying with higher sequenceids on collision.
   * @param fs filesystem to write to
   * @param hTableDescriptor descriptor to persist
   * @param tableDir table directory the final file lands in
   * @param status current tableinfo file, or null if none exists yet;
   * deleted after a successful write
   * @return Descriptor file or null if we failed write (all retries
   * exhausted, including the tmp-file-exists skip path).
   * @throws IOException 
   */
  private static Path writeTableDescriptor(final FileSystem fs,
      final HTableDescriptor hTableDescriptor, final Path tableDir,
      final FileStatus status)
  throws IOException {
    // Get temporary dir into which we'll first write a file to avoid
    // half-written file phenomeon.
    Path tmpTableDir = new Path(tableDir, ".tmp");
    // What is current sequenceid?  We read the current sequenceid from
    // the current file.  After we read it, another thread could come in and
    // compete with us writing out next version of file.  The below retries
    // should help in this case some but its hard to do guarantees in face of
    // concurrent schema edits.
    int currentSequenceid =
      status == null? 0: getTableInfoSequenceid(status.getPath());
    int sequenceid = currentSequenceid;
    // Put arbitrary upperbound on how often we retry
    int retries = 10;
    int retrymax = currentSequenceid + retries;
    Path tableInfoPath = null;
    do {
      sequenceid += 1;
      Path p = getTableInfoFileName(tmpTableDir, sequenceid);
      if (fs.exists(p)) {
        // Someone else is using this tmp name; try the next sequenceid.
        LOG.debug(p + " exists; retrying up to " + retries + " times");
        continue;
      }
      try {
        writeHTD(fs, p, hTableDescriptor);
        tableInfoPath = getTableInfoFileName(tableDir, sequenceid);
        if (!HBaseFileSystem.renameDirForFileSystem(fs, p, tableInfoPath)) {
          throw new IOException("Failed rename of " + p + " to " + tableInfoPath);
        }
      } catch (IOException ioe) {
        // Presume clash of names or something; go around again.
        LOG.debug("Failed write and/or rename; retrying", ioe);
        if (!FSUtils.deleteDirectory(fs, p)) {
          LOG.warn("Failed cleanup of " + p);
        }
        // Reset so a final failed iteration reports null to the caller.
        tableInfoPath = null;
        continue;
      }
      // Cleanup old schema file.
      if (status != null) {
        if (!FSUtils.deleteDirectory(fs, status.getPath())) {
          LOG.warn("Failed delete of " + status.getPath() + "; continuing");
        }
      }
      break;
    } while (sequenceid < retrymax);
    return tableInfoPath;
  }
530 
  /**
   * Serialize the descriptor to the given path.  After the Writable-encoded
   * descriptor, two newlines and the human-readable toString() are appended;
   * readers call readFields() which presumably consumes only the serialized
   * prefix, leaving the trailing text for human inspection — confirm before
   * changing the format.
   */
  private static void writeHTD(final FileSystem fs, final Path p,
      final HTableDescriptor htd)
  throws IOException {
    FSDataOutputStream out = HBaseFileSystem.createPathOnFileSystem(fs, p, false);
    try {
      htd.write(out);
      out.write('\n');
      out.write('\n');
      out.write(Bytes.toBytes(htd.toString()));
    } finally {
      out.close();
    }
  }
544 
  /**
   * Create new HTableDescriptor in HDFS. Happens when we are creating table.
   * Does not overwrite an existing descriptor.
   * 
   * @param htableDescriptor descriptor to write
   * @param conf configuration used to locate the filesystem and root dir
   * @return true if the descriptor file was created
   */
  public static boolean createTableDescriptor(final HTableDescriptor htableDescriptor,
      Configuration conf)
  throws IOException {
    return createTableDescriptor(htableDescriptor, conf, false);
  }
556 
557   /**
558    * Create new HTableDescriptor in HDFS. Happens when we are creating table. If
559    * forceCreation is true then even if previous table descriptor is present it
560    * will be overwritten
561    * 
562    * @param htableDescriptor
563    * @param conf
564    * @param forceCreation True if we are to overwrite existing file.
565    */
566   static boolean createTableDescriptor(final HTableDescriptor htableDescriptor,
567       final Configuration conf, boolean forceCreation)
568   throws IOException {
569     FileSystem fs = FSUtils.getCurrentFileSystem(conf);
570     return createTableDescriptor(fs, FSUtils.getRootDir(conf), htableDescriptor,
571         forceCreation);
572   }
573 
  /**
   * Create new HTableDescriptor in HDFS. Happens when we are creating table.
   * Used by tests.  Does not overwrite an existing descriptor.
   * @param fs filesystem to write to
   * @param htableDescriptor descriptor to write
   * @param rootdir HBase root directory
   * @return true if the descriptor file was created
   */
  public static boolean createTableDescriptor(FileSystem fs, Path rootdir,
      HTableDescriptor htableDescriptor)
  throws IOException {
    return createTableDescriptor(fs, rootdir, htableDescriptor, false);
  }
586 
587   /**
588    * Create new HTableDescriptor in HDFS. Happens when we are creating table. If
589    * forceCreation is true then even if previous table descriptor is present it
590    * will be overwritten
591    * 
592    * @param fs
593    * @param htableDescriptor
594    * @param rootdir
595    * @param forceCreation
596    * @return True if we successfully created file.
597    */
598   public static boolean createTableDescriptor(FileSystem fs, Path rootdir,
599       HTableDescriptor htableDescriptor, boolean forceCreation)
600   throws IOException {
601     Path tabledir = FSUtils.getTablePath(rootdir, htableDescriptor.getNameAsString());
602     return createTableDescriptorForTableDirectory(fs, tabledir, htableDescriptor, forceCreation);
603   }
604 
605   /**
606    * Create a new HTableDescriptor in HDFS in the specified table directory. Happens when we create
607    * a new table or snapshot a table.
608    * @param fs filesystem where the descriptor should be written
609    * @param tabledir directory under which we should write the file
610    * @param htableDescriptor description of the table to write
611    * @param forceCreation if <tt>true</tt>,then even if previous table descriptor is present it will
612    *          be overwritten
613    * @return <tt>true</tt> if the we successfully created the file, <tt>false</tt> if the file
614    *         already exists and we weren't forcing the descriptor creation.
615    * @throws IOException if a filesystem error occurs
616    */
617   public static boolean createTableDescriptorForTableDirectory(FileSystem fs, Path tabledir,
618       HTableDescriptor htableDescriptor, boolean forceCreation) throws IOException {
619     FileStatus status = getTableInfoPath(fs, tabledir);
620     if (status != null) {
621       LOG.info("Current tableInfoPath = " + status.getPath());
622       if (!forceCreation) {
623         if (fs.exists(status.getPath()) && status.getLen() > 0) {
624           LOG.info("TableInfo already exists.. Skipping creation");
625           return false;
626         }
627       }
628     }
629     Path p = writeTableDescriptor(fs, htableDescriptor, tabledir, status);
630     return p != null;
631   }
632 }