View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License
17   */
18  
19  package org.apache.hadoop.hbase.regionserver.wal;
20  import org.apache.hadoop.classification.InterfaceAudience;
21  
22  import java.io.DataInput;
23  import java.io.DataOutput;
24  import java.io.IOException;
25  
26  import org.apache.hadoop.hbase.HBaseConfiguration;
27  import org.apache.hadoop.hbase.HConstants;
28  import org.apache.hadoop.hbase.regionserver.wal.HLog;
29  import org.apache.hadoop.hbase.util.Bytes;
30  import org.apache.hadoop.io.WritableUtils;
31  import org.apache.hadoop.conf.Configuration;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import com.google.common.base.Preconditions;
35  
36  /**
37   * A set of static functions for running our custom WAL compression/decompression.
38   * Also contains a command line tool to compress and uncompress HLogs.
39   */
40  @InterfaceAudience.Private
41  public class Compressor {
42    /**
43     * Command line tool to compress and uncompress WALs.
44     */
45    public static void main(String[] args) throws IOException {
46      if (args.length != 2 || args[0].equals("--help") || args[0].equals("-h")) {
47        printHelp();
48        System.exit(-1);
49      }
50  
51      Path inputPath = new Path(args[0]);
52      Path outputPath = new Path(args[1]);
53  
54      transformFile(inputPath, outputPath);
55    }
56  
57    private static void printHelp() {
58      System.err.println("usage: Compressor <input> <output>");
59      System.err.println("If <input> HLog is compressed, <output> will be decompressed.");
60      System.err.println("If <input> HLog is uncompressed, <output> will be compressed.");
61      return;
62    }
63  
64    private static void transformFile(Path input, Path output)
65        throws IOException {
66      SequenceFileLogReader in = new SequenceFileLogReader();
67      SequenceFileLogWriter out = new SequenceFileLogWriter();
68  
69      try {
70        Configuration conf = HBaseConfiguration.create();
71  
72        FileSystem inFS = input.getFileSystem(conf);
73        FileSystem outFS = output.getFileSystem(conf);
74  
75        in.init(inFS, input, conf);
76        boolean compress = in.reader.isWALCompressionEnabled();
77  
78        conf.setBoolean(HConstants.ENABLE_WAL_COMPRESSION, !compress);
79        out.init(outFS, output, conf);
80  
81        HLog.Entry e = null;
82        while ((e = in.next()) != null) out.append(e);
83      } finally {
84        in.close();
85        out.close();
86      }
87    }
88  
89    /**
90     * Reads the next compressed entry and returns it as a byte array
91     * 
92     * @param in the DataInput to read from
93     * @param dict the dictionary we use for our read.
94     * @return the uncompressed array.
95     */
96    static byte[] readCompressed(DataInput in, Dictionary dict)
97        throws IOException {
98      byte status = in.readByte();
99  
100     if (status == Dictionary.NOT_IN_DICTIONARY) {
101       int length = WritableUtils.readVInt(in);
102       // if this isn't in the dictionary, we need to add to the dictionary.
103       byte[] arr = new byte[length];
104       in.readFully(arr);
105       if (dict != null) dict.addEntry(arr, 0, length);
106       return arr;
107     } else {
108       // Status here is the higher-order byte of index of the dictionary entry
109       // (when its not Dictionary.NOT_IN_DICTIONARY -- dictionary indices are
110       // shorts).
111       short dictIdx = toShort(status, in.readByte());
112       byte[] entry = dict.getEntry(dictIdx);
113       if (entry == null) {
114         throw new IOException("Missing dictionary entry for index "
115             + dictIdx);
116       }
117       return entry;
118     }
119   }
120 
121   /**
122    * Reads a compressed entry into an array.
123    * The output into the array ends up length-prefixed.
124    * 
125    * @param to the array to write into
126    * @param offset array offset to start writing to
127    * @param in the DataInput to read from
128    * @param dict the dictionary to use for compression
129    * 
130    * @return the length of the uncompressed data
131    */
132   static int uncompressIntoArray(byte[] to, int offset, DataInput in,
133       Dictionary dict) throws IOException {
134     byte status = in.readByte();
135 
136     if (status == Dictionary.NOT_IN_DICTIONARY) {
137       // status byte indicating that data to be read is not in dictionary.
138       // if this isn't in the dictionary, we need to add to the dictionary.
139       int length = WritableUtils.readVInt(in);
140       in.readFully(to, offset, length);
141       dict.addEntry(to, offset, length);
142       return length;
143     } else {
144       // the status byte also acts as the higher order byte of the dictionary
145       // entry
146       short dictIdx = toShort(status, in.readByte());
147       byte[] entry;
148       try {
149         entry = dict.getEntry(dictIdx);
150       } catch (Exception ex) {
151         throw new IOException("Unable to uncompress the log entry", ex);
152       }
153       if (entry == null) {
154         throw new IOException("Missing dictionary entry for index "
155             + dictIdx);
156       }
157       // now we write the uncompressed value.
158       Bytes.putBytes(to, offset, entry, 0, entry.length);
159       return entry.length;
160     }
161   }
162 
163   /**
164    * Compresses and writes an array to a DataOutput
165    * 
166    * @param data the array to write.
167    * @param out the DataOutput to write into
168    * @param dict the dictionary to use for compression
169    */
170   static void writeCompressed(byte[] data, int offset, int length,
171       DataOutput out, Dictionary dict)
172       throws IOException {
173     short dictIdx = Dictionary.NOT_IN_DICTIONARY;
174     if (dict != null) {
175       dictIdx = dict.findEntry(data, offset, length);
176     }
177     if (dictIdx == Dictionary.NOT_IN_DICTIONARY) {
178       // not in dict
179       out.writeByte(Dictionary.NOT_IN_DICTIONARY);
180       WritableUtils.writeVInt(out, length);
181       out.write(data, offset, length);
182     } else {
183       out.writeShort(dictIdx);
184     }
185   }
186 
187   static short toShort(byte hi, byte lo) {
188     short s = (short) (((hi & 0xFF) << 8) | (lo & 0xFF));
189     Preconditions.checkArgument(s >= 0);
190     return s;
191   }
192 }