View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License
17   */
18  
19  package org.apache.hadoop.hbase.regionserver.wal;
20  import org.apache.hadoop.classification.InterfaceAudience;
21  
22  import java.io.DataInput;
23  import java.io.DataOutput;
24  import java.io.IOException;
25  
26  import org.apache.hadoop.hbase.HBaseConfiguration;
27  import org.apache.hadoop.hbase.HConstants;
28  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
29  import org.apache.hadoop.hbase.regionserver.wal.HLog;
30  import org.apache.hadoop.hbase.util.Bytes;
31  import org.apache.hadoop.io.WritableUtils;
32  import org.apache.hadoop.conf.Configuration;
33  import org.apache.hadoop.fs.FileSystem;
34  import org.apache.hadoop.fs.Path;
35  import com.google.common.base.Preconditions;
36  import com.google.protobuf.ByteString;
37  
38  /**
39   * A set of static functions for running our custom WAL compression/decompression.
40   * Also contains a command line tool to compress and uncompress HLogs.
41   */
42  @InterfaceAudience.Private
43  public class Compressor {
44    /**
45     * Command line tool to compress and uncompress WALs.
46     */
47    public static void main(String[] args) throws IOException {
48      if (args.length != 2 || args[0].equals("--help") || args[0].equals("-h")) {
49        printHelp();
50        System.exit(-1);
51      }
52  
53      Path inputPath = new Path(args[0]);
54      Path outputPath = new Path(args[1]);
55  
56      transformFile(inputPath, outputPath);
57    }
58  
59    private static void printHelp() {
60      System.err.println("usage: Compressor <input> <output>");
61      System.err.println("If <input> HLog is compressed, <output> will be decompressed.");
62      System.err.println("If <input> HLog is uncompressed, <output> will be compressed.");
63      return;
64    }
65  
66    private static void transformFile(Path input, Path output)
67        throws IOException {
68      Configuration conf = HBaseConfiguration.create();
69  
70      FileSystem inFS = input.getFileSystem(conf);
71      FileSystem outFS = output.getFileSystem(conf);
72  
73      HLog.Reader in = HLogFactory.createReader(inFS, input, conf, null, false);
74      HLog.Writer out = null;
75  
76      try {
77        if (!(in instanceof ReaderBase)) {
78          System.err.println("Cannot proceed, invalid reader type: " + in.getClass().getName());
79          return;
80        }
81        boolean compress = ((ReaderBase)in).hasCompression();
82        conf.setBoolean(HConstants.ENABLE_WAL_COMPRESSION, !compress);
83        out = HLogFactory.createWriter(outFS, output, conf);
84  
85        HLog.Entry e = null;
86        while ((e = in.next()) != null) out.append(e);
87      } finally {
88        in.close();
89        if (out != null) {
90          out.close();
91          out = null;
92        }
93      }
94    }
95  
96    /**
97     * Reads the next compressed entry and returns it as a byte array
98     * 
99     * @param in the DataInput to read from
100    * @param dict the dictionary we use for our read.
101    * @return the uncompressed array.
102    */
103   @Deprecated
104   static byte[] readCompressed(DataInput in, Dictionary dict)
105       throws IOException {
106     byte status = in.readByte();
107 
108     if (status == Dictionary.NOT_IN_DICTIONARY) {
109       int length = WritableUtils.readVInt(in);
110       // if this isn't in the dictionary, we need to add to the dictionary.
111       byte[] arr = new byte[length];
112       in.readFully(arr);
113       if (dict != null) dict.addEntry(arr, 0, length);
114       return arr;
115     } else {
116       // Status here is the higher-order byte of index of the dictionary entry
117       // (when its not Dictionary.NOT_IN_DICTIONARY -- dictionary indices are
118       // shorts).
119       short dictIdx = toShort(status, in.readByte());
120       byte[] entry = dict.getEntry(dictIdx);
121       if (entry == null) {
122         throw new IOException("Missing dictionary entry for index "
123             + dictIdx);
124       }
125       return entry;
126     }
127   }
128 
129   /**
130    * Reads a compressed entry into an array.
131    * The output into the array ends up length-prefixed.
132    * 
133    * @param to the array to write into
134    * @param offset array offset to start writing to
135    * @param in the DataInput to read from
136    * @param dict the dictionary to use for compression
137    * 
138    * @return the length of the uncompressed data
139    */
140   @Deprecated
141   static int uncompressIntoArray(byte[] to, int offset, DataInput in,
142       Dictionary dict) throws IOException {
143     byte status = in.readByte();
144 
145     if (status == Dictionary.NOT_IN_DICTIONARY) {
146       // status byte indicating that data to be read is not in dictionary.
147       // if this isn't in the dictionary, we need to add to the dictionary.
148       int length = WritableUtils.readVInt(in);
149       in.readFully(to, offset, length);
150       dict.addEntry(to, offset, length);
151       return length;
152     } else {
153       // the status byte also acts as the higher order byte of the dictionary
154       // entry
155       short dictIdx = toShort(status, in.readByte());
156       byte[] entry;
157       try {
158         entry = dict.getEntry(dictIdx);
159       } catch (Exception ex) {
160         throw new IOException("Unable to uncompress the log entry", ex);
161       }
162       if (entry == null) {
163         throw new IOException("Missing dictionary entry for index "
164             + dictIdx);
165       }
166       // now we write the uncompressed value.
167       Bytes.putBytes(to, offset, entry, 0, entry.length);
168       return entry.length;
169     }
170   }
171 
172   /**
173    * Compresses and writes an array to a DataOutput
174    * 
175    * @param data the array to write.
176    * @param out the DataOutput to write into
177    * @param dict the dictionary to use for compression
178    */
179   @Deprecated
180   static void writeCompressed(byte[] data, int offset, int length,
181       DataOutput out, Dictionary dict)
182       throws IOException {
183     short dictIdx = Dictionary.NOT_IN_DICTIONARY;
184     if (dict != null) {
185       dictIdx = dict.findEntry(data, offset, length);
186     }
187     if (dictIdx == Dictionary.NOT_IN_DICTIONARY) {
188       // not in dict
189       out.writeByte(Dictionary.NOT_IN_DICTIONARY);
190       WritableUtils.writeVInt(out, length);
191       out.write(data, offset, length);
192     } else {
193       out.writeShort(dictIdx);
194     }
195   }
196 
197   static short toShort(byte hi, byte lo) {
198     short s = (short) (((hi & 0xFF) << 8) | (lo & 0xFF));
199     Preconditions.checkArgument(s >= 0);
200     return s;
201   }
202 }