/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io.hfile;

import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue.KeyComparator;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Writable;

/**
 * Common functionality needed by all versions of {@link HFile} writers.
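 * <p>
 * This base class keeps track of the last key appended (so key ordering can
 * be enforced and the last key recorded in the file info), accumulates entry
 * counts and total key/value lengths, manages the "file info" metadata block,
 * and writes the fixed file trailer when the writer is closed. The
 * version-specific subclasses are responsible for the actual block layout.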
 */
@InterfaceAudience.Private
public abstract class AbstractHFileWriter implements HFile.Writer {

  /** Key previously appended. Becomes the last key in the file. */
  protected byte[] lastKeyBuffer = null;

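  /** Offset and length of the last key within {@link #lastKeyBuffer}. */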
  protected int lastKeyOffset = -1;
  protected int lastKeyLength = -1;

  /** FileSystem stream to write into. */
  protected FSDataOutputStream outputStream;

  /** True if we opened the <code>outputStream</code> (and so will close it). */
  protected final boolean closeOutputStream;

  /** A "file info" block: a key-value map of file-wide metadata. */
  protected FileInfo fileInfo = new HFile.FileInfo();

  /** Number of uncompressed bytes we allow per block. */
  protected final int blockSize;

  /** Total # of key/value entries, i.e. how many times add() was called. */
  protected long entryCount = 0;

  /** Used for calculating the average key length. */
  protected long totalKeyLength = 0;

  /** Used for calculating the average value length. */
  protected long totalValueLength = 0;

  /** Total uncompressed bytes, maybe calculate a compression ratio later. */
  protected long totalUncompressedBytes = 0;

  /** Key comparator. Used to ensure we write in order. */
  protected final RawComparator<byte[]> comparator;

  /** Meta block names. */
  protected List<byte[]> metaNames = new ArrayList<byte[]>();

  /** {@link Writable}s representing meta block data. */
  protected List<Writable> metaData = new ArrayList<Writable>();

  /** The compression algorithm used. NONE if no compression. */
  protected final Compression.Algorithm compressAlgo;

  /**
   * The data block encoding which will be used.
   * {@link NoOpDataBlockEncoder#INSTANCE} if there is no encoding.
   */
  protected final HFileDataBlockEncoder blockEncoder;

  /** First key in a block. */
  protected byte[] firstKeyInBlock = null;

  /** May be null if we were passed a stream. */
  protected final Path path;

  /** Cache configuration for caching data on write. */
  protected final CacheConfig cacheConf;

  /**
   * Name for this object used when logging or in toString. Is either
   * the result of a toString on stream or else name of passed file Path.
   */
  protected final String name;

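  /**
   * @param cacheConf cache configuration for caching data on write
   * @param outputStream stream to write into
   * @param path path of the file being written; may be null if only a stream
   *          was passed, in which case this writer will not close the stream
   * @param blockSize number of uncompressed bytes allowed per block
   * @param compressAlgo compression algorithm; null means the default
   * @param dataBlockEncoder data block encoder; null means no encoding
   * @param comparator key comparator; null means the raw byte comparator
   */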
  public AbstractHFileWriter(CacheConfig cacheConf,
      FSDataOutputStream outputStream, Path path, int blockSize,
      Compression.Algorithm compressAlgo,
      HFileDataBlockEncoder dataBlockEncoder,
      KeyComparator comparator) {
    this.outputStream = outputStream;
    this.path = path;
    this.name = path != null ? path.getName() : outputStream.toString();
    this.blockSize = blockSize;
    this.compressAlgo = compressAlgo == null
        ? HFile.DEFAULT_COMPRESSION_ALGORITHM : compressAlgo;
    this.blockEncoder = dataBlockEncoder != null
        ? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE;
    this.comparator = comparator != null ? comparator
        : Bytes.BYTES_RAWCOMPARATOR;

    closeOutputStream = path != null;
    this.cacheConf = cacheConf;
  }

  /**
   * Add last bits of metadata to file info before it is written out.
   */
  protected void finishFileInfo() throws IOException {
    if (lastKeyBuffer != null) {
      // Make a copy and stuff it into our fileinfo map. FileInfo needs a
      // standalone byte array; it won't take an (array, offset, length) tuple.
      fileInfo.append(FileInfo.LASTKEY, Arrays.copyOfRange(lastKeyBuffer,
          lastKeyOffset, lastKeyOffset + lastKeyLength), false);
    }

    // Average key length.
    int avgKeyLen =
        entryCount == 0 ? 0 : (int) (totalKeyLength / entryCount);
    fileInfo.append(FileInfo.AVG_KEY_LEN, Bytes.toBytes(avgKeyLen), false);

    // Average value length.
    int avgValueLen =
        entryCount == 0 ? 0 : (int) (totalValueLength / entryCount);
    fileInfo.append(FileInfo.AVG_VALUE_LEN, Bytes.toBytes(avgValueLen), false);
  }

  /**
   * Add to the file info. All added key/value pairs can be obtained using
   * {@link HFile.Reader#loadFileInfo()}.
   *
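   * <p>
   * For example (illustrative only; the key name here is an arbitrary,
   * application-defined metadata key):
   * <pre>
   *   writer.appendFileInfo(Bytes.toBytes("MY_APP_METADATA"),
   *       Bytes.toBytes("some value"));
   * </pre>
   *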
   * @param k Key
   * @param v Value
   * @throws IOException in case the key or the value are invalid
   */
  @Override
  public void appendFileInfo(final byte[] k, final byte[] v)
      throws IOException {
    fileInfo.append(k, v, true);
  }

  /**
   * Sets the file info offset in the trailer, finishes up populating fields in
   * the file info, and writes the file info into the given data output. The
   * reason the data output is not always {@link #outputStream} is that we store
   * file info as a block in version 2.
   *
   * @param trailer fixed file trailer
   * @param out the data output to write the file info to
   * @throws IOException
   */
  protected final void writeFileInfo(FixedFileTrailer trailer,
      DataOutputStream out) throws IOException {
    trailer.setFileInfoOffset(outputStream.getPos());
    finishFileInfo();
    fileInfo.write(out);
  }

  /**
   * Checks that the given key does not violate the key order.
   *
   * @param key Key to check.
   * @return true if the key is a duplicate of the previously added key
   * @throws IOException if the key or the key order is wrong
   */
  protected boolean checkKey(final byte[] key, final int offset,
      final int length) throws IOException {
    boolean isDuplicateKey = false;

    if (key == null || length <= 0) {
      throw new IOException("Key cannot be null or empty");
    }
    if (lastKeyBuffer != null) {
      int keyComp = comparator.compare(lastKeyBuffer, lastKeyOffset,
          lastKeyLength, key, offset, length);
      if (keyComp > 0) {
        throw new IOException("Added a key not lexically larger than"
            + " previous. Current key="
            + Bytes.toStringBinary(key, offset, length)
            + ", lastKey="
            + Bytes.toStringBinary(lastKeyBuffer, lastKeyOffset,
                lastKeyLength));
      } else if (keyComp == 0) {
        isDuplicateKey = true;
      }
    }
    return isDuplicateKey;
  }

  /** Checks the given value for validity. */
  protected void checkValue(final byte[] value, final int offset,
      final int length) throws IOException {
    if (value == null) {
      throw new IOException("Value cannot be null");
    }
  }

  /**
   * @return Path or null if we were passed a stream rather than a Path.
   */
  @Override
  public Path getPath() {
    return path;
  }

  @Override
  public String toString() {
    return "writer=" + (path != null ? path.toString() : null) + ", name="
        + name + ", compression=" + compressAlgo.getName();
  }

  /**
   * Sets remaining trailer fields, writes the trailer to disk, and optionally
   * closes the output stream.
   */
  protected void finishClose(FixedFileTrailer trailer) throws IOException {
    trailer.setMetaIndexCount(metaNames.size());
    trailer.setTotalUncompressedBytes(totalUncompressedBytes
        + trailer.getTrailerSize());
    trailer.setEntryCount(entryCount);
    trailer.setCompressionCodec(compressAlgo);

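    // The fixed trailer is written at the very end of the file; among other
    // things it records the file info offset set in writeFileInfo().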
    trailer.serialize(outputStream);

    if (closeOutputStream) {
      outputStream.close();
      outputStream = null;
    }
  }

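  /**
   * Resolves a compression algorithm by name, falling back to the default
   * algorithm when the name is null.
   */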
  public static Compression.Algorithm compressionByName(String algoName) {
    if (algoName == null) {
      return HFile.DEFAULT_COMPRESSION_ALGORITHM;
    }
    return Compression.getCompressionAlgorithmByName(algoName);
  }

  /** A helper method to create HFile output streams in constructors. */
  protected static FSDataOutputStream createOutputStream(Configuration conf,
      FileSystem fs, Path path) throws IOException {
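    // Use the data file permissions configured under
    // HConstants.DATA_FILE_UMASK_KEY for the newly created file.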
    FsPermission perms = FSUtils.getFilePermissions(fs, conf,
        HConstants.DATA_FILE_UMASK_KEY);
    return FSUtils.create(fs, path, perms);
  }
}