View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.io.hfile;
20  
21  import java.io.DataOutputStream;
22  import java.io.IOException;
23  import java.net.InetSocketAddress;
24  import java.util.ArrayList;
25  import java.util.Arrays;
26  import java.util.List;
27  
28  import org.apache.hadoop.classification.InterfaceAudience;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FSDataOutputStream;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.fs.permission.FsPermission;
34  import org.apache.hadoop.hbase.HConstants;
35  import org.apache.hadoop.hbase.KeyValue.KeyComparator;
36  import org.apache.hadoop.hbase.io.compress.Compression;
37  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
38  import org.apache.hadoop.hbase.util.Bytes;
39  import org.apache.hadoop.hbase.util.FSUtils;
40  import org.apache.hadoop.io.RawComparator;
41  import org.apache.hadoop.io.Writable;
42  
43  /**
44   * Common functionality needed by all versions of {@link HFile} writers.
45   */
46  @InterfaceAudience.Private
47  public abstract class AbstractHFileWriter implements HFile.Writer {
48  
49    /** Key previously appended. Becomes the last key in the file. */
50    protected byte[] lastKeyBuffer = null;
51  
52    protected int lastKeyOffset = -1;
53    protected int lastKeyLength = -1;
54  
55    /** FileSystem stream to write into. */
56    protected FSDataOutputStream outputStream;
57  
58    /** True if we opened the <code>outputStream</code> (and so will close it). */
59    protected final boolean closeOutputStream;
60  
61    /** A "file info" block: a key-value map of file-wide metadata. */
62    protected FileInfo fileInfo = new HFile.FileInfo();
63  
64    /** Number of uncompressed bytes we allow per block. */
65    protected final int blockSize;
66  
67    /** Total # of key/value entries, i.e. how many times add() was called. */
68    protected long entryCount = 0;
69  
70    /** Used for calculating the average key length. */
71    protected long totalKeyLength = 0;
72  
73    /** Used for calculating the average value length. */
74    protected long totalValueLength = 0;
75  
76    /** Total uncompressed bytes, maybe calculate a compression ratio later. */
77    protected long totalUncompressedBytes = 0;
78  
79    /** Key comparator. Used to ensure we write in order. */
80    protected final RawComparator<byte[]> comparator;
81  
82    /** Meta block names. */
83    protected List<byte[]> metaNames = new ArrayList<byte[]>();
84  
85    /** {@link Writable}s representing meta block data. */
86    protected List<Writable> metaData = new ArrayList<Writable>();
87  
88    /** The compression algorithm used. NONE if no compression. */
89    protected final Compression.Algorithm compressAlgo;
90    
91    /**
92     * The data block encoding which will be used.
93     * {@link NoOpDataBlockEncoder#INSTANCE} if there is no encoding.
94     */
95    protected final HFileDataBlockEncoder blockEncoder;
96  
97    /** First key in a block. */
98    protected byte[] firstKeyInBlock = null;
99  
100   /** May be null if we were passed a stream. */
101   protected final Path path;
102 
103 
104   /** Cache configuration for caching data on write. */
105   protected final CacheConfig cacheConf;
106 
107   /**
108    * Name for this object used when logging or in toString. Is either
109    * the result of a toString on stream or else name of passed file Path.
110    */
111   protected final String name;
112 
113   public AbstractHFileWriter(CacheConfig cacheConf,
114       FSDataOutputStream outputStream, Path path, int blockSize,
115       Compression.Algorithm compressAlgo,
116       HFileDataBlockEncoder dataBlockEncoder,
117       KeyComparator comparator) {
118     this.outputStream = outputStream;
119     this.path = path;
120     this.name = path != null ? path.getName() : outputStream.toString();
121     this.blockSize = blockSize;
122     this.compressAlgo = compressAlgo == null
123         ? HFile.DEFAULT_COMPRESSION_ALGORITHM : compressAlgo;
124     this.blockEncoder = dataBlockEncoder != null
125         ? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE;
126     this.comparator = comparator != null ? comparator
127         : Bytes.BYTES_RAWCOMPARATOR;
128 
129     closeOutputStream = path != null;
130     this.cacheConf = cacheConf;
131   }
132 
133   /**
134    * Add last bits of metadata to file info before it is written out.
135    */
136   protected void finishFileInfo() throws IOException {
137     if (lastKeyBuffer != null) {
138       // Make a copy. The copy is stuffed into our fileinfo map. Needs a clean
139       // byte buffer. Won't take a tuple.
140       fileInfo.append(FileInfo.LASTKEY, Arrays.copyOfRange(lastKeyBuffer,
141           lastKeyOffset, lastKeyOffset + lastKeyLength), false);
142     }
143 
144     // Average key length.
145     int avgKeyLen =
146         entryCount == 0 ? 0 : (int) (totalKeyLength / entryCount);
147     fileInfo.append(FileInfo.AVG_KEY_LEN, Bytes.toBytes(avgKeyLen), false);
148 
149     // Average value length.
150     int avgValueLen =
151         entryCount == 0 ? 0 : (int) (totalValueLength / entryCount);
152     fileInfo.append(FileInfo.AVG_VALUE_LEN, Bytes.toBytes(avgValueLen), false);
153   }
154 
155   /**
156    * Add to the file info. All added key/value pairs can be obtained using
157    * {@link HFile.Reader#loadFileInfo()}.
158    *
159    * @param k Key
160    * @param v Value
161    * @throws IOException in case the key or the value are invalid
162    */
163   @Override
164   public void appendFileInfo(final byte[] k, final byte[] v)
165       throws IOException {
166     fileInfo.append(k, v, true);
167   }
168 
169   /**
170    * Sets the file info offset in the trailer, finishes up populating fields in
171    * the file info, and writes the file info into the given data output. The
172    * reason the data output is not always {@link #outputStream} is that we store
173    * file info as a block in version 2.
174    *
175    * @param trailer fixed file trailer
176    * @param out the data output to write the file info to
177    * @throws IOException
178    */
179   protected final void writeFileInfo(FixedFileTrailer trailer, DataOutputStream out)
180   throws IOException {
181     trailer.setFileInfoOffset(outputStream.getPos());
182     finishFileInfo();
183     fileInfo.write(out);
184   }
185 
186   /**
187    * Checks that the given key does not violate the key order.
188    *
189    * @param key Key to check.
190    * @return true if the key is duplicate
191    * @throws IOException if the key or the key order is wrong
192    */
193   protected boolean checkKey(final byte[] key, final int offset,
194       final int length) throws IOException {
195     boolean isDuplicateKey = false;
196 
197     if (key == null || length <= 0) {
198       throw new IOException("Key cannot be null or empty");
199     }
200     if (lastKeyBuffer != null) {
201       int keyComp = comparator.compare(lastKeyBuffer, lastKeyOffset,
202           lastKeyLength, key, offset, length);
203       if (keyComp > 0) {
204         throw new IOException("Added a key not lexically larger than"
205             + " previous key="
206             + Bytes.toStringBinary(key, offset, length)
207             + ", lastkey="
208             + Bytes.toStringBinary(lastKeyBuffer, lastKeyOffset,
209                 lastKeyLength));
210       } else if (keyComp == 0) {
211         isDuplicateKey = true;
212       }
213     }
214     return isDuplicateKey;
215   }
216 
217   /** Checks the given value for validity. */
218   protected void checkValue(final byte[] value, final int offset,
219       final int length) throws IOException {
220     if (value == null) {
221       throw new IOException("Value cannot be null");
222     }
223   }
224 
225   /**
226    * @return Path or null if we were passed a stream rather than a Path.
227    */
228   @Override
229   public Path getPath() {
230     return path;
231   }
232 
233   @Override
234   public String toString() {
235     return "writer=" + (path != null ? path.toString() : null) + ", name="
236         + name + ", compression=" + compressAlgo.getName();
237   }
238 
239   /**
240    * Sets remaining trailer fields, writes the trailer to disk, and optionally
241    * closes the output stream.
242    */
243   protected void finishClose(FixedFileTrailer trailer) throws IOException {
244     trailer.setMetaIndexCount(metaNames.size());
245     trailer.setTotalUncompressedBytes(totalUncompressedBytes+ trailer.getTrailerSize());
246     trailer.setEntryCount(entryCount);
247     trailer.setCompressionCodec(compressAlgo);
248 
249     trailer.serialize(outputStream);
250 
251     if (closeOutputStream) {
252       outputStream.close();
253       outputStream = null;
254     }
255   }
256 
257   public static Compression.Algorithm compressionByName(String algoName) {
258     if (algoName == null)
259       return HFile.DEFAULT_COMPRESSION_ALGORITHM;
260     return Compression.getCompressionAlgorithmByName(algoName);
261   }
262 
263   /** A helper method to create HFile output streams in constructors */
264   protected static FSDataOutputStream createOutputStream(Configuration conf,
265       FileSystem fs, Path path, InetSocketAddress[] favoredNodes) throws IOException {
266     FsPermission perms = FSUtils.getFilePermissions(fs, conf,
267         HConstants.DATA_FILE_UMASK_KEY);
268     return FSUtils.create(fs, path, perms, favoredNodes);
269   }
270 }