View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.Collection;
23  import java.util.Collections;
24  import java.util.List;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.hbase.KeyValue;
29  import org.apache.hadoop.hbase.KeyValue.KVComparator;
30  import org.apache.hadoop.hbase.classification.InterfaceAudience;
31  import org.apache.hadoop.hbase.regionserver.StoreFile.Writer;
32  import org.apache.hadoop.hbase.util.Bytes;
33  
34  /**
35   * Base class for cell sink that separates the provided cells into multiple files for stripe
36   * compaction.
37   */
38  @InterfaceAudience.Private
39  public abstract class StripeMultiFileWriter extends AbstractMultiFileWriter {
40  
41    private static final Log LOG = LogFactory.getLog(StripeMultiFileWriter.class);
42  
43    protected final KVComparator comparator;
44    protected List<StoreFile.Writer> existingWriters;
45    protected List<byte[]> boundaries;
46  
47    /** Whether to write stripe metadata */
48    private boolean doWriteStripeMetadata = true;
49  
50    public StripeMultiFileWriter(KVComparator comparator) {
51      this.comparator = comparator;
52    }
53  
54    public void setNoStripeMetadata() {
55      this.doWriteStripeMetadata = false;
56    }
57  
58    @Override
59    protected Collection<Writer> writers() {
60      return existingWriters;
61    }
62  
63    @Override
64    protected void preCloseWriter(Writer writer) throws IOException {
65      if (doWriteStripeMetadata) {
66        LOG.debug("Write stripe metadata for " + writer.getPath().toString());
67        int index = existingWriters.indexOf(writer);
68        writer.appendFileInfo(StripeStoreFileManager.STRIPE_START_KEY, boundaries.get(index));
69        writer.appendFileInfo(StripeStoreFileManager.STRIPE_END_KEY, boundaries.get(index + 1));
70      } else {
71        if (LOG.isDebugEnabled()) {
72          LOG.debug("Skip writing stripe metadata for " + writer.getPath().toString());
73        }
74      }
75    }
76  
77    /**
78     * Subclasses can call this method to make sure the first KV is within multi-writer range.
79     * @param left The left boundary of the writer.
80     * @param row The row to check.
81     * @param rowOffset Offset for row.
82     * @param rowLength Length for row.
83     */
84    protected void sanityCheckLeft(byte[] left, byte[] row, int rowOffset, int rowLength)
85        throws IOException {
86      if (StripeStoreFileManager.OPEN_KEY != left
87          && comparator.compareRows(row, rowOffset, rowLength, left, 0, left.length) < 0) {
88        String error = "The first row is lower than the left boundary of [" + Bytes.toString(left)
89            + "]: [" + Bytes.toString(row, rowOffset, rowLength) + "]";
90        LOG.error(error);
91        throw new IOException(error);
92      }
93    }
94  
95    /**
96     * Subclasses can call this method to make sure the last KV is within multi-writer range.
97     * @param right The right boundary of the writer.
98     * @param row The row to check.
99     * @param rowOffset Offset for row.
100    * @param rowLength Length for row.
101    */
102   protected void sanityCheckRight(byte[] right, byte[] row, int rowOffset, int rowLength)
103       throws IOException {
104     if (StripeStoreFileManager.OPEN_KEY != right
105         && comparator.compareRows(row, rowOffset, rowLength, right, 0, right.length) >= 0) {
106       String error = "The last row is higher or equal than the right boundary of ["
107           + Bytes.toString(right) + "]: [" + Bytes.toString(row, rowOffset, rowLength) + "]";
108       LOG.error(error);
109       throw new IOException(error);
110     }
111   }
112 
113   /**
114    * MultiWriter that separates the cells based on fixed row-key boundaries. All the KVs between
115    * each pair of neighboring boundaries from the list supplied to ctor will end up in one file, and
116    * separate from all other such pairs.
117    */
118   public static class BoundaryMultiWriter extends StripeMultiFileWriter {
119     private StoreFile.Writer currentWriter;
120     private byte[] currentWriterEndKey;
121 
122     private KeyValue lastKv;
123     private long kvsInCurrentWriter = 0;
124     private int majorRangeFromIndex = -1, majorRangeToIndex = -1;
125     private boolean hasAnyWriter = false;
126 
127     /**
128      * @param targetBoundaries The boundaries on which writers/files are separated.
129      * @param majorRangeFrom Major range is the range for which at least one file should be written
130      *          (because all files are included in compaction). majorRangeFrom is the left boundary.
131      * @param majorRangeTo The right boundary of majorRange (see majorRangeFrom).
132      */
133     public BoundaryMultiWriter(KVComparator comparator, List<byte[]> targetBoundaries,
134         byte[] majorRangeFrom, byte[] majorRangeTo) throws IOException {
135       super(comparator);
136       this.boundaries = targetBoundaries;
137       this.existingWriters = new ArrayList<StoreFile.Writer>(this.boundaries.size() - 1);
138       // "major" range (range for which all files are included) boundaries, if any,
139       // must match some target boundaries, let's find them.
140       assert (majorRangeFrom == null) == (majorRangeTo == null);
141       if (majorRangeFrom != null) {
142         majorRangeFromIndex = (majorRangeFrom == StripeStoreFileManager.OPEN_KEY) ? 0
143             : Collections.binarySearch(this.boundaries, majorRangeFrom, Bytes.BYTES_COMPARATOR);
144         majorRangeToIndex = (majorRangeTo == StripeStoreFileManager.OPEN_KEY) ? boundaries.size()
145             : Collections.binarySearch(this.boundaries, majorRangeTo, Bytes.BYTES_COMPARATOR);
146         if (this.majorRangeFromIndex < 0 || this.majorRangeToIndex < 0) {
147           throw new IOException("Major range does not match writer boundaries: ["
148               + Bytes.toString(majorRangeFrom) + "] [" + Bytes.toString(majorRangeTo) + "]; from "
149               + majorRangeFromIndex + " to " + majorRangeToIndex);
150         }
151       }
152     }
153 
154     @Override
155     public void append(KeyValue kv) throws IOException {
156       if (currentWriter == null && existingWriters.isEmpty()) {
157         // First append ever, do a sanity check.
158         sanityCheckLeft(this.boundaries.get(0), kv.getRowArray(), kv.getRowOffset(),
159           kv.getRowLength());
160       }
161       prepareWriterFor(kv);
162       currentWriter.append(kv);
163       lastKv = kv; // for the sanity check
164       ++kvsInCurrentWriter;
165     }
166 
167     private boolean isKvAfterCurrentWriter(KeyValue kv) {
168       return ((currentWriterEndKey != StripeStoreFileManager.OPEN_KEY)
169           && (comparator.compareRows(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
170             currentWriterEndKey, 0, currentWriterEndKey.length) >= 0));
171     }
172 
173     @Override
174     protected void preCommitWriters() throws IOException {
175       stopUsingCurrentWriter();
176       while (existingWriters.size() < boundaries.size() - 1) {
177         createEmptyWriter();
178       }
179       if (lastKv != null) {
180         sanityCheckRight(boundaries.get(boundaries.size() - 1), lastKv.getRowArray(),
181           lastKv.getRowOffset(), lastKv.getRowLength());
182       }
183     }
184 
185     private void prepareWriterFor(KeyValue kv) throws IOException {
186       if (currentWriter != null && !isKvAfterCurrentWriter(kv)) return; // Use same writer.
187 
188       stopUsingCurrentWriter();
189       // See if KV will be past the writer we are about to create; need to add another one.
190       while (isKvAfterCurrentWriter(kv)) {
191         checkCanCreateWriter();
192         createEmptyWriter();
193       }
194       checkCanCreateWriter();
195       hasAnyWriter = true;
196       currentWriter = writerFactory.createWriter();
197       existingWriters.add(currentWriter);
198     }
199 
200     /**
201      * Called if there are no cells for some stripe. We need to have something in the writer list
202      * for this stripe, so that writer-boundary list indices correspond to each other. We can insert
203      * null in the writer list for that purpose, except in the following cases where we actually
204      * need a file: 1) If we are in range for which we are compacting all the files, we need to
205      * create an empty file to preserve stripe metadata. 2) If we have not produced any file at all
206      * for this compactions, and this is the last chance (the last stripe), we need to preserve last
207      * seqNum (see also HBASE-6059).
208      */
209     private void createEmptyWriter() throws IOException {
210       int index = existingWriters.size();
211       boolean isInMajorRange = (index >= majorRangeFromIndex) && (index < majorRangeToIndex);
212       // Stripe boundary count = stripe count + 1, so last stripe index is (#boundaries minus 2)
213       boolean isLastWriter = !hasAnyWriter && (index == (boundaries.size() - 2));
214       boolean needEmptyFile = isInMajorRange || isLastWriter;
215       existingWriters.add(needEmptyFile ? writerFactory.createWriter() : null);
216       hasAnyWriter |= needEmptyFile;
217       currentWriterEndKey = (existingWriters.size() + 1 == boundaries.size()) ? null
218           : boundaries.get(existingWriters.size() + 1);
219     }
220 
221     private void checkCanCreateWriter() throws IOException {
222       int maxWriterCount = boundaries.size() - 1;
223       assert existingWriters.size() <= maxWriterCount;
224       if (existingWriters.size() >= maxWriterCount) {
225         throw new IOException("Cannot create any more writers (created " + existingWriters.size()
226             + " out of " + maxWriterCount + " - row might be out of range of all valid writers");
227       }
228     }
229 
230     private void stopUsingCurrentWriter() {
231       if (currentWriter != null) {
232         if (LOG.isDebugEnabled()) {
233           LOG.debug("Stopping to use a writer after [" + Bytes.toString(currentWriterEndKey)
234               + "] row; wrote out " + kvsInCurrentWriter + " kvs");
235         }
236         kvsInCurrentWriter = 0;
237       }
238       currentWriter = null;
239       currentWriterEndKey = (existingWriters.size() + 1 == boundaries.size()) ? null
240           : boundaries.get(existingWriters.size() + 1);
241     }
242   }
243 
244   /**
245    * MultiWriter that separates the cells based on target cell number per file and file count. New
246    * file is started every time the target number of KVs is reached, unless the fixed count of
247    * writers has already been created (in that case all the remaining KVs go into the last writer).
248    */
249   public static class SizeMultiWriter extends StripeMultiFileWriter {
250     private int targetCount;
251     private long targetKvs;
252     private byte[] left;
253     private byte[] right;
254 
255     private KeyValue lastKv;
256     private StoreFile.Writer currentWriter;
257     protected byte[] lastRowInCurrentWriter = null;
258     private long kvsInCurrentWriter = 0;
259     private long kvsSeen = 0;
260     private long kvsSeenInPrevious = 0;
261 
262     /**
263      * @param targetCount The maximum count of writers that can be created.
264      * @param targetKvs The number of KVs to read from source before starting each new writer.
265      * @param left The left boundary of the first writer.
266      * @param right The right boundary of the last writer.
267      */
268     public SizeMultiWriter(KVComparator comparator, int targetCount, long targetKvs, byte[] left,
269         byte[] right) {
270       super(comparator);
271       this.targetCount = targetCount;
272       this.targetKvs = targetKvs;
273       this.left = left;
274       this.right = right;
275       int preallocate = Math.min(this.targetCount, 64);
276       this.existingWriters = new ArrayList<StoreFile.Writer>(preallocate);
277       this.boundaries = new ArrayList<byte[]>(preallocate + 1);
278     }
279 
280     @Override
281     public void append(KeyValue kv) throws IOException {
282       // If we are waiting for opportunity to close and we started writing different row,
283       // discard the writer and stop waiting.
284       boolean doCreateWriter = false;
285       if (currentWriter == null) {
286         // First append ever, do a sanity check.
287         sanityCheckLeft(left, kv.getRowArray(), kv.getRowOffset(), kv.getRowLength());
288         doCreateWriter = true;
289       } else if (lastRowInCurrentWriter != null
290           && !comparator.matchingRows(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
291             lastRowInCurrentWriter, 0, lastRowInCurrentWriter.length)) {
292         if (LOG.isDebugEnabled()) {
293           LOG.debug("Stopping to use a writer after [" + Bytes.toString(lastRowInCurrentWriter)
294               + "] row; wrote out " + kvsInCurrentWriter + " kvs");
295         }
296         lastRowInCurrentWriter = null;
297         kvsInCurrentWriter = 0;
298         kvsSeenInPrevious += kvsSeen;
299         doCreateWriter = true;
300       }
301       if (doCreateWriter) {
302         byte[] boundary = existingWriters.isEmpty() ? left : kv.getRow(); // make a copy
303         if (LOG.isDebugEnabled()) {
304           LOG.debug("Creating new writer starting at [" + Bytes.toString(boundary) + "]");
305         }
306         currentWriter = writerFactory.createWriter();
307         boundaries.add(boundary);
308         existingWriters.add(currentWriter);
309       }
310 
311       currentWriter.append(kv);
312       lastKv = kv; // for the sanity check
313       ++kvsInCurrentWriter;
314       kvsSeen = kvsInCurrentWriter;
315       if (this.sourceScanner != null) {
316         kvsSeen = Math.max(kvsSeen,
317           this.sourceScanner.getEstimatedNumberOfKvsScanned() - kvsSeenInPrevious);
318       }
319 
320       // If we are not already waiting for opportunity to close, start waiting if we can
321       // create any more writers and if the current one is too big.
322       if (lastRowInCurrentWriter == null && existingWriters.size() < targetCount
323           && kvsSeen >= targetKvs) {
324         lastRowInCurrentWriter = kv.getRow(); // make a copy
325         if (LOG.isDebugEnabled()) {
326           LOG.debug("Preparing to start a new writer after ["
327               + Bytes.toString(lastRowInCurrentWriter) + "] row; observed " + kvsSeen
328               + " kvs and wrote out " + kvsInCurrentWriter + " kvs");
329         }
330       }
331     }
332 
333     @Override
334     protected void preCommitWriters() throws IOException {
335       if (LOG.isDebugEnabled()) {
336         LOG.debug("Stopping with " + kvsInCurrentWriter + " kvs in last writer"
337             + ((this.sourceScanner == null) ? ""
338                 : ("; observed estimated " + this.sourceScanner.getEstimatedNumberOfKvsScanned()
339                     + " KVs total")));
340       }
341       if (lastKv != null) {
342         sanityCheckRight(right, lastKv.getRowArray(), lastKv.getRowOffset(), lastKv.getRowLength());
343       }
344 
345       // When expired stripes were going to be merged into one, and if no writer was created during
346       // the compaction, we need to create an empty file to preserve metadata.
347       if (existingWriters.isEmpty() && 1 == targetCount) {
348         if (LOG.isDebugEnabled()) {
349           LOG.debug("Merge expired stripes into one, create an empty file to preserve metadata.");
350         }
351         boundaries.add(left);
352         existingWriters.add(writerFactory.createWriter());
353       }
354 
355       this.boundaries.add(right);
356     }
357   }
358 }