View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.util.byterange;
20  
21  import java.util.ArrayList;
22  import java.util.List;
23  import java.util.Map;
24  
25  import org.apache.hadoop.classification.InterfaceAudience;
26  import org.apache.hadoop.hbase.util.ArrayUtils;
27  import org.apache.hadoop.hbase.util.ByteRange;
28  import org.apache.hadoop.hbase.util.Bytes;
29  
30  import com.google.common.collect.Lists;
31  
32  /**
33   * Performance oriented class for de-duping and storing arbitrary byte[]'s arriving in non-sorted
34   * order. Appends individual byte[]'s to a single big byte[] to avoid overhead and garbage.
35   * <p>
36   * Current implementations are {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet} and
37   * {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet}, but other options might be a
38   * trie-oriented ByteRangeTrieSet, etc
39   */
40  @InterfaceAudience.Private
41  public abstract class ByteRangeSet {
42  
43    /******************** fields **********************/
44  
45    protected byte[] byteAppender;
46    protected int numBytes;
47  
48    protected Map<ByteRange, Integer> uniqueIndexByUniqueRange;
49  
50    protected ArrayList<ByteRange> uniqueRanges;
51    protected int numUniqueRanges = 0;
52  
53    protected int[] uniqueRangeIndexByInsertionId;
54    protected int numInputs;
55  
56    protected List<Integer> sortedIndexByUniqueIndex;
57    protected int[] sortedIndexByInsertionId;
58    protected ArrayList<ByteRange> sortedRanges;
59  
60  
61    /****************** construct **********************/
62  
63    protected ByteRangeSet() {
64      this.byteAppender = new byte[0];
65      this.uniqueRanges = Lists.newArrayList();
66      this.uniqueRangeIndexByInsertionId = new int[0];
67      this.sortedIndexByUniqueIndex = Lists.newArrayList();
68      this.sortedIndexByInsertionId = new int[0];
69      this.sortedRanges = Lists.newArrayList();
70    }
71  
72    public void reset() {
73      numBytes = 0;
74      uniqueIndexByUniqueRange.clear();
75      numUniqueRanges = 0;
76      numInputs = 0;
77      sortedIndexByUniqueIndex.clear();
78      sortedRanges.clear();
79    }
80  
81  
82    /*************** abstract *************************/
83  
84    public abstract void addToSortedRanges();
85  
86  
87    /**************** methods *************************/
88  
89    /**
90     * Check if the incoming byte range exists.  If not, add it to the backing byteAppender[] and
91     * insert it into the tracking Map uniqueIndexByUniqueRange.
92     */
93    public void add(ByteRange bytes) {
94      Integer index = uniqueIndexByUniqueRange.get(bytes);
95      if (index == null) {
96        index = store(bytes);
97      }
98      int minLength = numInputs + 1;
99      uniqueRangeIndexByInsertionId = ArrayUtils.growIfNecessary(uniqueRangeIndexByInsertionId,
100         minLength, 2 * minLength);
101     uniqueRangeIndexByInsertionId[numInputs] = index;
102     ++numInputs;
103   }
104 
105   protected int store(ByteRange bytes) {
106     int indexOfNewElement = numUniqueRanges;
107     if (uniqueRanges.size() <= numUniqueRanges) {
108       uniqueRanges.add(new ByteRange());
109     }
110     ByteRange storedRange = uniqueRanges.get(numUniqueRanges);
111     int neededBytes = numBytes + bytes.getLength();
112     byteAppender = ArrayUtils.growIfNecessary(byteAppender, neededBytes, 2 * neededBytes);
113     bytes.deepCopyTo(byteAppender, numBytes);
114     storedRange.set(byteAppender, numBytes, bytes.getLength());// this isn't valid yet
115     numBytes += bytes.getLength();
116     uniqueIndexByUniqueRange.put(storedRange, indexOfNewElement);
117     int newestUniqueIndex = numUniqueRanges;
118     ++numUniqueRanges;
119     return newestUniqueIndex;
120   }
121 
122   public ByteRangeSet compile() {
123     addToSortedRanges();
124     for (int i = 0; i < sortedRanges.size(); ++i) {
125       sortedIndexByUniqueIndex.add(null);// need to grow the size
126     }
127     // TODO move this to an invert(int[]) util method
128     for (int i = 0; i < sortedIndexByUniqueIndex.size(); ++i) {
129       int uniqueIndex = uniqueIndexByUniqueRange.get(sortedRanges.get(i));
130       sortedIndexByUniqueIndex.set(uniqueIndex, i);
131     }
132     sortedIndexByInsertionId = ArrayUtils.growIfNecessary(sortedIndexByInsertionId, numInputs,
133         numInputs);
134     for (int i = 0; i < numInputs; ++i) {
135       int uniqueRangeIndex = uniqueRangeIndexByInsertionId[i];
136       int sortedIndex = sortedIndexByUniqueIndex.get(uniqueRangeIndex);
137       sortedIndexByInsertionId[i] = sortedIndex;
138     }
139     return this;
140   }
141 
142   public int getSortedIndexForInsertionId(int insertionId) {
143     return sortedIndexByInsertionId[insertionId];
144   }
145 
146   public int size() {
147     return uniqueIndexByUniqueRange.size();
148   }
149 
150 
151   /***************** standard methods ************************/
152 
153   @Override
154   public String toString() {
155     StringBuilder sb = new StringBuilder();
156     int i = 0;
157     for (ByteRange r : sortedRanges) {
158       if (i > 0) {
159         sb.append("\n");
160       }
161       sb.append(i + " " + Bytes.toStringBinary(r.deepCopyToNewArray()));
162       ++i;
163     }
164     sb.append("\ntotalSize:" + numBytes);
165     sb.append("\navgSize:" + getAvgSize());
166     return sb.toString();
167   }
168 
169 
170   /**************** get/set *****************************/
171 
172   public ArrayList<ByteRange> getSortedRanges() {
173     return sortedRanges;
174   }
175 
176   public long getAvgSize() {
177     return numBytes / numUniqueRanges;
178   }
179 
180 }