View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.filter;
19  
20  import com.google.protobuf.ByteString;
21  import com.google.protobuf.InvalidProtocolBufferException;
22  import org.apache.hadoop.classification.InterfaceAudience;
23  import org.apache.hadoop.classification.InterfaceStability;
24  import org.apache.hadoop.hbase.KeyValue;
25  import org.apache.hadoop.hbase.exceptions.DeserializationException;
26  import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
27  import org.apache.hadoop.hbase.util.Bytes;
28  
29  import java.util.ArrayList;
30  import java.util.Arrays;
31  import java.util.Comparator;
32  import java.util.TreeSet;
33  
34  /**
35   * This filter is used for selecting only those keys with columns that matches
36   * a particular prefix. For example, if prefix is 'an', it will pass keys will
37   * columns like 'and', 'anti' but not keys with columns like 'ball', 'act'.
38   */
39  @InterfaceAudience.Public
40  @InterfaceStability.Stable
41  public class MultipleColumnPrefixFilter extends FilterBase {
42    protected byte [] hint = null;
43    protected TreeSet<byte []> sortedPrefixes = createTreeSet();
44    private final static int MAX_LOG_PREFIXES = 5;
45  
46    public MultipleColumnPrefixFilter(final byte [][] prefixes) {
47      if (prefixes != null) {
48        for (int i = 0; i < prefixes.length; i++) {
49          if (!sortedPrefixes.add(prefixes[i]))
50            throw new IllegalArgumentException ("prefixes must be distinct");
51        }
52      }
53    }
54  
55    public byte [][] getPrefix() {
56      int count = 0;
57      byte [][] temp = new byte [sortedPrefixes.size()][];
58      for (byte [] prefixes : sortedPrefixes) {
59        temp [count++] = prefixes;
60      }
61      return temp;
62    }
63  
64    @Override
65    public ReturnCode filterKeyValue(KeyValue kv) {
66      if (sortedPrefixes.size() == 0 || kv.getBuffer() == null) {
67        return ReturnCode.INCLUDE;
68      } else {
69        return filterColumn(kv.getBuffer(), kv.getQualifierOffset(), kv.getQualifierLength());
70      }
71    }
72  
73    public ReturnCode filterColumn(byte[] buffer, int qualifierOffset, int qualifierLength) {
74      byte [] qualifier = Arrays.copyOfRange(buffer, qualifierOffset,
75                                             qualifierLength + qualifierOffset);
76      TreeSet<byte []> lesserOrEqualPrefixes =
77        (TreeSet<byte []>) sortedPrefixes.headSet(qualifier, true);
78  
79      if (lesserOrEqualPrefixes.size() != 0) {
80        byte [] largestPrefixSmallerThanQualifier = lesserOrEqualPrefixes.last();
81        
82        if (Bytes.startsWith(qualifier, largestPrefixSmallerThanQualifier)) {
83          return ReturnCode.INCLUDE;
84        }
85        
86        if (lesserOrEqualPrefixes.size() == sortedPrefixes.size()) {
87          return ReturnCode.NEXT_ROW;
88        } else {
89          hint = sortedPrefixes.higher(largestPrefixSmallerThanQualifier);
90          return ReturnCode.SEEK_NEXT_USING_HINT;
91        }
92      } else {
93        hint = sortedPrefixes.first();
94        return ReturnCode.SEEK_NEXT_USING_HINT;
95      }
96    }
97  
98    public static Filter createFilterFromArguments(ArrayList<byte []> filterArguments) {
99      byte [][] prefixes = new byte [filterArguments.size()][];
100     for (int i = 0 ; i < filterArguments.size(); i++) {
101       byte [] columnPrefix = ParseFilter.removeQuotesFromByteArray(filterArguments.get(i));
102       prefixes[i] = columnPrefix;
103     }
104     return new MultipleColumnPrefixFilter(prefixes);
105   }
106 
107   /**
108    * @return The filter serialized using pb
109    */
110   public byte [] toByteArray() {
111     FilterProtos.MultipleColumnPrefixFilter.Builder builder =
112       FilterProtos.MultipleColumnPrefixFilter.newBuilder();
113     for (byte [] element : sortedPrefixes) {
114       if (element != null) builder.addSortedPrefixes(ByteString.copyFrom(element));
115     }
116     return builder.build().toByteArray();
117   }
118 
119   /**
120    * @param pbBytes A pb serialized {@link MultipleColumnPrefixFilter} instance
121    * @return An instance of {@link MultipleColumnPrefixFilter} made from <code>bytes</code>
122    * @throws DeserializationException
123    * @see #toByteArray
124    */
125   public static MultipleColumnPrefixFilter parseFrom(final byte [] pbBytes)
126   throws DeserializationException {
127     FilterProtos.MultipleColumnPrefixFilter proto;
128     try {
129       proto = FilterProtos.MultipleColumnPrefixFilter.parseFrom(pbBytes);
130     } catch (InvalidProtocolBufferException e) {
131       throw new DeserializationException(e);
132     }
133     int numPrefixes = proto.getSortedPrefixesCount();
134     byte [][] prefixes = new byte[numPrefixes][];
135     for (int i = 0; i < numPrefixes; ++i) {
136       prefixes[i] = proto.getSortedPrefixes(i).toByteArray();
137     }
138 
139     return new MultipleColumnPrefixFilter(prefixes);
140   }
141 
142   /**
143    * @param other
144    * @return true if and only if the fields of the filter that are serialized
145    * are equal to the corresponding fields in other.  Used for testing.
146    */
147   boolean areSerializedFieldsEqual(Filter o) {
148     if (o == this) return true;
149     if (!(o instanceof MultipleColumnPrefixFilter)) return false;
150 
151     MultipleColumnPrefixFilter other = (MultipleColumnPrefixFilter)o;
152     return this.sortedPrefixes.equals(other.sortedPrefixes);
153   }
154 
155   public KeyValue getNextKeyHint(KeyValue kv) {
156     return KeyValue.createFirstOnRow(
157       kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(), kv.getBuffer(),
158       kv.getFamilyOffset(), kv.getFamilyLength(), hint, 0, hint.length);
159   }
160 
161   public TreeSet<byte []> createTreeSet() {
162     return new TreeSet<byte []>(new Comparator<Object>() {
163         @Override
164           public int compare (Object o1, Object o2) {
165           if (o1 == null || o2 == null)
166             throw new IllegalArgumentException ("prefixes can't be null");
167 
168           byte [] b1 = (byte []) o1;
169           byte [] b2 = (byte []) o2;
170           return Bytes.compareTo (b1, 0, b1.length, b2, 0, b2.length);
171         }
172       });
173   }
174 
175   @Override
176   public String toString() {
177     return toString(MAX_LOG_PREFIXES);
178   }
179 
180   protected String toString(int maxPrefixes) {
181     StringBuilder prefixes = new StringBuilder();
182 
183     int count = 0;
184     for (byte[] ba : this.sortedPrefixes) {
185       if (count >= maxPrefixes) {
186         break;
187       }
188       ++count;
189       prefixes.append(Bytes.toStringBinary(ba));
190       if (count < this.sortedPrefixes.size() && count < maxPrefixes) {
191         prefixes.append(", ");
192       }
193     }
194 
195     return String.format("%s (%d/%d): [%s]", this.getClass().getSimpleName(),
196         count, this.sortedPrefixes.size(), prefixes.toString());
197   }
198 }