View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.filter;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.hbase.KeyValue;
26  import org.apache.hadoop.hbase.client.Scan;
27  import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
28  import org.apache.hadoop.hbase.io.HbaseObjectWritable;
29  import org.apache.hadoop.hbase.util.Bytes;
30  
31  import java.io.DataInput;
32  import java.io.DataOutput;
33  import java.io.IOException;
34  import java.util.Arrays;
35  import java.util.List;
36  
37  /**
38   * This filter is used to filter cells based on value. It takes a {@link CompareFilter.CompareOp}
39   * operator (equal, greater, not equal, etc), and either a byte [] value or
40   * a WritableByteArrayComparable.
41   * <p>
42   * If we have a byte [] value then we just do a lexicographic compare. For
43   * example, if passed value is 'b' and cell has 'a' and the compare operator
44   * is LESS, then we will filter out this cell (return true).  If this is not
45   * sufficient (eg you want to deserialize a long and then compare it to a fixed
46   * long value), then you can pass in your own comparator instead.
47   * <p>
48   * You must also specify a family and qualifier.  Only the value of this column
49   * will be tested. When using this filter on a {@link Scan} with specified
50   * inputs, the column to be tested should also be added as input (otherwise
51   * the filter will regard the column as missing).
52   * <p>
53   * To prevent the entire row from being emitted if the column is not found
54   * on a row, use {@link #setFilterIfMissing}.
55   * Otherwise, if the column is found, the entire row will be emitted only if
56   * the value passes.  If the value fails, the row will be filtered out.
57   * <p>
58   * In order to test values of previous versions (timestamps), set
59   * {@link #setLatestVersionOnly} to false. The default is true, meaning that
60   * only the latest version's value is tested and all previous versions are ignored.
61   * <p>
62   * To filter based on the value of all scanned columns, use {@link ValueFilter}.
63   */
64  public class SingleColumnValueFilter extends FilterBase {
65    static final Log LOG = LogFactory.getLog(SingleColumnValueFilter.class);
66  
67    protected byte [] columnFamily;
68    protected byte [] columnQualifier;
69    private CompareOp compareOp;
70    private WritableByteArrayComparable comparator;
71    private boolean foundColumn = false;
72    private boolean matchedColumn = false;
73    private boolean filterIfMissing = false;
74    private boolean latestVersionOnly = true;
75  
76    /**
77     * Writable constructor, do not use.
78     */
79    public SingleColumnValueFilter() {
80    }
81  
82    /**
83     * Constructor for binary compare of the value of a single column.  If the
84     * column is found and the condition passes, all columns of the row will be
85     * emitted.  If the column is not found or the condition fails, the row will
86     * not be emitted.
87     *
88     * @param family name of column family
89     * @param qualifier name of column qualifier
90     * @param compareOp operator
91     * @param value value to compare column values against
92     */
93    public SingleColumnValueFilter(final byte [] family, final byte [] qualifier,
94        final CompareOp compareOp, final byte[] value) {
95      this(family, qualifier, compareOp, new BinaryComparator(value));
96    }
97  
98    /**
99     * Constructor for binary compare of the value of a single column.  If the
100    * column is found and the condition passes, all columns of the row will be
101    * emitted.  If the condition fails, the row will not be emitted.
102    * <p>
103    * Use the filterIfColumnMissing flag to set whether the rest of the columns
104    * in a row will be emitted if the specified column to check is not found in
105    * the row.
106    *
107    * @param family name of column family
108    * @param qualifier name of column qualifier
109    * @param compareOp operator
110    * @param comparator Comparator to use.
111    */
112   public SingleColumnValueFilter(final byte [] family, final byte [] qualifier,
113       final CompareOp compareOp, final WritableByteArrayComparable comparator) {
114     this.columnFamily = family;
115     this.columnQualifier = qualifier;
116     this.compareOp = compareOp;
117     this.comparator = comparator;
118   }
119 
120   /**
121    * @return operator
122    */
123   public CompareOp getOperator() {
124     return compareOp;
125   }
126 
127   /**
128    * @return the comparator
129    */
130   public WritableByteArrayComparable getComparator() {
131     return comparator;
132   }
133 
134   /**
135    * @return the family
136    */
137   public byte[] getFamily() {
138     return columnFamily;
139   }
140 
141   /**
142    * @return the qualifier
143    */
144   public byte[] getQualifier() {
145     return columnQualifier;
146   }
147 
148   public ReturnCode filterKeyValue(KeyValue keyValue) {
149     // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" + Bytes.toString(keyValue.getValue()));
150     if (this.matchedColumn) {
151       // We already found and matched the single column, all keys now pass
152       return ReturnCode.INCLUDE;
153     } else if (this.latestVersionOnly && this.foundColumn) {
154       // We found but did not match the single column, skip to next row
155       return ReturnCode.NEXT_ROW;
156     }
157     if (!keyValue.matchingColumn(this.columnFamily, this.columnQualifier)) {
158       return ReturnCode.INCLUDE;
159     }
160     foundColumn = true;
161     if (filterColumnValue(keyValue.getBuffer(),
162         keyValue.getValueOffset(), keyValue.getValueLength())) {
163       return this.latestVersionOnly? ReturnCode.NEXT_ROW: ReturnCode.INCLUDE;
164     }
165     this.matchedColumn = true;
166     return ReturnCode.INCLUDE;
167   }
168 
169   private boolean filterColumnValue(final byte [] data, final int offset,
170       final int length) {
171     // TODO: Can this filter take a rawcomparator so don't have to make this
172     // byte array copy?
173     int compareResult =
174       this.comparator.compareTo(Arrays.copyOfRange(data, offset, offset + length));
175     switch (this.compareOp) {
176     case LESS:
177       return compareResult <= 0;
178     case LESS_OR_EQUAL:
179       return compareResult < 0;
180     case EQUAL:
181       return compareResult != 0;
182     case NOT_EQUAL:
183       return compareResult == 0;
184     case GREATER_OR_EQUAL:
185       return compareResult > 0;
186     case GREATER:
187       return compareResult >= 0;
188     default:
189       throw new RuntimeException("Unknown Compare op " + compareOp.name());
190     }
191   }
192 
193   public boolean filterRow() {
194     // If column was found, return false if it was matched, true if it was not
195     // If column not found, return true if we filter if missing, false if not
196     return this.foundColumn? !this.matchedColumn: this.filterIfMissing;
197   }
198 
199   public void reset() {
200     foundColumn = false;
201     matchedColumn = false;
202   }
203 
204   /**
205    * Get whether entire row should be filtered if column is not found.
206    * @return true if row should be skipped if column not found, false if row
207    * should be let through anyways
208    */
209   public boolean getFilterIfMissing() {
210     return filterIfMissing;
211   }
212 
213   /**
214    * Set whether entire row should be filtered if column is not found.
215    * <p>
216    * If true, the entire row will be skipped if the column is not found.
217    * <p>
218    * If false, the row will pass if the column is not found.  This is default.
219    * @param filterIfMissing flag
220    */
221   public void setFilterIfMissing(boolean filterIfMissing) {
222     this.filterIfMissing = filterIfMissing;
223   }
224 
225   /**
226    * Get whether only the latest version of the column value should be compared.
227    * If true, the row will be returned if only the latest version of the column
228    * value matches. If false, the row will be returned if any version of the
229    * column value matches. The default is true.
230    * @return return value
231    */
232   public boolean getLatestVersionOnly() {
233     return latestVersionOnly;
234   }
235 
236   /**
237    * Set whether only the latest version of the column value should be compared.
238    * If true, the row will be returned if only the latest version of the column
239    * value matches. If false, the row will be returned if any version of the
240    * column value matches. The default is true.
241    * @param latestVersionOnly flag
242    */
243   public void setLatestVersionOnly(boolean latestVersionOnly) {
244     this.latestVersionOnly = latestVersionOnly;
245   }
246 
247   public void readFields(final DataInput in) throws IOException {
248     this.columnFamily = Bytes.readByteArray(in);
249     if(this.columnFamily.length == 0) {
250       this.columnFamily = null;
251     }
252     this.columnQualifier = Bytes.readByteArray(in);
253     if(this.columnQualifier.length == 0) {
254       this.columnQualifier = null;
255     }
256     this.compareOp = CompareOp.valueOf(in.readUTF());
257     this.comparator =
258       (WritableByteArrayComparable)HbaseObjectWritable.readObject(in, null);
259     this.foundColumn = in.readBoolean();
260     this.matchedColumn = in.readBoolean();
261     this.filterIfMissing = in.readBoolean();
262     this.latestVersionOnly = in.readBoolean();
263   }
264 
265   public void write(final DataOutput out) throws IOException {
266     Bytes.writeByteArray(out, this.columnFamily);
267     Bytes.writeByteArray(out, this.columnQualifier);
268     out.writeUTF(compareOp.name());
269     HbaseObjectWritable.writeObject(out, comparator,
270         WritableByteArrayComparable.class, null);
271     out.writeBoolean(foundColumn);
272     out.writeBoolean(matchedColumn);
273     out.writeBoolean(filterIfMissing);
274     out.writeBoolean(latestVersionOnly);
275   }
276 }