/*
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.filter;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.io.Writable;

import java.util.List;

/**
 * Interface for row and column filters directly applied within the regionserver.
 * A filter can expect the following call sequence:
 * <ul>
 * <li>{@link #reset()}</li>
 * <li>{@link #filterAllRemaining()} -> true indicates the scan is over, false means keep going.</li>
 * <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
 * if false, we will also call</li>
 * <li>{@link #filterKeyValue(KeyValue)} -> a {@link ReturnCode} deciding whether to include
 * or drop this key/value</li>
 * <li>{@link #filterRow(List)} -> allows direct modification of the final list to be submitted</li>
 * <li>{@link #filterRow()} -> last chance to drop the entire row based on the sequence of
 * {@link #filterKeyValue(KeyValue)} calls. Eg: filter a row if it doesn't contain a specified column.
 * </li>
 * </ul>
 *
 * Filter instances are created one per region/scan.  This interface replaces
 * the old RowFilterInterface.
 *
 * When implementing your own filters, consider inheriting {@link FilterBase} to help
 * you reduce boilerplate.
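 *
 * <p>For illustration only (this sketch is not part of the original source), a minimal
 * filter that keeps just the first KeyValue of every row might look roughly like the
 * following; the class name is hypothetical and imports are omitted:
 * <pre>{@code
 * public class ExampleFirstKeyFilter extends FilterBase {
 *   private boolean foundKV = false;
 *
 *   public void reset() {
 *     foundKV = false;               // called between rows
 *   }
 *
 *   public ReturnCode filterKeyValue(KeyValue v) {
 *     if (foundKV) {
 *       return ReturnCode.NEXT_ROW;  // already kept one KeyValue; skip the rest of this row
 *     }
 *     foundKV = true;
 *     return ReturnCode.INCLUDE;     // keep the first KeyValue of the row
 *   }
 *
 *   // Filter extends Writable; this example carries no state worth serializing.
 *   public void write(DataOutput out) throws IOException { }
 *   public void readFields(DataInput in) throws IOException { }
 * }
 * }</pre>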
 *
 * @see FilterBase
 */
public interface Filter extends Writable {
  /**
   * Reset the state of the filter between rows.
   */
  public void reset();

  /**
   * Filters a row based on the row key. If this returns true, the entire
   * row will be excluded.  If false, each KeyValue in the row will be
   * passed to {@link #filterKeyValue(KeyValue)} below.
   *
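   * <p>As an illustrative sketch only (not part of the original source), a prefix match on
   * the row key could look roughly like this, using {@code Bytes} from
   * org.apache.hadoop.hbase.util and a hypothetical {@code prefix} field:
   * <pre>{@code
   * public boolean filterRowKey(byte[] buffer, int offset, int length) {
   *   // Exclude (return true) any row whose key does not start with the prefix.
   *   if (length < prefix.length) {
   *     return true;
   *   }
   *   return Bytes.compareTo(prefix, 0, prefix.length,
   *                          buffer, offset, prefix.length) != 0;
   * }
   * }</pre>
   *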
   * @param buffer buffer containing row key
   * @param offset offset into buffer where row key starts
   * @param length length of the row key
   * @return true to remove the entire row, false to include the row (it may still be filtered out later).
   */
  public boolean filterRowKey(byte [] buffer, int offset, int length);

  /**
   * If this returns true, the scan will terminate.
   *
   * @return true to end scan, false to continue.
   */
  public boolean filterAllRemaining();

  /**
   * A way to filter based on the column family, column qualifier and/or the
   * column value. The return codes are described below.  This allows a filter to
   * act on only a certain number of columns and then terminate without examining
   * every remaining column.
   *
   * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should keep returning
   * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called,
   * in case the caller asks for the next row.
   *
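   * <p>For illustration only (not part of the original source), a filter that includes at
   * most a fixed number of columns per row, using hypothetical {@code count} and
   * {@code limit} fields with {@code count} cleared in {@link #reset()}, might look
   * roughly like:
   * <pre>{@code
   * public ReturnCode filterKeyValue(KeyValue v) {
   *   if (count >= limit) {
   *     // Keep answering NEXT_ROW until reset() is called, as required above.
   *     return ReturnCode.NEXT_ROW;
   *   }
   *   count++;
   *   return ReturnCode.INCLUDE;
   * }
   * }</pre>
   *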
   * @param v the KeyValue in question
   * @return code as described below
   * @see Filter.ReturnCode
   */
  public ReturnCode filterKeyValue(final KeyValue v);

  /**
   * Give the filter a chance to transform the passed KeyValue.
   * If the KeyValue is changed, a new KeyValue object must be returned
   * (see {@link org.apache.hadoop.hbase.KeyValue#shallowCopy()}).
   *
   * The transformed KeyValue is what is eventually returned to the
   * client. Most filters will return the passed KeyValue unchanged; see
   * {@link org.apache.hadoop.hbase.filter.KeyOnlyFilter#transform(KeyValue)}
   * for an example of a transformation.
   *
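   * <p>As a sketch only (not part of the original source), a value-stripping transform
   * in the spirit of KeyOnlyFilter could look roughly like:
   * <pre>{@code
   * public KeyValue transform(KeyValue v) {
   *   // Return a new KeyValue carrying the same key but an empty value.
   *   return v.createKeyOnly(false);
   * }
   * }</pre>
   *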
   * @param v the KeyValue in question
   * @return the changed KeyValue
   */
  public KeyValue transform(final KeyValue v);

  /**
   * Return codes for filterKeyValue().
   */
  public enum ReturnCode {
    /**
     * Include the KeyValue
     */
    INCLUDE,
    /**
     * Include the KeyValue and seek to the next column, skipping older versions.
     */
    INCLUDE_AND_NEXT_COL,
    /**
     * Skip this KeyValue
     */
    SKIP,
    /**
     * Skip this column. Go to the next column in this row.
     */
    NEXT_COL,
    /**
     * Done with columns, skip to the next row. Note that filterRow() will
     * still be called.
     */
    NEXT_ROW,
    /**
     * Seek to the next key, which is given as a hint by the filter.
     */
    SEEK_NEXT_USING_HINT,
  }

  /**
   * Chance to alter the list of keyvalues to be submitted.
   * Modifications to the list will carry forward into the results returned for the row.
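   *
   * <p>For illustration only (not part of the original source), a filter that trims the row
   * down to its first KeyValue could do roughly the following:
   * <pre>{@code
   * public void filterRow(List<KeyValue> kvs) {
   *   // Drop everything except the first KeyValue; the trimmed list is what gets submitted.
   *   if (kvs.size() > 1) {
   *     kvs.subList(1, kvs.size()).clear();
   *   }
   * }
   * }</pre>
   *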
   * @param kvs the list of keyvalues to be filtered
   */
  public void filterRow(List<KeyValue> kvs);

  /**
   * @return True if this filter actively uses filterRow(List).
   * Primarily used to check for conflicts with scans (such as scans
   * that do not read a full row at a time).
   */
  public boolean hasFilterRow();

  /**
   * Last chance to veto the row based on previous {@link #filterKeyValue(KeyValue)}
   * calls. The filter needs to retain state and then return a particular value for
   * this call if it wishes to exclude a row, for example when a certain column is
   * missing.
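   *
   * <p>A sketch only (not part of the original source), assuming a hypothetical
   * {@code foundColumn} flag set in {@link #filterKeyValue(KeyValue)} and cleared in
   * {@link #reset()}:
   * <pre>{@code
   * public boolean filterRow() {
   *   // Veto the whole row if the required column never appeared.
   *   return !foundColumn;
   * }
   * }</pre>
   *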
   * @return true to exclude row, false to include row.
   */
  public boolean filterRow();

  /**
   * If the filter returns the match code SEEK_NEXT_USING_HINT, it should also tell
   * which key it must seek to next. After receiving SEEK_NEXT_USING_HINT, the
   * QueryMatcher calls this method to find out that key.
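   *
   * <p>For illustration only (not part of the original source), a hint that jumps the scan
   * to the start of a hypothetical {@code nextInterestingRow}:
   * <pre>{@code
   * public KeyValue getNextKeyHint(KeyValue currentKV) {
   *   // Ask the scanner to seek straight to the first possible KeyValue of that row.
   *   return KeyValue.createFirstOnRow(nextInterestingRow);
   * }
   * }</pre>
   *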
   * @param currentKV the KeyValue the scan is currently positioned at
   * @return the KeyValue the scan should seek to next, or null if the filter is
   * not sure which key to seek to.
   */
  public KeyValue getNextKeyHint(final KeyValue currentKV);
}