View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.filter;
20  
21  import com.google.protobuf.InvalidProtocolBufferException;
22  import org.apache.commons.logging.Log;
23  import org.apache.commons.logging.LogFactory;
24  import org.apache.hadoop.classification.InterfaceAudience;
25  import org.apache.hadoop.classification.InterfaceStability;
26  import org.apache.hadoop.hbase.HConstants;
27  import org.apache.hadoop.hbase.exceptions.DeserializationException;
28  import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos;
29  import org.apache.hadoop.hbase.util.Bytes;
30  
31  import java.nio.charset.Charset;
32  import java.nio.charset.IllegalCharsetNameException;
33  import java.util.regex.Pattern;
34  
35  /**
36   * This comparator is for use with {@link CompareFilter} implementations, such
37   * as {@link RowFilter}, {@link QualifierFilter}, and {@link ValueFilter}, for
38   * filtering based on the value of a given column. Use it to test if a given
39   * regular expression matches a cell value in the column.
40   * <p>
41   * Only EQUAL or NOT_EQUAL comparisons are valid with this comparator.
42   * <p>
43   * For example:
44   * <p>
45   * <pre>
46   * ValueFilter vf = new ValueFilter(CompareOp.EQUAL,
47   *     new RegexStringComparator(
48   *       // v4 IP address
49   *       "(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3,3}" +
50   *         "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))(\\/[0-9]+)?" +
51   *         "|" +
52   *       // v6 IP address
53   *       "((([\\dA-Fa-f]{1,4}:){7}[\\dA-Fa-f]{1,4})(:([\\d]{1,3}.)" +
54   *         "{3}[\\d]{1,3})?)(\\/[0-9]+)?"));
55   * </pre>
56   * <p>
57   * Supports {@link java.util.regex.Pattern} flags as well:
58   * <p>
59   * <pre>
60   * ValueFilter vf = new ValueFilter(CompareOp.EQUAL,
61   *     new RegexStringComparator("regex", Pattern.CASE_INSENSITIVE | Pattern.DOTALL));
62   * </pre>
63   * @see java.util.regex.Pattern
64   */
65  @InterfaceAudience.Public
66  @InterfaceStability.Stable
67  public class RegexStringComparator extends ByteArrayComparable {
68  
69    private static final Log LOG = LogFactory.getLog(RegexStringComparator.class);
70  
71    private Charset charset = HConstants.UTF8_CHARSET;
72  
73    private Pattern pattern;
74  
75    /**
76     * Constructor
77     * Adds Pattern.DOTALL to the underlying Pattern
78     * @param expr a valid regular expression
79     */
80    public RegexStringComparator(String expr) {
81      this(expr, Pattern.DOTALL);
82    }
83  
84    /**
85     * Constructor
86     * @param expr a valid regular expression
87     * @param flags java.util.regex.Pattern flags
88     */
89    public RegexStringComparator(String expr, int flags) {
90      super(Bytes.toBytes(expr));
91      this.pattern = Pattern.compile(expr, flags);
92    }
93  
94    /**
95     * Specifies the {@link Charset} to use to convert the row key to a String.
96     * <p>
97     * The row key needs to be converted to a String in order to be matched
98     * against the regular expression.  This method controls which charset is
99     * used to do this conversion.
100    * <p>
101    * If the row key is made of arbitrary bytes, the charset {@code ISO-8859-1}
102    * is recommended.
103    * @param charset The charset to use.
104    */
105   public void setCharset(final Charset charset) {
106     this.charset = charset;
107   }
108 
109   @Override
110   public int compareTo(byte[] value, int offset, int length) {
111     // Use find() for subsequence match instead of matches() (full sequence
112     // match) to adhere to the principle of least surprise.
113     return pattern.matcher(new String(value, offset, length, charset)).find() ? 0
114         : 1;
115   }
116 
117   /**
118    * @return The comparator serialized using pb
119    */
120   public byte [] toByteArray() {
121     ComparatorProtos.RegexStringComparator.Builder builder =
122       ComparatorProtos.RegexStringComparator.newBuilder();
123     builder.setPattern(pattern.toString());
124     builder.setPatternFlags(pattern.flags());
125     builder.setCharset(charset.name());
126     return builder.build().toByteArray();
127   }
128 
129   /**
130    * @param pbBytes A pb serialized {@link RegexStringComparator} instance
131    * @return An instance of {@link RegexStringComparator} made from <code>bytes</code>
132    * @throws DeserializationException
133    * @see #toByteArray
134    */
135   public static RegexStringComparator parseFrom(final byte [] pbBytes)
136   throws DeserializationException {
137     ComparatorProtos.RegexStringComparator proto;
138     try {
139       proto = ComparatorProtos.RegexStringComparator.parseFrom(pbBytes);
140     } catch (InvalidProtocolBufferException e) {
141       throw new DeserializationException(e);
142     }
143 
144     RegexStringComparator comparator =
145       new RegexStringComparator(proto.getPattern(), proto.getPatternFlags());
146     final String charset = proto.getCharset();
147     if (charset.length() > 0) {
148       try {
149         comparator.setCharset(Charset.forName(charset));
150       } catch (IllegalCharsetNameException e) {
151         LOG.error("invalid charset", e);
152       }
153     }
154     return comparator;
155   }
156 
157   /**
158    * @param other
159    * @return true if and only if the fields of the comparator that are serialized
160    * are equal to the corresponding fields in other.  Used for testing.
161    */
162   boolean areSerializedFieldsEqual(ByteArrayComparable other) {
163     if (other == this) return true;
164     if (!(other instanceof RegexStringComparator)) return false;
165 
166     RegexStringComparator comparator = (RegexStringComparator)other;
167     return super.areSerializedFieldsEqual(comparator)
168       && this.pattern.toString().equals(comparator.pattern.toString())
169       && this.pattern.flags() == comparator.pattern.flags()
170       && this.charset.equals(comparator.charset);
171   }
172 }