1 /**
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.filter;
20
21 import com.google.protobuf.InvalidProtocolBufferException;
22 import org.apache.commons.logging.Log;
23 import org.apache.commons.logging.LogFactory;
24 import org.apache.hadoop.classification.InterfaceAudience;
25 import org.apache.hadoop.classification.InterfaceStability;
26 import org.apache.hadoop.hbase.HConstants;
27 import org.apache.hadoop.hbase.exceptions.DeserializationException;
28 import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos;
29 import org.apache.hadoop.hbase.util.Bytes;
30
31 import java.nio.charset.Charset;
32 import java.nio.charset.IllegalCharsetNameException;
33 import java.util.regex.Pattern;
34
35 /**
36 * This comparator is for use with {@link CompareFilter} implementations, such
37 * as {@link RowFilter}, {@link QualifierFilter}, and {@link ValueFilter}, for
38 * filtering based on the value of a given column. Use it to test if a given
39 * regular expression matches a cell value in the column.
40 * <p>
41 * Only EQUAL or NOT_EQUAL comparisons are valid with this comparator.
42 * <p>
43 * For example:
44 * <p>
45 * <pre>
46 * ValueFilter vf = new ValueFilter(CompareOp.EQUAL,
47 * new RegexStringComparator(
48 * // v4 IP address
49 * "(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3,3}" +
50 * "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))(\\/[0-9]+)?" +
51 * "|" +
52 * // v6 IP address
53 * "((([\\dA-Fa-f]{1,4}:){7}[\\dA-Fa-f]{1,4})(:([\\d]{1,3}.)" +
54 * "{3}[\\d]{1,3})?)(\\/[0-9]+)?"));
55 * </pre>
56 * <p>
57 * Supports {@link java.util.regex.Pattern} flags as well:
58 * <p>
59 * <pre>
60 * ValueFilter vf = new ValueFilter(CompareOp.EQUAL,
61 * new RegexStringComparator("regex", Pattern.CASE_INSENSITIVE | Pattern.DOTALL));
62 * </pre>
63 * @see java.util.regex.Pattern
64 */
65 @InterfaceAudience.Public
66 @InterfaceStability.Stable
67 public class RegexStringComparator extends ByteArrayComparable {
68
69 private static final Log LOG = LogFactory.getLog(RegexStringComparator.class);
70
71 private Charset charset = HConstants.UTF8_CHARSET;
72
73 private Pattern pattern;
74
75 /**
76 * Constructor
77 * Adds Pattern.DOTALL to the underlying Pattern
78 * @param expr a valid regular expression
79 */
80 public RegexStringComparator(String expr) {
81 this(expr, Pattern.DOTALL);
82 }
83
84 /**
85 * Constructor
86 * @param expr a valid regular expression
87 * @param flags java.util.regex.Pattern flags
88 */
89 public RegexStringComparator(String expr, int flags) {
90 super(Bytes.toBytes(expr));
91 this.pattern = Pattern.compile(expr, flags);
92 }
93
94 /**
95 * Specifies the {@link Charset} to use to convert the row key to a String.
96 * <p>
97 * The row key needs to be converted to a String in order to be matched
98 * against the regular expression. This method controls which charset is
99 * used to do this conversion.
100 * <p>
101 * If the row key is made of arbitrary bytes, the charset {@code ISO-8859-1}
102 * is recommended.
103 * @param charset The charset to use.
104 */
105 public void setCharset(final Charset charset) {
106 this.charset = charset;
107 }
108
109 @Override
110 public int compareTo(byte[] value, int offset, int length) {
111 // Use find() for subsequence match instead of matches() (full sequence
112 // match) to adhere to the principle of least surprise.
113 return pattern.matcher(new String(value, offset, length, charset)).find() ? 0
114 : 1;
115 }
116
117 /**
118 * @return The comparator serialized using pb
119 */
120 public byte [] toByteArray() {
121 ComparatorProtos.RegexStringComparator.Builder builder =
122 ComparatorProtos.RegexStringComparator.newBuilder();
123 builder.setPattern(pattern.toString());
124 builder.setPatternFlags(pattern.flags());
125 builder.setCharset(charset.name());
126 return builder.build().toByteArray();
127 }
128
129 /**
130 * @param pbBytes A pb serialized {@link RegexStringComparator} instance
131 * @return An instance of {@link RegexStringComparator} made from <code>bytes</code>
132 * @throws DeserializationException
133 * @see #toByteArray
134 */
135 public static RegexStringComparator parseFrom(final byte [] pbBytes)
136 throws DeserializationException {
137 ComparatorProtos.RegexStringComparator proto;
138 try {
139 proto = ComparatorProtos.RegexStringComparator.parseFrom(pbBytes);
140 } catch (InvalidProtocolBufferException e) {
141 throw new DeserializationException(e);
142 }
143
144 RegexStringComparator comparator =
145 new RegexStringComparator(proto.getPattern(), proto.getPatternFlags());
146 final String charset = proto.getCharset();
147 if (charset.length() > 0) {
148 try {
149 comparator.setCharset(Charset.forName(charset));
150 } catch (IllegalCharsetNameException e) {
151 LOG.error("invalid charset", e);
152 }
153 }
154 return comparator;
155 }
156
157 /**
158 * @param other
159 * @return true if and only if the fields of the comparator that are serialized
160 * are equal to the corresponding fields in other. Used for testing.
161 */
162 boolean areSerializedFieldsEqual(ByteArrayComparable other) {
163 if (other == this) return true;
164 if (!(other instanceof RegexStringComparator)) return false;
165
166 RegexStringComparator comparator = (RegexStringComparator)other;
167 return super.areSerializedFieldsEqual(comparator)
168 && this.pattern.toString().equals(comparator.pattern.toString())
169 && this.pattern.flags() == comparator.pattern.flags()
170 && this.charset.equals(comparator.charset);
171 }
172 }