View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.filter;
21  
22  import org.apache.hadoop.hbase.HConstants;
23  import org.apache.hadoop.hbase.util.Bytes;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  
28  import java.io.DataInput;
29  import java.io.DataOutput;
30  import java.io.IOException;
31  import java.nio.charset.Charset;
32  import java.nio.charset.IllegalCharsetNameException;
33  import java.util.regex.Pattern;
34  
35  /**
36   * This comparator is for use with {@link CompareFilter} implementations, such
37   * as {@link RowFilter}, {@link QualifierFilter}, and {@link ValueFilter}, for
38   * filtering based on the value of a given column. Use it to test if a given
39   * regular expression matches a cell value in the column.
40   * <p>
41   * Only EQUAL or NOT_EQUAL comparisons are valid with this comparator.
42   * <p>
43   * For example:
44   * <p>
45   * <pre>
46   * ValueFilter vf = new ValueFilter(CompareOp.EQUAL,
47   *     new RegexStringComparator(
48   *       // v4 IP address
49   *       "(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3,3}" +
50   *         "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))(\\/[0-9]+)?" +
51   *         "|" +
52   *       // v6 IP address
53   *       "((([\\dA-Fa-f]{1,4}:){7}[\\dA-Fa-f]{1,4})(:([\\d]{1,3}.)" +
54   *         "{3}[\\d]{1,3})?)(\\/[0-9]+)?"));
55   * </pre>
56   */
57  public class RegexStringComparator extends WritableByteArrayComparable {
58  
59    private static final Log LOG = LogFactory.getLog(RegexStringComparator.class);
60  
61    private Charset charset = Charset.forName(HConstants.UTF8_ENCODING);
62  
63    private Pattern pattern;
64  
65    /** Nullary constructor for Writable, do not use */
66    public RegexStringComparator() { }
67  
68    /**
69     * Constructor
70     * @param expr a valid regular expression
71     */
72    public RegexStringComparator(String expr) {
73      super(Bytes.toBytes(expr));
74      this.pattern = Pattern.compile(expr, Pattern.DOTALL);
75    }
76  
77    /**
78     * Specifies the {@link Charset} to use to convert the row key to a String.
79     * <p>
80     * The row key needs to be converted to a String in order to be matched
81     * against the regular expression.  This method controls which charset is
82     * used to do this conversion.
83     * <p>
84     * If the row key is made of arbitrary bytes, the charset {@code ISO-8859-1}
85     * is recommended.
86     * @param charset The charset to use.
87     */
88    public void setCharset(final Charset charset) {
89      this.charset = charset;
90    }
91  
92    @Override
93    public int compareTo(byte[] value) {
94      // Use find() for subsequence match instead of matches() (full sequence
95      // match) to adhere to the principle of least surprise.
96      return pattern.matcher(new String(value, charset)).find() ? 0 : 1;
97    }
98  
99    @Override
100   public void readFields(DataInput in) throws IOException {
101     final String expr = in.readUTF();
102     this.value = Bytes.toBytes(expr);
103     this.pattern = Pattern.compile(expr);
104     final String charset = in.readUTF();
105     if (charset.length() > 0) {
106       try {
107         this.charset = Charset.forName(charset);
108       } catch (IllegalCharsetNameException e) {
109         LOG.error("invalid charset", e);
110       }
111     }
112   }
113 
114   @Override
115   public void write(DataOutput out) throws IOException {
116     out.writeUTF(pattern.toString());
117     out.writeUTF(charset.name());
118   }
119 
120 }