1 /**
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.filter;
20
21 import com.google.protobuf.InvalidProtocolBufferException;
22 import org.apache.commons.logging.Log;
23 import org.apache.commons.logging.LogFactory;
24 import org.apache.hadoop.classification.InterfaceAudience;
25 import org.apache.hadoop.classification.InterfaceStability;
26 import org.apache.hadoop.hbase.HConstants;
27 import org.apache.hadoop.hbase.exceptions.DeserializationException;
28 import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos;
29 import org.apache.hadoop.hbase.util.Bytes;
30
31 import java.nio.charset.Charset;
32 import java.nio.charset.IllegalCharsetNameException;
33 import java.util.Arrays;
34 import java.util.regex.Pattern;
35
36 /**
37 * This comparator is for use with {@link CompareFilter} implementations, such
38 * as {@link RowFilter}, {@link QualifierFilter}, and {@link ValueFilter}, for
39 * filtering based on the value of a given column. Use it to test if a given
40 * regular expression matches a cell value in the column.
41 * <p>
42 * Only EQUAL or NOT_EQUAL comparisons are valid with this comparator.
43 * <p>
44 * For example:
45 * <p>
46 * <pre>
47 * ValueFilter vf = new ValueFilter(CompareOp.EQUAL,
48 * new RegexStringComparator(
49 * // v4 IP address
50 * "(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3,3}" +
51 * "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))(\\/[0-9]+)?" +
52 * "|" +
53 * // v6 IP address
54 * "((([\\dA-Fa-f]{1,4}:){7}[\\dA-Fa-f]{1,4})(:([\\d]{1,3}.)" +
55 * "{3}[\\d]{1,3})?)(\\/[0-9]+)?"));
56 * </pre>
57 * <p>
58 * Supports {@link java.util.regex.Pattern} flags as well:
59 * <p>
60 * <pre>
61 * ValueFilter vf = new ValueFilter(CompareOp.EQUAL,
62 * new RegexStringComparator("regex", Pattern.CASE_INSENSITIVE | Pattern.DOTALL));
63 * </pre>
64 * @see java.util.regex.Pattern
65 */
66 @InterfaceAudience.Public
67 @InterfaceStability.Stable
68 public class RegexStringComparator extends ByteArrayComparable {
69
70 private static final Log LOG = LogFactory.getLog(RegexStringComparator.class);
71
72 private Charset charset = HConstants.UTF8_CHARSET;
73
74 private Pattern pattern;
75
76 /**
77 * Constructor
78 * Adds Pattern.DOTALL to the underlying Pattern
79 * @param expr a valid regular expression
80 */
81 public RegexStringComparator(String expr) {
82 this(expr, Pattern.DOTALL);
83 }
84
85 /**
86 * Constructor
87 * @param expr a valid regular expression
88 * @param flags java.util.regex.Pattern flags
89 */
90 public RegexStringComparator(String expr, int flags) {
91 super(Bytes.toBytes(expr));
92 this.pattern = Pattern.compile(expr, flags);
93 }
94
95 /**
96 * Specifies the {@link Charset} to use to convert the row key to a String.
97 * <p>
98 * The row key needs to be converted to a String in order to be matched
99 * against the regular expression. This method controls which charset is
100 * used to do this conversion.
101 * <p>
102 * If the row key is made of arbitrary bytes, the charset {@code ISO-8859-1}
103 * is recommended.
104 * @param charset The charset to use.
105 */
106 public void setCharset(final Charset charset) {
107 this.charset = charset;
108 }
109
110 @Override
111 public int compareTo(byte[] value, int offset, int length) {
112 // Use find() for subsequence match instead of matches() (full sequence
113 // match) to adhere to the principle of least surprise.
114 String tmp;
115 if (length < value.length / 2) {
116 // See HBASE-9428. Make a copy of the relevant part of the byte[],
117 // or the JDK will copy the entire byte[] during String decode
118 tmp = new String(Arrays.copyOfRange(value, offset, offset + length), charset);
119 } else {
120 tmp = new String(value, offset, length, charset);
121 }
122 return pattern.matcher(tmp).find() ? 0 : 1;
123 }
124
125 /**
126 * @return The comparator serialized using pb
127 */
128 public byte [] toByteArray() {
129 ComparatorProtos.RegexStringComparator.Builder builder =
130 ComparatorProtos.RegexStringComparator.newBuilder();
131 builder.setPattern(pattern.toString());
132 builder.setPatternFlags(pattern.flags());
133 builder.setCharset(charset.name());
134 return builder.build().toByteArray();
135 }
136
137 /**
138 * @param pbBytes A pb serialized {@link RegexStringComparator} instance
139 * @return An instance of {@link RegexStringComparator} made from <code>bytes</code>
140 * @throws DeserializationException
141 * @see #toByteArray
142 */
143 public static RegexStringComparator parseFrom(final byte [] pbBytes)
144 throws DeserializationException {
145 ComparatorProtos.RegexStringComparator proto;
146 try {
147 proto = ComparatorProtos.RegexStringComparator.parseFrom(pbBytes);
148 } catch (InvalidProtocolBufferException e) {
149 throw new DeserializationException(e);
150 }
151
152 RegexStringComparator comparator =
153 new RegexStringComparator(proto.getPattern(), proto.getPatternFlags());
154 final String charset = proto.getCharset();
155 if (charset.length() > 0) {
156 try {
157 comparator.setCharset(Charset.forName(charset));
158 } catch (IllegalCharsetNameException e) {
159 LOG.error("invalid charset", e);
160 }
161 }
162 return comparator;
163 }
164
165 /**
166 * @param other
167 * @return true if and only if the fields of the comparator that are serialized
168 * are equal to the corresponding fields in other. Used for testing.
169 */
170 boolean areSerializedFieldsEqual(ByteArrayComparable other) {
171 if (other == this) return true;
172 if (!(other instanceof RegexStringComparator)) return false;
173
174 RegexStringComparator comparator = (RegexStringComparator)other;
175 return super.areSerializedFieldsEqual(comparator)
176 && this.pattern.toString().equals(comparator.pattern.toString())
177 && this.pattern.flags() == comparator.pattern.flags()
178 && this.charset.equals(comparator.charset);
179 }
180 }