%line | %branch | |||||||||
---|---|---|---|---|---|---|---|---|---|---|
org.apache.commons.validator.UrlValidator |
|
|
1 | /* |
|
2 | * $Id: UrlValidator.java 327148 2005-10-21 10:50:20Z niallp $ |
|
3 | * $Rev: 327148 $ |
|
4 | * $Date: 2005-10-21 11:50:20 +0100 (Fri, 21 Oct 2005) $ |
|
5 | * |
|
6 | * ==================================================================== |
|
7 | * Copyright 2001-2005 The Apache Software Foundation |
|
8 | * |
|
9 | * Licensed under the Apache License, Version 2.0 (the "License"); |
|
10 | * you may not use this file except in compliance with the License. |
|
11 | * You may obtain a copy of the License at |
|
12 | * |
|
13 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
14 | * |
|
15 | * Unless required by applicable law or agreed to in writing, software |
|
16 | * distributed under the License is distributed on an "AS IS" BASIS, |
|
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
18 | * See the License for the specific language governing permissions and |
|
19 | * limitations under the License. |
|
20 | */ |
|
21 | ||
22 | package org.apache.commons.validator; |
|
23 | ||
24 | import java.io.Serializable; |
|
25 | import java.util.Arrays; |
|
26 | import java.util.HashSet; |
|
27 | import java.util.Set; |
|
28 | ||
29 | import org.apache.commons.validator.util.Flags; |
|
30 | import org.apache.oro.text.perl.Perl5Util; |
|
31 | ||
32 | /** |
|
33 | * <p>Validates URLs.</p> |
|
34 | * Behavior of validation is modified by passing in options: |
|
35 | * <li>ALLOW_2_SLASHES - [FALSE] Allows double '/' characters in the path |
|
36 | * component.</li> |
|
37 | * <li>NO_FRAGMENTS - [FALSE] By default fragments are allowed, if this option is |
|
38 | * included then fragments are flagged as illegal.</li> |
|
39 | * <li>ALLOW_ALL_SCHEMES - [FALSE] By default only http, https, and ftp are |
|
40 | * considered valid schemes. Enabling this option will let any scheme pass validation.</li> |
|
41 | * |
|
42 | * <p>Originally based on a PHP script by Debbie Dyer, validation.php v1.2b, Date: 03/07/02, |
|
43 | * http://javascript.internet.com. However, this validation now bears little resemblance |
|
44 | * to the php original.</p> |
|
45 | * <pre> |
|
46 | * Example of usage: |
|
47 | * Construct a UrlValidator with valid schemes of "http", and "https". |
|
48 | * |
|
49 | * String[] schemes = {"http","https"}; |
|
50 | * UrlValidator urlValidator = new UrlValidator(schemes); |
|
51 | * if (urlValidator.isValid("ftp://foo.bar.com/")) { |
|
52 | * System.out.println("url is valid"); |
|
53 | * } else { |
|
54 | * System.out.println("url is invalid"); |
|
55 | * } |
|
56 | * |
|
57 | * prints "url is invalid" |
|
58 | * If instead the default constructor is used. |
|
59 | * |
|
60 | * UrlValidator urlValidator = new UrlValidator(); |
|
61 | * if (urlValidator.isValid("ftp://foo.bar.com/")) { |
|
62 | * System.out.println("url is valid"); |
|
63 | * } else { |
|
64 | * System.out.println("url is invalid"); |
|
65 | * } |
|
66 | * |
|
67 | * prints out "url is valid" |
|
68 | * </pre> |
|
69 | * |
|
70 | * @see |
|
71 | * <a href='http://www.ietf.org/rfc/rfc2396.txt' > |
|
72 | * Uniform Resource Identifiers (URI): Generic Syntax |
|
73 | * </a> |
|
74 | * |
|
75 | * @since Validator 1.1 |
|
76 | */ |
|
77 | public class UrlValidator implements Serializable { |
|
78 | ||
79 | /** |
|
80 | * Allows all validly formatted schemes to pass validation instead of |
|
81 | * supplying a set of valid schemes. |
|
82 | */ |
|
83 | public static final int ALLOW_ALL_SCHEMES = 1 << 0; |
|
84 | ||
85 | /** |
|
86 | * Allow two slashes in the path component of the URL. |
|
87 | */ |
|
88 | public static final int ALLOW_2_SLASHES = 1 << 1; |
|
89 | ||
90 | /** |
|
91 | * Enabling this option disallows any URL fragments. |
|
92 | */ |
|
93 | public static final int NO_FRAGMENTS = 1 << 2; |
|
94 | ||
95 | private static final String ALPHA_CHARS = "a-zA-Z"; |
|
96 | ||
97 | private static final String ALPHA_NUMERIC_CHARS = ALPHA_CHARS + "\\d"; |
|
98 | ||
99 | private static final String SPECIAL_CHARS = ";/@&=,.?:+$"; |
|
100 | ||
101 | private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]"; |
|
102 | ||
103 | private static final String SCHEME_CHARS = ALPHA_CHARS; |
|
104 | ||
105 | // Drop numeric, and "+-." for now |
|
106 | private static final String AUTHORITY_CHARS = ALPHA_NUMERIC_CHARS + "\\-\\."; |
|
107 | ||
108 | private static final String ATOM = VALID_CHARS + '+'; |
|
109 | ||
110 | /** |
|
111 | * This expression derived/taken from the BNF for URI (RFC2396). |
|
112 | */ |
|
113 | private static final String URL_PATTERN = |
|
114 | "/^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?/"; |
|
115 | // 12 3 4 5 6 7 8 9 |
|
116 | ||
117 | /** |
|
118 | * Scheme/Protocol (i.e. http:, ftp:, file:, etc). |
|
119 | */ |
|
120 | private static final int PARSE_URL_SCHEME = 2; |
|
121 | ||
122 | /** |
|
123 | * Includes hostname/ip and port number. |
|
124 | */ |
|
125 | private static final int PARSE_URL_AUTHORITY = 4; |
|
126 | ||
127 | private static final int PARSE_URL_PATH = 5; |
|
128 | ||
129 | private static final int PARSE_URL_QUERY = 7; |
|
130 | ||
131 | private static final int PARSE_URL_FRAGMENT = 9; |
|
132 | ||
133 | /** |
|
134 | * Protocol (ie. http:, ftp:,https:). |
|
135 | */ |
|
136 | private static final String SCHEME_PATTERN = "/^[" + SCHEME_CHARS + "]/"; |
|
137 | ||
138 | private static final String AUTHORITY_PATTERN = |
|
139 | "/^([" + AUTHORITY_CHARS + "]*)(:\\d*)?(.*)?/"; |
|
140 | // 1 2 3 4 |
|
141 | ||
142 | private static final int PARSE_AUTHORITY_HOST_IP = 1; |
|
143 | ||
144 | private static final int PARSE_AUTHORITY_PORT = 2; |
|
145 | ||
146 | /** |
|
147 | * Should always be empty. |
|
148 | */ |
|
149 | private static final int PARSE_AUTHORITY_EXTRA = 3; |
|
150 | ||
151 | private static final String PATH_PATTERN = "/^(/[-\\w:@&?=+,.!/~*'%$]*)?$/"; |
|
152 | ||
153 | private static final String QUERY_PATTERN = "/^(.*)$/"; |
|
154 | ||
155 | private static final String LEGAL_ASCII_PATTERN = "/^[\\000-\\177]+$/"; |
|
156 | ||
157 | private static final String IP_V4_DOMAIN_PATTERN = |
|
158 | "/^(\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})$/"; |
|
159 | ||
160 | private static final String DOMAIN_PATTERN = |
|
161 | "/^" + ATOM + "(\\." + ATOM + ")*$/"; |
|
162 | ||
163 | private static final String PORT_PATTERN = "/^:(\\d{1,5})$/"; |
|
164 | ||
165 | private static final String ATOM_PATTERN = "/(" + ATOM + ")/"; |
|
166 | ||
167 | private static final String ALPHA_PATTERN = "/^[" + ALPHA_CHARS + "]/"; |
|
168 | ||
169 | /** |
|
170 | * Holds the set of current validation options. |
|
171 | */ |
|
172 | 18 | private Flags options = null; |
173 | ||
174 | /** |
|
175 | * The set of schemes that are allowed to be in a URL. |
|
176 | */ |
|
177 | 18 | private Set allowedSchemes = new HashSet(); |
178 | ||
179 | /** |
|
180 | * If no schemes are provided, default to this set. |
|
181 | */ |
|
182 | 18 | protected String[] defaultSchemes = {"http", "https", "ftp"}; |
183 | ||
184 | /** |
|
185 | * Create a UrlValidator with default properties. |
|
186 | */ |
|
187 | public UrlValidator() { |
|
188 | 15 | this(null); |
189 | 15 | } |
190 | ||
191 | /** |
|
192 | * Behavior of validation is modified by passing in several string options: |
|
193 | * @param schemes Pass in one or more url schemes to consider valid, passing in |
|
194 | * a null will default to "http,https,ftp" being valid. |
|
195 | * If a non-null schemes is specified then all valid schemes must |
|
196 | * be specified. Setting the ALLOW_ALL_SCHEMES option will |
|
197 | * ignore the contents of schemes. |
|
198 | */ |
|
199 | public UrlValidator(String[] schemes) { |
|
200 | 15 | this(schemes, 0); |
201 | 15 | } |
202 | ||
203 | /** |
|
204 | * Initialize a UrlValidator with the given validation options. |
|
205 | * @param options The options should be set using the public constants declared in |
|
206 | * this class. To set multiple options you simply add them together. For example, |
|
207 | * ALLOW_2_SLASHES + NO_FRAGMENTS enables both of those options. |
|
208 | */ |
|
209 | public UrlValidator(int options) { |
|
210 | 0 | this(null, options); |
211 | 0 | } |
212 | ||
213 | /** |
|
214 | * Behavior of validation is modified by passing in options: |
|
215 | * @param schemes The set of valid schemes. |
|
216 | * @param options The options should be set using the public constants declared in |
|
217 | * this class. To set multiple options you simply add them together. For example, |
|
218 | * ALLOW_2_SLASHES + NO_FRAGMENTS enables both of those options. |
|
219 | */ |
|
220 | 18 | public UrlValidator(String[] schemes, int options) { |
221 | 18 | this.options = new Flags(options); |
222 | ||
223 | 18 | if (this.options.isOn(ALLOW_ALL_SCHEMES)) { |
224 | 2 | return; |
225 | } |
|
226 | ||
227 | 16 | if (schemes == null) { |
228 | 15 | schemes = this.defaultSchemes; |
229 | } |
|
230 | ||
231 | 16 | this.allowedSchemes.addAll(Arrays.asList(schemes)); |
232 | 16 | } |
233 | ||
234 | /** |
|
235 | * <p>Checks if a field has a valid url address.</p> |
|
236 | * |
|
237 | * @param value The value validation is being performed on. A <code>null</code> |
|
238 | * value is considered invalid. |
|
239 | * @return true if the url is valid. |
|
240 | */ |
|
241 | public boolean isValid(String value) { |
|
242 | 75604 | if (value == null) { |
243 | 0 | return false; |
244 | } |
|
245 | ||
246 | 75604 | Perl5Util matchUrlPat = new Perl5Util(); |
247 | 75604 | Perl5Util matchAsciiPat = new Perl5Util(); |
248 | ||
249 | 75604 | if (!matchAsciiPat.match(LEGAL_ASCII_PATTERN, value)) { |
250 | 0 | return false; |
251 | } |
|
252 | ||
253 | // Check the whole url address structure |
|
254 | 75604 | if (!matchUrlPat.match(URL_PATTERN, value)) { |
255 | 0 | return false; |
256 | } |
|
257 | ||
258 | 75604 | if (!isValidScheme(matchUrlPat.group(PARSE_URL_SCHEME))) { |
259 | 28350 | return false; |
260 | } |
|
261 | ||
262 | 47254 | if (!isValidAuthority(matchUrlPat.group(PARSE_URL_AUTHORITY))) { |
263 | 39375 | return false; |
264 | } |
|
265 | ||
266 | 7879 | if (!isValidPath(matchUrlPat.group(PARSE_URL_PATH))) { |
267 | 2520 | return false; |
268 | } |
|
269 | ||
270 | 5359 | if (!isValidQuery(matchUrlPat.group(PARSE_URL_QUERY))) { |
271 | 0 | return false; |
272 | } |
|
273 | ||
274 | 5359 | if (!isValidFragment(matchUrlPat.group(PARSE_URL_FRAGMENT))) { |
275 | 630 | return false; |
276 | } |
|
277 | ||
278 | 4729 | return true; |
279 | } |
|
280 | ||
281 | /** |
|
282 | * Validate scheme. If schemes[] was initialized to a non null, |
|
283 | * then only those schemes are allowed. Note this is slightly different |
|
284 | * than for the constructor. |
|
285 | * @param scheme The scheme to validate. A <code>null</code> value is considered |
|
286 | * invalid. |
|
287 | * @return true if valid. |
|
288 | */ |
|
289 | protected boolean isValidScheme(String scheme) { |
|
290 | 75608 | if (scheme == null) { |
291 | 18900 | return false; |
292 | } |
|
293 | ||
294 | 56708 | Perl5Util schemeMatcher = new Perl5Util(); |
295 | 56708 | if (!schemeMatcher.match(SCHEME_PATTERN, scheme)) { |
296 | 9450 | return false; |
297 | } |
|
298 | ||
299 | 47258 | if (this.options.isOff(ALLOW_ALL_SCHEMES)) { |
300 | ||
301 | 4 | if (!this.allowedSchemes.contains(scheme)) { |
302 | 3 | return false; |
303 | } |
|
304 | } |
|
305 | ||
306 | 47255 | return true; |
307 | } |
|
308 | ||
309 | /** |
|
310 | * Returns true if the authority is properly formatted. An authority is the combination |
|
311 | * of hostname and port. A <code>null</code> authority value is considered invalid. |
|
312 | * @param authority Authority value to validate. |
|
313 | * @return true if authority (hostname and port) is valid. |
|
314 | */ |
|
315 | protected boolean isValidAuthority(String authority) { |
|
316 | 47254 | if (authority == null) { |
317 | 18831 | return false; |
318 | } |
|
319 | ||
320 | 28423 | Perl5Util authorityMatcher = new Perl5Util(); |
321 | 28423 | Perl5Util matchIPV4Pat = new Perl5Util(); |
322 | ||
323 | 28423 | if (!authorityMatcher.match(AUTHORITY_PATTERN, authority)) { |
324 | 0 | return false; |
325 | } |
|
326 | ||
327 | 28423 | boolean ipV4Address = false; |
328 | 28423 | boolean hostname = false; |
329 | // check if authority is IP address or hostname |
|
330 | 28423 | String hostIP = authorityMatcher.group(PARSE_AUTHORITY_HOST_IP); |
331 | 28423 | ipV4Address = matchIPV4Pat.match(IP_V4_DOMAIN_PATTERN, hostIP); |
332 | ||
333 | 28423 | if (ipV4Address) { |
334 | // this is an IP address so check components |
|
335 | 17325 | for (int i = 1; i <= 4; i++) { |
336 | 14175 | String ipSegment = matchIPV4Pat.group(i); |
337 | 14175 | if (ipSegment == null || ipSegment.length() <= 0) { |
338 | 0 | return false; |
339 | } |
|
340 | ||
341 | try { |
|
342 | 14175 | if (Integer.parseInt(ipSegment) > 255) { |
343 | 1575 | return false; |
344 | } |
|
345 | 12600 | } catch(NumberFormatException e) { |
346 | 0 | return false; |
347 | } |
|
348 | ||
349 | } |
|
350 | } else { |
|
351 | // Domain is a hostname |
|
352 | 23698 | Perl5Util domainMatcher = new Perl5Util(); |
353 | 23698 | hostname = domainMatcher.match(DOMAIN_PATTERN, hostIP); |
354 | } |
|
355 | ||
356 | //rightmost hostname will never start with a digit. |
|
357 | 26848 | if (hostname) { |
358 | 15787 | String[] domainSegment = new String[10]; |
359 | 15787 | boolean match = true; |
360 | 15787 | int segmentCount = 0; |
361 | 15787 | int segmentLength = 0; |
362 | 15787 | Perl5Util atomMatcher = new Perl5Util(); |
363 | ||
364 | 85206 | while (match) { |
365 | 53632 | match = atomMatcher.match(ATOM_PATTERN, hostIP); |
366 | 53632 | if (match) { |
367 | 37845 | domainSegment[segmentCount] = atomMatcher.group(1); |
368 | 37845 | segmentLength = domainSegment[segmentCount].length() + 1; |
369 | 37845 | hostIP = |
370 | (segmentLength >= hostIP.length()) |
|
371 | ? "" |
|
372 | : hostIP.substring(segmentLength); |
|
373 | ||
374 | 37845 | segmentCount++; |
375 | } |
|
376 | } |
|
377 | 15787 | String topLevel = domainSegment[segmentCount - 1]; |
378 | 15787 | if (topLevel.length() < 2 || topLevel.length() > 4) { |
379 | 4749 | return false; |
380 | } |
|
381 | ||
382 | // First letter of top level must be an alphabetic character |
|
383 | 11038 | Perl5Util alphaMatcher = new Perl5Util(); |
384 | 11038 | if (!alphaMatcher.match(ALPHA_PATTERN, topLevel.substring(0, 1))) { |
385 | 1575 | return false; |
386 | } |
|
387 | ||
388 | // Make sure there's a host name preceding the top-level domain. |
|
389 | 9463 | if (segmentCount < 2) { |
390 | 1584 | return false; |
391 | } |
|
392 | } |
|
393 | ||
394 | 18940 | if (!hostname && !ipV4Address) { |
395 | 7911 | return false; |
396 | } |
|
397 | ||
398 | 11029 | String port = authorityMatcher.group(PARSE_AUTHORITY_PORT); |
399 | 11029 | if (port != null) { |
400 | 9450 | Perl5Util portMatcher = new Perl5Util(); |
401 | 9450 | if (!portMatcher.match(PORT_PATTERN, port)) { |
402 | 1575 | return false; |
403 | } |
|
404 | } |
|
405 | ||
406 | 9454 | String extra = authorityMatcher.group(PARSE_AUTHORITY_EXTRA); |
407 | 9454 | if (!GenericValidator.isBlankOrNull(extra)) { |
408 | 1575 | return false; |
409 | } |
|
410 | ||
411 | 7879 | return true; |
412 | } |
|
413 | ||
414 | /** |
|
415 | * Returns true if the path is valid. A <code>null</code> value is considered invalid. |
|
416 | * @param path Path value to validate. |
|
417 | * @return true if path is valid. |
|
418 | */ |
|
419 | protected boolean isValidPath(String path) { |
|
420 | 7879 | if (path == null) { |
421 | 0 | return false; |
422 | } |
|
423 | ||
424 | 7879 | Perl5Util pathMatcher = new Perl5Util(); |
425 | ||
426 | 7879 | if (!pathMatcher.match(PATH_PATTERN, path)) { |
427 | 0 | return false; |
428 | } |
|
429 | ||
430 | 7879 | int slash2Count = countToken("//", path); |
431 | 7879 | if (this.options.isOff(ALLOW_2_SLASHES) && (slash2Count > 0)) { |
432 | 630 | return false; |
433 | } |
|
434 | ||
435 | 7249 | int slashCount = countToken("/", path); |
436 | 7249 | int dot2Count = countToken("..", path); |
437 | 7249 | if (dot2Count > 0) { |
438 | 1890 | if ((slashCount - slash2Count - 1) <= dot2Count) { |
439 | 1890 | return false; |
440 | } |
|
441 | } |
|
442 | ||
443 | 5359 | return true; |
444 | } |
|
445 | ||
446 | /** |
|
447 | * Returns true if the query is null or it's a properly formatted query string. |
|
448 | * @param query Query value to validate. |
|
449 | * @return true if query is valid. |
|
450 | */ |
|
451 | protected boolean isValidQuery(String query) { |
|
452 | 5359 | if (query == null) { |
453 | 2209 | return true; |
454 | } |
|
455 | ||
456 | 3150 | Perl5Util queryMatcher = new Perl5Util(); |
457 | 3150 | return queryMatcher.match(QUERY_PATTERN, query); |
458 | } |
|
459 | ||
460 | /** |
|
461 | * Returns true if the given fragment is null or fragments are allowed. |
|
462 | * @param fragment Fragment value to validate. |
|
463 | * @return true if fragment is valid. |
|
464 | */ |
|
465 | protected boolean isValidFragment(String fragment) { |
|
466 | 5359 | if (fragment == null) { |
467 | 4729 | return true; |
468 | } |
|
469 | ||
470 | 630 | return this.options.isOff(NO_FRAGMENTS); |
471 | } |
|
472 | ||
473 | /** |
|
474 | * Returns the number of times the token appears in the target. |
|
475 | * @param token Token value to be counted. |
|
476 | * @param target Target value to count tokens in. |
|
477 | * @return the number of tokens. |
|
478 | */ |
|
479 | protected int countToken(String token, String target) { |
|
480 | 22377 | int tokenIndex = 0; |
481 | 22377 | int count = 0; |
482 | 80993 | while (tokenIndex != -1) { |
483 | 36239 | tokenIndex = target.indexOf(token, tokenIndex); |
484 | 36239 | if (tokenIndex > -1) { |
485 | 13862 | tokenIndex++; |
486 | 13862 | count++; |
487 | } |
|
488 | } |
|
489 | 22377 | return count; |
490 | } |
|
491 | } |
This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |