/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;

/**
 * <p>
 * Operations on Strings that contain words.
 * </p>
 *
 * <p>
 * This class tries to handle <code>null</code> input gracefully. An exception will not be thrown for a
 * <code>null</code> input. Each method documents its behavior in more detail.
 * </p>
 *
 * @since 1.1
 */
public class WordUtils {

    /**
     * <p><code>WordUtils</code> instances should NOT be constructed in
     * standard programming. Instead, the class should be used as
     * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public WordUtils() {
        // intentionally empty: kept public only for JavaBean-style tooling
    }

    // Wrapping
    //--------------------------------------------------------------------------
    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>New lines will be separated by the system property line separator.
     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
     *
     * <p>Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <table border="1" summary="Wrap Results">
     *  <tr>
     *   <th>input</th>
     *   <th>wrapLength</th>
     *   <th>result</th>
     *  </tr>
     *  <tr>
     *   <td>null</td>
     *   <td>*</td>
     *   <td>null</td>
     *  </tr>
     *  <tr>
     *   <td>""</td>
     *   <td>*</td>
     *   <td>""</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here, http://commons.apache.org, to jump to the commons website"</td>
     *   <td>20</td>
     *   <td>"Click here,\nhttp://commons.apache.org,\nto jump to the\ncommons website"</td>
     *  </tr>
     * </table>
     *
     * (assuming that '\n' is the system's line separator)
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(final String str, final int wrapLength) {
        return wrap(str, wrapLength, null, false);
    }

    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <table border="1" summary="Wrap Results">
     *  <tr>
     *   <th>input</th>
     *   <th>wrapLength</th>
     *   <th>newLineString</th>
     *   <th>wrapLongWords</th>
     *   <th>result</th>
     *  </tr>
     *  <tr>
     *   <td>null</td>
     *   <td>*</td>
     *   <td>*</td>
     *   <td>true/false</td>
     *   <td>null</td>
     *  </tr>
     *  <tr>
     *   <td>""</td>
     *   <td>*</td>
     *   <td>*</td>
     *   <td>true/false</td>
     *   <td>""</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true/false</td>
     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"&lt;br /&gt;"</td>
     *   <td>true/false</td>
     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;to be wrapped
     *   after&lt;br /&gt;20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>null</td>
     *   <td>true/false</td>
     *   <td>"Here is one line of" + systemNewLine + "text that is going"
     *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>false</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td>
     *  </tr>
     * </table>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line,
     *  <code>null</code> uses the system property line separator
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(final String str,
                              final int wrapLength,
                              final String newLineStr,
                              final boolean wrapLongWords) {
        return wrap(str, wrapLength, newLineStr, wrapLongWords, " ");
    }

    /**
     * <p>Wraps a single line of text, identifying words by <code>wrapOn</code>.</p>
     *
     * <p>Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <table border="1" summary="Wrap Results">
     *  <tr>
     *   <th>input</th>
     *   <th>wrapLength</th>
     *   <th>newLineString</th>
     *   <th>wrapLongWords</th>
     *   <th>wrapOn</th>
     *   <th>result</th>
     *  </tr>
     *  <tr>
     *   <td>null</td>
     *   <td>*</td>
     *   <td>*</td>
     *   <td>true/false</td>
     *   <td>*</td>
     *   <td>null</td>
     *  </tr>
     *  <tr>
     *   <td>""</td>
     *   <td>*</td>
     *   <td>*</td>
     *   <td>true/false</td>
     *   <td>*</td>
     *   <td>""</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true/false</td>
     *   <td>" "</td>
     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"&lt;br /&gt;"</td>
     *   <td>true/false</td>
     *   <td>" "</td>
     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;to be wrapped
     *   after&lt;br /&gt;20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>null</td>
     *   <td>true/false</td>
     *   <td>" "</td>
     *   <td>"Here is one line of" + systemNewLine + "text that is going"
     *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>false</td>
     *   <td>" "</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true</td>
     *   <td>" "</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td>
     *  </tr>
     *  <tr>
     *   <td>"flammable/inflammable"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true</td>
     *   <td>"/"</td>
     *   <td>"flammable\ninflammable"</td>
     *  </tr>
     * </table>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line,
     *  <code>null</code> uses the system property line separator
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @param wrapOn  regular expression identifying the breakable characters;
     *  if a blank string is provided a space character will be used
     * @return a line with newlines inserted, <code>null</code> if null input
     * @throws java.util.regex.PatternSyntaxException if <code>wrapOn</code> is not a valid regular expression
     */
    public static String wrap(final String str,
                              int wrapLength,
                              String newLineStr,
                              final boolean wrapLongWords,
                              String wrapOn) {
        if (str == null) {
            return null;
        }
        if (newLineStr == null) {
            newLineStr = System.lineSeparator();
        }
        if (wrapLength < 1) {
            wrapLength = 1;
        }
        if (StringUtils.isBlank(wrapOn)) {
            wrapOn = " ";
        }
        final Pattern patternToWrapOn = Pattern.compile(wrapOn);
        final int inputLineLength = str.length();
        int offset = 0;
        final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);

        while (offset < inputLineLength) {
            int spaceToWrapAt = -1;
            // The candidate window is one char wider than wrapLength so that a delimiter sitting
            // exactly at the wrap column is seen. Its end is computed in long arithmetic because
            // offset + wrapLength + 1 overflows int when wrapLength is close to Integer.MAX_VALUE.
            final int windowEnd = (int) Math.min((long) offset + wrapLength + 1L, inputLineLength);
            Matcher matcher = patternToWrapOn.matcher(str.substring(offset, windowEnd));
            if (matcher.find()) {
                if (matcher.start() == 0) {
                    // a delimiter at the very start of a line is dropped
                    offset += matcher.end();
                    continue;
                }
                spaceToWrapAt = matcher.start() + offset;
            }

            // only last line without leading spaces is left
            if (inputLineLength - offset <= wrapLength) {
                break;
            }

            // keep the last delimiter inside the window: wrap as late as possible
            while (matcher.find()) {
                spaceToWrapAt = matcher.start() + offset;
            }

            if (spaceToWrapAt >= offset) {
                // normal case
                wrappedLine.append(str, offset, spaceToWrapAt);
                wrappedLine.append(newLineStr);
                offset = spaceToWrapAt + 1;

            } else {
                // really long word or URL
                if (wrapLongWords) {
                    // wrap really long word one line at a time
                    wrappedLine.append(str, offset, wrapLength + offset);
                    wrappedLine.append(newLineStr);
                    offset += wrapLength;
                } else {
                    // do not wrap really long word, just extend beyond limit
                    matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength));
                    if (matcher.find()) {
                        spaceToWrapAt = matcher.start() + offset + wrapLength;
                    }

                    if (spaceToWrapAt >= 0) {
                        wrappedLine.append(str, offset, spaceToWrapAt);
                        wrappedLine.append(newLineStr);
                        offset = spaceToWrapAt + 1;
                    } else {
                        wrappedLine.append(str, offset, str.length());
                        offset = inputLineLength;
                    }
                }
            }
        }

        // Whatever is left in line is short enough to just pass through
        wrappedLine.append(str, offset, str.length());

        return wrappedLine.toString();
    }

    // Capitalizing
    //-----------------------------------------------------------------------
    /**
     * <p>Capitalizes all the whitespace separated words in a String.
     * Only the first character of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String)}.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null)        = null
     * WordUtils.capitalize("")          = ""
     * WordUtils.capitalize("i am FINE") = "I Am FINE"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     */
    public static String capitalize(final String str) {
        return capitalize(str, null);
    }

    /**
     * <p>Capitalizes all the delimiter separated words in a String.
     * Only the first character of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String, char[])}.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. If the delimiters array is <code>null</code>,
     * whitespace as defined by {@link Character#isWhitespace(int)} is used. With an
     * empty delimiters array only the first character of the String is capitalized.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null, *)                   = null
     * WordUtils.capitalize("", *)                     = ""
     * WordUtils.capitalize("i am fine", null)         = "I Am Fine"
     * WordUtils.capitalize("i aM.fine", {'.'})        = "I aM.Fine"
     * WordUtils.capitalize("i am fine", new char[]{}) = "I am fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     */
    public static String capitalize(final String str, final char... delimiters) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
        final int strLen = str.length();
        final int[] newCodePoints = new int[strLen];
        int outOffset = 0;

        boolean capitalizeNext = true;
        for (int index = 0; index < strLen;) {
            final int codePoint = str.codePointAt(index);

            if (isWordDelimiter(codePoint, delimiters, delimiterSet)) {
                capitalizeNext = true;
                newCodePoints[outOffset++] = codePoint;
            } else if (capitalizeNext) {
                newCodePoints[outOffset++] = Character.toTitleCase(codePoint);
                capitalizeNext = false;
            } else {
                newCodePoints[outOffset++] = codePoint;
            }
            // advance by the width of the code point that was read, not of the converted one
            index += Character.charCount(codePoint);
        }
        return new String(newCodePoints, 0, outOffset);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts all the whitespace separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters.  </p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null)        = null
     * WordUtils.capitalizeFully("")          = ""
     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     */
    public static String capitalizeFully(final String str) {
        return capitalizeFully(str, null);
    }

    /**
     * <p>Converts all the delimiter separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters. </p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. If the delimiters array is <code>null</code>,
     * whitespace as defined by {@link Character#isWhitespace(int)} is used.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case. The String is lower-cased with {@link String#toLowerCase()},
     * i.e. using the rules of the default locale.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null, *)                 = null
     * WordUtils.capitalizeFully("", *)                   = ""
     * WordUtils.capitalizeFully("i am FINE", null)       = "I Am Fine"
     * WordUtils.capitalizeFully("i aM.fine", new char[0]) = "I am.fine"
     * WordUtils.capitalizeFully("i aM.fine", {'.'})      = "I am.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     */
    public static String capitalizeFully(String str, final char... delimiters) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        str = str.toLowerCase();
        return capitalize(str, delimiters);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Uncapitalizes all the whitespace separated words in a String.
     * Only the first character of each word is changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null)        = null
     * WordUtils.uncapitalize("")          = ""
     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(final String str) {
        return uncapitalize(str, null);
    }

    /**
     * <p>Uncapitalizes all the delimiter separated words in a String.
     * Only the first character of each word is changed.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be uncapitalized. If the delimiters array is <code>null</code>,
     * whitespace as defined by {@link Character#isWhitespace(int)} is used. With an
     * empty delimiters array only the first character of the String is uncapitalized.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null, *)                   = null
     * WordUtils.uncapitalize("", *)                     = ""
     * WordUtils.uncapitalize("I Am FINE", null)         = "i am fINE"
     * WordUtils.uncapitalize("I AM.FINE", {'.'})        = "i AM.fINE"
     * WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(final String str, final char... delimiters) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
        final int strLen = str.length();
        final int[] newCodePoints = new int[strLen];
        int outOffset = 0;

        boolean uncapitalizeNext = true;
        for (int index = 0; index < strLen;) {
            final int codePoint = str.codePointAt(index);

            if (isWordDelimiter(codePoint, delimiters, delimiterSet)) {
                uncapitalizeNext = true;
                newCodePoints[outOffset++] = codePoint;
            } else if (uncapitalizeNext) {
                newCodePoints[outOffset++] = Character.toLowerCase(codePoint);
                uncapitalizeNext = false;
            } else {
                newCodePoints[outOffset++] = codePoint;
            }
            // advance by the width of the code point that was read, not of the converted one
            index += Character.charCount(codePoint);
        }
        return new String(newCodePoints, 0, outOffset);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Swaps the case of a String using a word based algorithm.</p>
     *
     * <ul>
     *  <li>Upper case character converts to Lower case</li>
     *  <li>Title case character converts to Lower case</li>
     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
     *  <li>Other Lower case character converts to Upper case</li>
     * </ul>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.swapCase(null)                 = null
     * WordUtils.swapCase("")                   = ""
     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
     * </pre>
     *
     * @param str  the String to swap case, may be null
     * @return the changed String, <code>null</code> if null String input
     */
    public static String swapCase(final String str) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        final int strLen = str.length();
        final int[] newCodePoints = new int[strLen];
        int outOffset = 0;
        // true while the previous code point was whitespace (or at the start of the String)
        boolean whitespace = true;
        for (int index = 0; index < strLen;) {
            final int oldCodepoint = str.codePointAt(index);
            final int newCodePoint;
            if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) {
                newCodePoint = Character.toLowerCase(oldCodepoint);
                whitespace = false;
            } else if (Character.isLowerCase(oldCodepoint)) {
                if (whitespace) {
                    newCodePoint = Character.toTitleCase(oldCodepoint);
                    whitespace = false;
                } else {
                    newCodePoint = Character.toUpperCase(oldCodepoint);
                }
            } else {
                whitespace = Character.isWhitespace(oldCodepoint);
                newCodePoint = oldCodepoint;
            }
            newCodePoints[outOffset++] = newCodePoint;
            // advance by the width of the code point that was read, not of the converted one
            index += Character.charCount(oldCodepoint);
        }
        return new String(newCodePoints, 0, outOffset);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Extracts the initial characters from each word in the String.</p>
     *
     * <p>All first characters after whitespace are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.initials(null)             = null
     * WordUtils.initials("")               = ""
     * WordUtils.initials("Ben John Lee")   = "BJL"
     * WordUtils.initials("Ben J.Lee")      = "BJ"
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String,char[])
     */
    public static String initials(final String str) {
        return initials(str, null);
    }

    /**
     * <p>Extracts the initial characters from each word in the String.</p>
     *
     * <p>All first characters after the defined delimiters are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>If the delimiters array is null, then Whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * An empty delimiter array returns an empty String.</p>
     *
     * <pre>
     * WordUtils.initials(null, *)                = null
     * WordUtils.initials("", *)                  = ""
     * WordUtils.initials("Ben John Lee", null)   = "BJL"
     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
     * WordUtils.initials(*, new char[0])         = ""
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @param delimiters  set of characters to determine words, null means whitespace
     * @return String of initial characters, <code>null</code> if null String input
     * @see #initials(String)
     */
    public static String initials(final String str, final char... delimiters) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        if (delimiters != null && delimiters.length == 0) {
            return "";
        }
        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
        final int strLen = str.length();
        // at most every second char can be an initial (word, delimiter, word, ...)
        final int[] newCodePoints = new int[strLen / 2 + 1];
        int count = 0;
        boolean lastWasGap = true;
        for (int i = 0; i < strLen;) {
            final int codePoint = str.codePointAt(i);

            if (isWordDelimiter(codePoint, delimiters, delimiterSet)) {
                lastWasGap = true;
            } else if (lastWasGap) {
                newCodePoints[count++] = codePoint;
                lastWasGap = false;
            }

            i += Character.charCount(codePoint);
        }
        return new String(newCodePoints, 0, count);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Checks if the String contains all words in the given array.</p>
     *
     * <p>
     * A {@code null} String will return {@code false}. A {@code null}, zero
     * length search array or if one element of array is null will return {@code false}.
     * </p>
     *
     * <p>
     * Each search word is matched literally (regular expression metacharacters in it
     * have no special meaning) and must be delimited by word boundaries in the String.
     * </p>
     *
     * <pre>
     * WordUtils.containsAllWords(null, *)            = false
     * WordUtils.containsAllWords("", *)              = false
     * WordUtils.containsAllWords(*, null)            = false
     * WordUtils.containsAllWords(*, [])              = false
     * WordUtils.containsAllWords("abcd", "ab", "cd") = false
     * WordUtils.containsAllWords("abc def", "def", "abc") = true
     * </pre>
     *
     * @param word The CharSequence to check, may be null
     * @param words The array of String words to search for, may be null
     * @return {@code true} if all search words are found, {@code false} otherwise
     */
    public static boolean containsAllWords(final CharSequence word, final CharSequence... words) {
        if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) {
            return false;
        }
        for (final CharSequence w : words) {
            if (StringUtils.isBlank(w)) {
                return false;
            }
            // Quote the word so that characters such as '(', '+' or '.' are matched literally,
            // and use find() so that line terminators in the input do not prevent a match.
            final Pattern p = Pattern.compile("\\b" + Pattern.quote(w.toString()) + "\\b");
            if (!p.matcher(word).find()) {
                return false;
            }
        }
        return true;
    }

    //-----------------------------------------------------------------------
    /**
     * Is the character a delimiter.
     *
     * @param ch  the character to check
     * @param delimiters  the delimiters, <code>null</code> means whitespace
     *  as defined by {@link Character#isWhitespace(char)}
     * @return true if it is a delimiter
     * @deprecated as of 1.2 and will be removed in 2.0
     */
    @Deprecated
    public static boolean isDelimiter(final char ch, final char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(ch);
        }
        for (final char delimiter : delimiters) {
            if (ch == delimiter) {
                return true;
            }
        }
        return false;
    }

    //-----------------------------------------------------------------------
    /**
     * Is the codePoint a delimiter.
     *
     * @param codePoint the codePoint to check
     * @param delimiters  the delimiters, <code>null</code> means whitespace
     *  as defined by {@link Character#isWhitespace(int)}
     * @return true if it is a delimiter
     * @deprecated as of 1.2 and will be removed in 2.0
     */
    @Deprecated
    public static boolean isDelimiter(final int codePoint, final char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(codePoint);
        }
        for (int index = 0; index < delimiters.length; index++) {
            final int delimiterCodePoint = Character.codePointAt(delimiters, index);
            if (delimiterCodePoint == codePoint) {
                return true;
            }
        }
        return false;
    }

    //-----------------------------------------------------------------------
    /**
     * Abbreviates the words nicely.
     *
     * This method searches for the first space after the lower limit and abbreviates
     * the String there. It will also append any String passed as a parameter
     * to the end of the String. The upper limit can be specified to forcibly
     * abbreviate a String.
     *
     * <pre>
     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null)     = "Now"
     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null)    = "Now is the"
     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null)    = "Now is the time for all"
     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, "")       = "Now"
     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, "")      = "Now is the"
     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, "")      = "Now is the time for all"
     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ...")   = "Now ..."
     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ...")  = "Now is the ..."
     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ...")  = "Now is the time for all ..."
     * WordUtils.abbreviate("Now is the time for all good men", 0, -1, "")       = "Now"
     * WordUtils.abbreviate("Now is the time for all good men", 10, -1, "")      = "Now is the"
     * WordUtils.abbreviate("Now is the time for all good men", 20, -1, "")      = "Now is the time for all"
     * WordUtils.abbreviate("Now is the time for all good men", 50, -1, "")      = "Now is the time for all good men"
     * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, "")    = "Now is the time for all good men"
     * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null)    = IllegalArgumentException
     * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null)     = IllegalArgumentException
     * </pre>
     *
     * @param str         the string to be abbreviated. If null is passed, null is returned.
     *                    If the empty String is passed, the empty string is returned.
     * @param lower       the lower limit; a value greater than the length of the string
     *                    is treated as the length of the string.
     * @param upper       the upper limit; specify -1 if no limit is desired.
     *                    A value greater than the length of the string is treated as
     *                    the length of the string.
     * @param appendToEnd String to be appended to the end of the abbreviated string.
     *                    This is appended ONLY if the string was indeed abbreviated.
     *                    The append does not count towards the lower or upper limits.
     * @return the abbreviated String.
     * @throws IllegalArgumentException if <code>upper</code> is less than -1, or if
     *                    <code>upper</code> is not -1 and is less than <code>lower</code>
     */
    public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) {
        Validate.isTrue(upper >= -1, "upper value cannot be less than -1");
        Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value");

        if (StringUtils.isEmpty(str)) {
            return str;
        }

        // if the lower value is greater than the length of the string,
        // set to the length of the string
        if (lower > str.length()) {
            lower = str.length();
        }

        // if the upper value is -1 (i.e. no limit) or is greater
        // than the length of the string, set to the length of the string
        if (upper == -1 || upper > str.length()) {
            upper = str.length();
        }

        final StringBuilder result = new StringBuilder();
        final int index = StringUtils.indexOf(str, " ", lower);
        if (index == -1) {
            // no space at or after the lower limit: cut at the upper limit
            result.append(str, 0, upper);
            // only if abbreviation has occurred do we append the appendToEnd value
            if (upper != str.length()) {
                result.append(StringUtils.defaultString(appendToEnd));
            }
        } else if (index > upper) {
            // the next space lies beyond the upper limit: cut forcibly at the upper limit
            result.append(str, 0, upper);
            result.append(StringUtils.defaultString(appendToEnd));
        } else {
            // cut at the first space at or after the lower limit
            result.append(str, 0, index);
            result.append(StringUtils.defaultString(appendToEnd));
        }

        return result.toString();
    }

    // -----------------------------------------------------------------------
    /**
     * <p>
     * Converts an array of delimiters to a hash set of code points. Code point of space(32) is added as the default
     * value if delimiters is null. The generated hash set provides O(1) lookup time.
     * </p>
     *
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return the delimiter code points as a {@code Set<Integer>}; empty if delimiters is an empty array
     */
    private static Set<Integer> generateDelimiterSet(final char[] delimiters) {
        final Set<Integer> delimiterHashSet = new HashSet<>();
        if (delimiters == null) {
            delimiterHashSet.add((int) ' ');
            return delimiterHashSet;
        }

        for (int index = 0; index < delimiters.length; index++) {
            delimiterHashSet.add(Character.codePointAt(delimiters, index));
        }
        return delimiterHashSet;
    }

    /**
     * Decides whether a code point separates words for the delimiter based methods.
     *
     * @param codePoint  the code point to check
     * @param delimiters  the delimiters as supplied by the caller; <code>null</code> selects
     *  whitespace as defined by {@link Character#isWhitespace(int)}
     * @param delimiterSet  the code points generated from <code>delimiters</code>
     * @return true if the code point is a word delimiter
     */
    private static boolean isWordDelimiter(final int codePoint, final char[] delimiters,
            final Set<Integer> delimiterSet) {
        if (delimiters == null) {
            return Character.isWhitespace(codePoint);
        }
        return delimiterSet.contains(codePoint);
    }
}