001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text;
018
019import java.util.HashSet;
020import java.util.Set;
021import java.util.regex.Matcher;
022import java.util.regex.Pattern;
023
024import org.apache.commons.lang3.ArrayUtils;
025import org.apache.commons.lang3.StringUtils;
026import org.apache.commons.lang3.Validate;
027
028/**
029 * <p>
030 * Operations on Strings that contain words.
031 * </p>
032 *
033 * <p>
034 * This class tries to handle <code>null</code> input gracefully. An exception will not be thrown for a
035 * <code>null</code> input. Each method documents its behavior in more detail.
036 * </p>
037 *
038 * @since 1.1
039 */
040public class WordUtils {
041
042    /**
043     * <p><code>WordUtils</code> instances should NOT be constructed in
044     * standard programming. Instead, the class should be used as
045     * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
046     *
047     * <p>This constructor is public to permit tools that require a JavaBean
048     * instance to operate.</p>
049     */
050    public WordUtils() {
051      super();
052    }
053
054    // Wrapping
055    //--------------------------------------------------------------------------
056    /**
057     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
058     *
059     * <p>New lines will be separated by the system property line separator.
060     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
061     *
062     * <p>Leading spaces on a new line are stripped.
063     * Trailing spaces are not stripped.</p>
064     *
065     * <table border="1" summary="Wrap Results">
066     *  <tr>
067     *   <th>input</th>
068     *   <th>wrapLength</th>
069     *   <th>result</th>
070     *  </tr>
071     *  <tr>
072     *   <td>null</td>
073     *   <td>*</td>
074     *   <td>null</td>
075     *  </tr>
076     *  <tr>
077     *   <td>""</td>
078     *   <td>*</td>
079     *   <td>""</td>
080     *  </tr>
081     *  <tr>
082     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
083     *   <td>20</td>
084     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
085     *  </tr>
086     *  <tr>
087     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
088     *   <td>20</td>
089     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
090     *  </tr>
091     *  <tr>
092     *   <td>"Click here, http://commons.apache.org, to jump to the commons website"</td>
093     *   <td>20</td>
094     *   <td>"Click here,\nhttp://commons.apache.org,\nto jump to the\ncommons website"</td>
095     *  </tr>
096     * </table>
097     *
     * (assuming that '\n' is the system's line separator)
099     *
100     * @param str  the String to be word wrapped, may be null
101     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
102     * @return a line with newlines inserted, <code>null</code> if null input
103     */
104    public static String wrap(final String str, final int wrapLength) {
105        return wrap(str, wrapLength, null, false);
106    }
107
108    /**
109     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
110     *
111     * <p>Leading spaces on a new line are stripped.
112     * Trailing spaces are not stripped.</p>
113     *
114     * <table border="1" summary="Wrap Results">
115     *  <tr>
116     *   <th>input</th>
117     *   <th>wrapLength</th>
118     *   <th>newLineString</th>
119     *   <th>wrapLongWords</th>
120     *   <th>result</th>
121     *  </tr>
122     *  <tr>
123     *   <td>null</td>
124     *   <td>*</td>
125     *   <td>*</td>
126     *   <td>true/false</td>
127     *   <td>null</td>
128     *  </tr>
129     *  <tr>
130     *   <td>""</td>
131     *   <td>*</td>
132     *   <td>*</td>
133     *   <td>true/false</td>
134     *   <td>""</td>
135     *  </tr>
136     *  <tr>
137     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
138     *   <td>20</td>
139     *   <td>"\n"</td>
140     *   <td>true/false</td>
141     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
142     *  </tr>
143     *  <tr>
144     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
145     *   <td>20</td>
146     *   <td>"&lt;br /&gt;"</td>
147     *   <td>true/false</td>
148     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;
149     *   br /&gt;to be wrapped after&lt;br /&gt;20 columns."</td>
150     *  </tr>
151     *  <tr>
152     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
153     *   <td>20</td>
154     *   <td>null</td>
155     *   <td>true/false</td>
156     *   <td>"Here is one line of" + systemNewLine + "text that is going"
157     *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
158     *  </tr>
159     *  <tr>
160     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
161     *   <td>20</td>
162     *   <td>"\n"</td>
163     *   <td>false</td>
164     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
165     *  </tr>
166     *  <tr>
167     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
168     *   <td>20</td>
169     *   <td>"\n"</td>
170     *   <td>true</td>
171     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td>
172     *  </tr>
173     * </table>
174     *
175     * @param str  the String to be word wrapped, may be null
176     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
177     * @param newLineStr  the string to insert for a new line,
178     *  <code>null</code> uses the system property line separator
179     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
180     * @return a line with newlines inserted, <code>null</code> if null input
181     */
182    public static String wrap(final String str,
183                              final int wrapLength,
184                              final String newLineStr,
185                              final boolean wrapLongWords) {
186        return wrap(str, wrapLength, newLineStr, wrapLongWords, " ");
187    }
188
189    /**
190     * <p>Wraps a single line of text, identifying words by <code>wrapOn</code>.</p>
191     *
192     * <p>Leading spaces on a new line are stripped.
193     * Trailing spaces are not stripped.</p>
194     *
195     * <table border="1" summary="Wrap Results">
196     *  <tr>
197     *   <th>input</th>
198     *   <th>wrapLength</th>
199     *   <th>newLineString</th>
200     *   <th>wrapLongWords</th>
201     *   <th>wrapOn</th>
202     *   <th>result</th>
203     *  </tr>
204     *  <tr>
205     *   <td>null</td>
206     *   <td>*</td>
207     *   <td>*</td>
208     *   <td>true/false</td>
209     *   <td>*</td>
210     *   <td>null</td>
211     *  </tr>
212     *  <tr>
213     *   <td>""</td>
214     *   <td>*</td>
215     *   <td>*</td>
216     *   <td>true/false</td>
217     *   <td>*</td>
218     *   <td>""</td>
219     *  </tr>
220     *  <tr>
221     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
222     *   <td>20</td>
223     *   <td>"\n"</td>
224     *   <td>true/false</td>
225     *   <td>" "</td>
226     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
227     *  </tr>
228     *  <tr>
229     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
230     *   <td>20</td>
231     *   <td>"&lt;br /&gt;"</td>
232     *   <td>true/false</td>
233     *   <td>" "</td>
234     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;
235     *   to be wrapped after&lt;br /&gt;20 columns."</td>
236     *  </tr>
237     *  <tr>
238     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
239     *   <td>20</td>
240     *   <td>null</td>
241     *   <td>true/false</td>
242     *   <td>" "</td>
243     *   <td>"Here is one line of" + systemNewLine + "text that is going"
244     *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
245     *  </tr>
246     *  <tr>
247     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
248     *   <td>20</td>
249     *   <td>"\n"</td>
250     *   <td>false</td>
251     *   <td>" "</td>
252     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
253     *  </tr>
254     *  <tr>
255     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
256     *   <td>20</td>
257     *   <td>"\n"</td>
258     *   <td>true</td>
259     *   <td>" "</td>
260     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td>
261     *  </tr>
262     *  <tr>
263     *   <td>"flammable/inflammable"</td>
264     *   <td>20</td>
265     *   <td>"\n"</td>
266     *   <td>true</td>
267     *   <td>"/"</td>
268     *   <td>"flammable\ninflammable"</td>
269     *  </tr>
270     * </table>
271     * @param str  the String to be word wrapped, may be null
272     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
273     * @param newLineStr  the string to insert for a new line,
274     *  <code>null</code> uses the system property line separator
275     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @param wrapOn  regular expression identifying the breakable characters;
     *               if it is blank, a single space character is used
278     * @return a line with newlines inserted, <code>null</code> if null input
279     */
280    public static String wrap(final String str,
281                              int wrapLength,
282                              String newLineStr,
283                              final boolean wrapLongWords,
284                              String wrapOn) {
285        if (str == null) {
286            return null;
287        }
288        if (newLineStr == null) {
289            newLineStr = System.lineSeparator();
290        }
291        if (wrapLength < 1) {
292            wrapLength = 1;
293        }
294        if (StringUtils.isBlank(wrapOn)) {
295            wrapOn = " ";
296        }
297        final Pattern patternToWrapOn = Pattern.compile(wrapOn);
298        final int inputLineLength = str.length();
299        int offset = 0;
300        final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
301
302        while (offset < inputLineLength) {
303            int spaceToWrapAt = -1;
304            Matcher matcher = patternToWrapOn.matcher(str.substring(offset, Math
305                    .min(offset + wrapLength + 1, inputLineLength)));
306            if (matcher.find()) {
307                if (matcher.start() == 0) {
308                    offset += matcher.end();
309                    continue;
310                }
311                spaceToWrapAt = matcher.start() + offset;
312            }
313
314            // only last line without leading spaces is left
315            if (inputLineLength - offset <= wrapLength) {
316                break;
317            }
318
319            while (matcher.find()) {
320                spaceToWrapAt = matcher.start() + offset;
321            }
322
323            if (spaceToWrapAt >= offset) {
324                // normal case
325                wrappedLine.append(str, offset, spaceToWrapAt);
326                wrappedLine.append(newLineStr);
327                offset = spaceToWrapAt + 1;
328
329            } else {
330                // really long word or URL
331                if (wrapLongWords) {
332                    // wrap really long word one line at a time
333                    wrappedLine.append(str, offset, wrapLength + offset);
334                    wrappedLine.append(newLineStr);
335                    offset += wrapLength;
336                } else {
337                    // do not wrap really long word, just extend beyond limit
338                    matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength));
339                    if (matcher.find()) {
340                        spaceToWrapAt = matcher.start() + offset + wrapLength;
341                    }
342
343                    if (spaceToWrapAt >= 0) {
344                        wrappedLine.append(str, offset, spaceToWrapAt);
345                        wrappedLine.append(newLineStr);
346                        offset = spaceToWrapAt + 1;
347                    } else {
348                        wrappedLine.append(str, offset, str.length());
349                        offset = inputLineLength;
350                    }
351                }
352            }
353        }
354
355        // Whatever is left in line is short enough to just pass through
356        wrappedLine.append(str, offset, str.length());
357
358        return wrappedLine.toString();
359    }
360
361    // Capitalizing
362    //-----------------------------------------------------------------------
363    /**
364     * <p>Capitalizes all the whitespace separated words in a String.
365     * Only the first character of each word is changed. To convert the
366     * rest of each word to lowercase at the same time,
367     * use {@link #capitalizeFully(String)}.</p>
368     *
369     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
370     * A <code>null</code> input String returns <code>null</code>.
371     * Capitalization uses the Unicode title case, normally equivalent to
372     * upper case.</p>
373     *
374     * <pre>
375     * WordUtils.capitalize(null)        = null
376     * WordUtils.capitalize("")          = ""
377     * WordUtils.capitalize("i am FINE") = "I Am FINE"
378     * </pre>
379     *
380     * @param str  the String to capitalize, may be null
381     * @return capitalized String, <code>null</code> if null String input
382     * @see #uncapitalize(String)
383     * @see #capitalizeFully(String)
384     */
385    public static String capitalize(final String str) {
386        return capitalize(str, null);
387    }
388
389    /**
390     * <p>Capitalizes all the delimiter separated words in a String.
391     * Only the first character of each word is changed. To convert the
392     * rest of each word to lowercase at the same time,
393     * use {@link #capitalizeFully(String, char[])}.</p>
394     *
395     * <p>The delimiters represent a set of characters understood to separate words.
396     * The first string character and the first non-delimiter character after a
397     * delimiter will be capitalized. </p>
398     *
399     * <p>A <code>null</code> input String returns <code>null</code>.
400     * Capitalization uses the Unicode title case, normally equivalent to
401     * upper case.</p>
402     *
403     * <pre>
404     * WordUtils.capitalize(null, *)            = null
405     * WordUtils.capitalize("", *)              = ""
406     * WordUtils.capitalize(*, new char[0])     = *
407     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
408     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
409     * WordUtils.capitalize("i am fine", new char[]{}) = "I am fine"
410     * </pre>
411     *
412     * @param str  the String to capitalize, may be null
413     * @param delimiters  set of characters to determine capitalization, null means whitespace
414     * @return capitalized String, <code>null</code> if null String input
415     * @see #uncapitalize(String)
416     * @see #capitalizeFully(String)
417     */
418    public static String capitalize(final String str, final char... delimiters) {
419        if (StringUtils.isEmpty(str)) {
420            return str;
421        }
422        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
423        final int strLen = str.length();
424        final int[] newCodePoints = new int[strLen];
425        int outOffset = 0;
426
427        boolean capitalizeNext = true;
428        for (int index = 0; index < strLen;) {
429            final int codePoint = str.codePointAt(index);
430
431            if (delimiterSet.contains(codePoint)) {
432                capitalizeNext = true;
433                newCodePoints[outOffset++] = codePoint;
434                index += Character.charCount(codePoint);
435            } else if (capitalizeNext) {
436                final int titleCaseCodePoint = Character.toTitleCase(codePoint);
437                newCodePoints[outOffset++] = titleCaseCodePoint;
438                index += Character.charCount(titleCaseCodePoint);
439                capitalizeNext = false;
440            } else {
441                newCodePoints[outOffset++] = codePoint;
442                index += Character.charCount(codePoint);
443            }
444        }
445        return new String(newCodePoints, 0, outOffset);
446    }
447
448    //-----------------------------------------------------------------------
449    /**
450     * <p>Converts all the whitespace separated words in a String into capitalized words,
451     * that is each word is made up of a titlecase character and then a series of
452     * lowercase characters.  </p>
453     *
454     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
455     * A <code>null</code> input String returns <code>null</code>.
456     * Capitalization uses the Unicode title case, normally equivalent to
457     * upper case.</p>
458     *
459     * <pre>
460     * WordUtils.capitalizeFully(null)        = null
461     * WordUtils.capitalizeFully("")          = ""
462     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
463     * </pre>
464     *
465     * @param str  the String to capitalize, may be null
466     * @return capitalized String, <code>null</code> if null String input
467     */
468    public static String capitalizeFully(final String str) {
469        return capitalizeFully(str, null);
470    }
471
472    /**
473     * <p>Converts all the delimiter separated words in a String into capitalized words,
474     * that is each word is made up of a titlecase character and then a series of
475     * lowercase characters. </p>
476     *
477     * <p>The delimiters represent a set of characters understood to separate words.
478     * The first string character and the first non-delimiter character after a
479     * delimiter will be capitalized. </p>
480     *
481     * <p>A <code>null</code> input String returns <code>null</code>.
482     * Capitalization uses the Unicode title case, normally equivalent to
483     * upper case.</p>
484     *
485     * <pre>
486     * WordUtils.capitalizeFully(null, *)            = null
487     * WordUtils.capitalizeFully("", *)              = ""
     * WordUtils.capitalizeFully("i am FINE", null)  = "I Am Fine"
489     * WordUtils.capitalizeFully(*, new char[0])     = *
490     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
491     * </pre>
492     *
493     * @param str  the String to capitalize, may be null
494     * @param delimiters  set of characters to determine capitalization, null means whitespace
495     * @return capitalized String, <code>null</code> if null String input
496     */
497    public static String capitalizeFully(String str, final char... delimiters) {
498        if (StringUtils.isEmpty(str)) {
499            return str;
500        }
501        str = str.toLowerCase();
502        return capitalize(str, delimiters);
503    }
504
505    //-----------------------------------------------------------------------
506    /**
507     * <p>Uncapitalizes all the whitespace separated words in a String.
508     * Only the first character of each word is changed.</p>
509     *
510     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
511     * A <code>null</code> input String returns <code>null</code>.</p>
512     *
513     * <pre>
514     * WordUtils.uncapitalize(null)        = null
515     * WordUtils.uncapitalize("")          = ""
516     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
517     * </pre>
518     *
519     * @param str  the String to uncapitalize, may be null
520     * @return uncapitalized String, <code>null</code> if null String input
521     * @see #capitalize(String)
522     */
523    public static String uncapitalize(final String str) {
524        return uncapitalize(str, null);
525    }
526
527    /**
     * <p>Uncapitalizes all the delimiter separated words in a String.
529     * Only the first character of each word is changed.</p>
530     *
531     * <p>The delimiters represent a set of characters understood to separate words.
532     * The first string character and the first non-delimiter character after a
533     * delimiter will be uncapitalized. </p>
534     *
535     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
536     * A <code>null</code> input String returns <code>null</code>.</p>
537     *
538     * <pre>
539     * WordUtils.uncapitalize(null, *)            = null
540     * WordUtils.uncapitalize("", *)              = ""
     * WordUtils.uncapitalize("I Am FINE", null)  = "i am fINE"
542     * WordUtils.uncapitalize(*, new char[0])     = *
543     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
544     * WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine"
545     * </pre>
546     *
547     * @param str  the String to uncapitalize, may be null
548     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
549     * @return uncapitalized String, <code>null</code> if null String input
550     * @see #capitalize(String)
551     */
552    public static String uncapitalize(final String str, final char... delimiters) {
553        if (StringUtils.isEmpty(str)) {
554            return str;
555        }
556        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
557        final int strLen = str.length();
558        final int[] newCodePoints = new int[strLen];
559        int outOffset = 0;
560
561        boolean uncapitalizeNext = true;
562        for (int index = 0; index < strLen;) {
563            final int codePoint = str.codePointAt(index);
564
565            if (delimiterSet.contains(codePoint)) {
566                uncapitalizeNext = true;
567                newCodePoints[outOffset++] = codePoint;
568                index += Character.charCount(codePoint);
569            } else if (uncapitalizeNext) {
570                final int titleCaseCodePoint = Character.toLowerCase(codePoint);
571                newCodePoints[outOffset++] = titleCaseCodePoint;
572                index += Character.charCount(titleCaseCodePoint);
573                uncapitalizeNext = false;
574            } else {
575                newCodePoints[outOffset++] = codePoint;
576                index += Character.charCount(codePoint);
577            }
578        }
579        return new String(newCodePoints, 0, outOffset);
580    }
581
582    //-----------------------------------------------------------------------
583    /**
584     * <p>Swaps the case of a String using a word based algorithm.</p>
585     *
586     * <ul>
587     *  <li>Upper case character converts to Lower case</li>
588     *  <li>Title case character converts to Lower case</li>
589     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
590     *  <li>Other Lower case character converts to Upper case</li>
591     * </ul>
592     *
593     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
594     * A <code>null</code> input String returns <code>null</code>.</p>
595     *
596     * <pre>
     * WordUtils.swapCase(null)                 = null
     * WordUtils.swapCase("")                   = ""
     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
600     * </pre>
601     *
602     * @param str  the String to swap case, may be null
603     * @return the changed String, <code>null</code> if null String input
604     */
605    public static String swapCase(final String str) {
606        if (StringUtils.isEmpty(str)) {
607            return str;
608        }
609        final int strLen = str.length();
610        final int[] newCodePoints = new int[strLen];
611        int outOffset = 0;
612        boolean whitespace = true;
613        for (int index = 0; index < strLen;) {
614            final int oldCodepoint = str.codePointAt(index);
615            final int newCodePoint;
616            if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) {
617                newCodePoint = Character.toLowerCase(oldCodepoint);
618                whitespace = false;
619            } else if (Character.isLowerCase(oldCodepoint)) {
620                if (whitespace) {
621                    newCodePoint = Character.toTitleCase(oldCodepoint);
622                    whitespace = false;
623                } else {
624                    newCodePoint = Character.toUpperCase(oldCodepoint);
625                }
626            } else {
627                whitespace = Character.isWhitespace(oldCodepoint);
628                newCodePoint = oldCodepoint;
629            }
630            newCodePoints[outOffset++] = newCodePoint;
631            index += Character.charCount(newCodePoint);
632        }
633        return new String(newCodePoints, 0, outOffset);
634    }
635
636    //-----------------------------------------------------------------------
637    /**
638     * <p>Extracts the initial characters from each word in the String.</p>
639     *
640     * <p>All first characters after whitespace are returned as a new string.
641     * Their case is not changed.</p>
642     *
643     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
644     * A <code>null</code> input String returns <code>null</code>.</p>
645     *
646     * <pre>
647     * WordUtils.initials(null)             = null
648     * WordUtils.initials("")               = ""
649     * WordUtils.initials("Ben John Lee")   = "BJL"
650     * WordUtils.initials("Ben J.Lee")      = "BJ"
651     * </pre>
652     *
653     * @param str  the String to get initials from, may be null
654     * @return String of initial letters, <code>null</code> if null String input
655     * @see #initials(String,char[])
656     */
657    public static String initials(final String str) {
658        return initials(str, null);
659    }
660
661    /**
662     * <p>Extracts the initial characters from each word in the String.</p>
663     *
664     * <p>All first characters after the defined delimiters are returned as a new string.
665     * Their case is not changed.</p>
666     *
667     * <p>If the delimiters array is null, then Whitespace is used.
668     * Whitespace is defined by {@link Character#isWhitespace(char)}.
669     * A <code>null</code> input String returns <code>null</code>.
670     * An empty delimiter array returns an empty String.</p>
671     *
672     * <pre>
673     * WordUtils.initials(null, *)                = null
674     * WordUtils.initials("", *)                  = ""
675     * WordUtils.initials("Ben John Lee", null)   = "BJL"
676     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
677     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
678     * WordUtils.initials(*, new char[0])         = ""
679     * </pre>
680     *
681     * @param str  the String to get initials from, may be null
682     * @param delimiters  set of characters to determine words, null means whitespace
683     * @return String of initial characters, <code>null</code> if null String input
684     * @see #initials(String)
685     */
686    public static String initials(final String str, final char... delimiters) {
687        if (StringUtils.isEmpty(str)) {
688            return str;
689        }
690        if (delimiters != null && delimiters.length == 0) {
691            return "";
692        }
693        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
694        final int strLen = str.length();
695        final int[] newCodePoints = new int[strLen / 2 + 1];
696        int count = 0;
697        boolean lastWasGap = true;
698        for (int i = 0; i < strLen;) {
699            final int codePoint = str.codePointAt(i);
700
701            if (delimiterSet.contains(codePoint) || (delimiters == null && Character.isWhitespace(codePoint))) {
702                lastWasGap = true;
703            } else if (lastWasGap) {
704                newCodePoints[count++] = codePoint;
705                lastWasGap = false;
706            }
707
708            i += Character.charCount(codePoint);
709        }
710        return new String(newCodePoints, 0, count);
711    }
712
713    //-----------------------------------------------------------------------
714    /**
715     * <p>Checks if the String contains all words in the given array.</p>
716     *
717     * <p>
     * A {@code null} or empty String returns {@code false}, as does a {@code null} or
     * zero-length search array, or a search array containing a {@code null} or blank element.
720     * </p>
721     *
722     * <pre>
723     * WordUtils.containsAllWords(null, *)            = false
724     * WordUtils.containsAllWords("", *)              = false
725     * WordUtils.containsAllWords(*, null)            = false
726     * WordUtils.containsAllWords(*, [])              = false
727     * WordUtils.containsAllWords("abcd", "ab", "cd") = false
728     * WordUtils.containsAllWords("abc def", "def", "abc") = true
729     * </pre>
730     *
731     * @param word The CharSequence to check, may be null
732     * @param words The array of String words to search for, may be null
733     * @return {@code true} if all search words are found, {@code false} otherwise
734     */
735    public static boolean containsAllWords(final CharSequence word, final CharSequence... words) {
736        if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) {
737            return false;
738        }
739        for (final CharSequence w : words) {
740            if (StringUtils.isBlank(w)) {
741                return false;
742            }
743            final Pattern p = Pattern.compile(".*\\b" + w + "\\b.*");
744            if (!p.matcher(word).matches()) {
745                return false;
746            }
747        }
748        return true;
749    }
750
751    //-----------------------------------------------------------------------
752    /**
753     * Is the character a delimiter.
754     *
755     * @param ch the character to check
756     * @param delimiters the delimiters
757     * @return true if it is a delimiter
758     * @deprecated as of 1.2 and will be removed in 2.0
759     */
760    @Deprecated
761    public static boolean isDelimiter(final char ch, final char[] delimiters) {
762        if (delimiters == null) {
763            return Character.isWhitespace(ch);
764        }
765        for (final char delimiter : delimiters) {
766            if (ch == delimiter) {
767                return true;
768            }
769        }
770        return false;
771    }
772
773  //-----------------------------------------------------------------------
774    /**
775     * Is the codePoint a delimiter.
776     *
777     * @param codePoint the codePint to check
778     * @param delimiters the delimiters
779     * @return true if it is a delimiter
780     * @deprecated as of 1.2 and will be removed in 2.0
781     */
782    @Deprecated
783    public static boolean isDelimiter(final int codePoint, final char[] delimiters) {
784        if (delimiters == null) {
785            return Character.isWhitespace(codePoint);
786        }
787        for (int index = 0; index < delimiters.length; index++) {
788            final int delimiterCodePoint = Character.codePointAt(delimiters, index);
789            if (delimiterCodePoint == codePoint) {
790                return true;
791            }
792        }
793        return false;
794    }
795
796    //-----------------------------------------------------------------------
797    /**
798     * Abbreviates the words nicely.
799     *
800     * This method searches for the first space after the lower limit and abbreviates
801     * the String there. It will also append any String passed as a parameter
802     * to the end of the String. The upper limit can be specified to forcibly
803     * abbreviate a String.
804     *
805     * @param str         the string to be abbreviated. If null is passed, null is returned.
806     *                    If the empty String is passed, the empty string is returned.
807     * @param lower       the lower limit.
808     * @param upper       the upper limit; specify -1 if no limit is desired.
809     *                    If the upper limit is lower than the lower limit, it will be
810     *                    adjusted to be the same as the lower limit.
811     * @param appendToEnd String to be appended to the end of the abbreviated string.
812     *                    This is appended ONLY if the string was indeed abbreviated.
813     *                    The append does not count towards the lower or upper limits.
814     * @return the abbreviated String.
815     *
816     * <pre>
817     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null));     = "Now"
818     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null));    = "Now is the"
819     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null));    = "Now is the time for all"
820     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, ""));       = "Now"
821     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, ""));      = "Now is the"
822     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, ""));      = "Now is the time for all"
823     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ..."));   = "Now ..."
824     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ..."));  = "Now is the ..."
825     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ..."));  = "Now is the time for all ..."
826     * WordUtils.abbreviate("Now is the time for all good men", 0, -1, ""));       = "Now"
827     * WordUtils.abbreviate("Now is the time for all good men", 10, -1, ""));      = "Now is the"
828     * WordUtils.abbreviate("Now is the time for all good men", 20, -1, ""));      = "Now is the time for all"
829     * WordUtils.abbreviate("Now is the time for all good men", 50, -1, ""));      = "Now is the time for all good men"
830     * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, ""));    = "Now is the time for all good men"
831     * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null));    = IllegalArgumentException
832     * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null));     = IllegalArgumentException
833     * </pre>
834     */
835    public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) {
836        Validate.isTrue(upper >= -1, "upper value cannot be less than -1");
837        Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value");
838
839        if (StringUtils.isEmpty(str)) {
840            return str;
841        }
842
843        // if the lower value is greater than the length of the string,
844        // set to the length of the string
845        if (lower > str.length()) {
846            lower = str.length();
847        }
848
849        // if the upper value is -1 (i.e. no limit) or is greater
850        // than the length of the string, set to the length of the string
851        if (upper == -1 || upper > str.length()) {
852            upper = str.length();
853        }
854
855        final StringBuilder result = new StringBuilder();
856        final int index = StringUtils.indexOf(str, " ", lower);
857        if (index == -1) {
858            result.append(str, 0, upper);
859            // only if abbreviation has occured do we append the appendToEnd value
860            if (upper != str.length()) {
861                result.append(StringUtils.defaultString(appendToEnd));
862            }
863        } else if (index > upper) {
864            result.append(str, 0, upper);
865            result.append(StringUtils.defaultString(appendToEnd));
866        } else {
867            result.append(str, 0, index);
868            result.append(StringUtils.defaultString(appendToEnd));
869        }
870
871        return result.toString();
872    }
873
874    // -----------------------------------------------------------------------
875    /**
876     * <p>
877     * Converts an array of delimiters to a hash set of code points. Code point of space(32) is added as the default
878     * value if delimiters is null. The generated hash set provides O(1) lookup time.
879     * </p>
880     *
881     * @param delimiters set of characters to determine capitalization, null means whitespace
882     * @return Set<Integer>
883     */
884    private static Set<Integer> generateDelimiterSet(final char[] delimiters) {
885        final Set<Integer> delimiterHashSet = new HashSet<>();
886        if (delimiters == null || delimiters.length == 0) {
887            if (delimiters == null) {
888                delimiterHashSet.add(Character.codePointAt(new char[] {' '}, 0));
889            }
890
891            return delimiterHashSet;
892        }
893
894        for (int index = 0; index < delimiters.length; index++) {
895            delimiterHashSet.add(Character.codePointAt(delimiters, index));
896        }
897        return delimiterHashSet;
898    }
899 }