/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.translate.AggregateTranslator;
import org.apache.commons.text.translate.CharSequenceTranslator;
import org.apache.commons.text.translate.CsvTranslators;
import org.apache.commons.text.translate.EntityArrays;
import org.apache.commons.text.translate.JavaUnicodeEscaper;
import org.apache.commons.text.translate.LookupTranslator;
import org.apache.commons.text.translate.NumericEntityEscaper;
import org.apache.commons.text.translate.NumericEntityUnescaper;
import org.apache.commons.text.translate.OctalUnescaper;
import org.apache.commons.text.translate.UnicodeUnescaper;
import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;

import java.io.IOException;
import java.io.Writer;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

/**
 * <p>Escapes and unescapes {@code String}s for
 * Java, JavaScript, JSON, HTML, XML, CSV and the shell command language (XSI).</p>
 *
 * <p>Every public {@code escapeXxx}/{@code unescapeXxx} method is a thin wrapper that
 * delegates to the matching public translator constant declared in this class; the
 * constants are exposed so they can be combined into custom translators.</p>
 *
 * <p>#ThreadSafe#</p>
 *
 * <p>
 * This code has been adapted from Apache Commons Lang 3.5.
 * </p>
 *
 * @since 1.0
 */
public class StringEscapeUtils {

    /* ESCAPE TRANSLATORS */

    /**
     * Translator object for escaping Java.
     *
     * While {@link #escapeJava(String)} is the expected method of use, this
     * object allows the Java escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_JAVA;
    static {
        final Map<CharSequence, CharSequence> escapeJavaMap = new HashMap<>();
        escapeJavaMap.put("\"", "\\\"");
        escapeJavaMap.put("\\", "\\\\");
        ESCAPE_JAVA = new AggregateTranslator(
                new LookupTranslator(Collections.unmodifiableMap(escapeJavaMap)),
                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
                JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );
    }

    /**
     * Translator object for escaping EcmaScript/JavaScript.
     *
     * While {@link #escapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT;
    static {
        final Map<CharSequence, CharSequence> escapeEcmaScriptMap = new HashMap<>();
        escapeEcmaScriptMap.put("'", "\\'");
        escapeEcmaScriptMap.put("\"", "\\\"");
        escapeEcmaScriptMap.put("\\", "\\\\");
        escapeEcmaScriptMap.put("/", "\\/");
        ESCAPE_ECMASCRIPT = new AggregateTranslator(
                new LookupTranslator(Collections.unmodifiableMap(escapeEcmaScriptMap)),
                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
                JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );
    }

    /**
     * Translator object for escaping Json.
     *
     * While {@link #escapeJson(String)} is the expected method of use, this
     * object allows the Json escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_JSON;
    static {
        final Map<CharSequence, CharSequence> escapeJsonMap = new HashMap<>();
        escapeJsonMap.put("\"", "\\\"");
        escapeJsonMap.put("\\", "\\\\");
        escapeJsonMap.put("/", "\\/");
        ESCAPE_JSON = new AggregateTranslator(
                new LookupTranslator(Collections.unmodifiableMap(escapeJsonMap)),
                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
                JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );
    }

    /**
     * Translator object for escaping XML 1.0.
     *
     * While {@link #escapeXml10(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_XML10;
    static {
        final Map<CharSequence, CharSequence> escapeXml10Map = new HashMap<>();
        // XML 1.0 cannot represent C0 control characters other than TAB (0x9),
        // LF (0xA) and CR (0xD), not even as numeric entities, so they are dropped.
        for (int codeUnit = 0x00; codeUnit <= 0x1f; codeUnit++) {
            if (codeUnit != 0x09 && codeUnit != 0x0a && codeUnit != 0x0d) {
                escapeXml10Map.put(String.valueOf((char) codeUnit), StringUtils.EMPTY);
            }
        }
        // The two non-characters at the end of the BMP are dropped as well.
        escapeXml10Map.put("\ufffe", StringUtils.EMPTY);
        escapeXml10Map.put("\uffff", StringUtils.EMPTY);
        ESCAPE_XML10 = new AggregateTranslator(
                new LookupTranslator(EntityArrays.BASIC_ESCAPE),
                new LookupTranslator(EntityArrays.APOS_ESCAPE),
                new LookupTranslator(Collections.unmodifiableMap(escapeXml10Map)),
                NumericEntityEscaper.between(0x7f, 0x84),
                NumericEntityEscaper.between(0x86, 0x9f),
                new UnicodeUnpairedSurrogateRemover()
        );
    }

    /**
     * Translator object for escaping XML 1.1.
     *
     * While {@link #escapeXml11(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_XML11;
    static {
        final Map<CharSequence, CharSequence> escapeXml11Map = new HashMap<>();
        escapeXml11Map.put("\u0000", StringUtils.EMPTY);
        // Vertical tab and form feed are representable in XML 1.1, but only as
        // numeric character references; they must be escaped, not removed.
        escapeXml11Map.put("\u000b", "&#11;");
        escapeXml11Map.put("\u000c", "&#12;");
        escapeXml11Map.put("\ufffe", StringUtils.EMPTY);
        escapeXml11Map.put("\uffff", StringUtils.EMPTY);
        ESCAPE_XML11 = new AggregateTranslator(
                new LookupTranslator(EntityArrays.BASIC_ESCAPE),
                new LookupTranslator(EntityArrays.APOS_ESCAPE),
                new LookupTranslator(Collections.unmodifiableMap(escapeXml11Map)),
                NumericEntityEscaper.between(0x1, 0x8),
                NumericEntityEscaper.between(0xe, 0x1f),
                NumericEntityEscaper.between(0x7f, 0x84),
                NumericEntityEscaper.between(0x86, 0x9f),
                new UnicodeUnpairedSurrogateRemover()
        );
    }

    /**
     * Translator object for escaping HTML version 3.0.
     *
     * While {@link #escapeHtml3(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_HTML3 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_ESCAPE),
                    new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE)
            );

    /**
     * Translator object for escaping HTML version 4.0.
     *
     * While {@link #escapeHtml4(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_HTML4 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_ESCAPE),
                    new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE),
                    new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE)
            );

    /**
     * Translator object for escaping individual Comma Separated Values.
     *
     * While {@link #escapeCsv(String)} is the expected method of use, this
     * object allows the CSV escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_CSV = new CsvTranslators.CsvEscaper();

    /**
     * Translator object for escaping Shell command language.
     *
     * <p>Shell metacharacters are prefixed with a backslash; line breaks
     * ({@code CR LF} and {@code LF}) are removed.</p>
     *
     * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
     */
    public static final CharSequenceTranslator ESCAPE_XSI;
    static {
        final Map<CharSequence, CharSequence> escapeXsiMap = new HashMap<>();
        escapeXsiMap.put("|", "\\|");
        escapeXsiMap.put("&", "\\&");
        escapeXsiMap.put(";", "\\;");
        escapeXsiMap.put("<", "\\<");
        escapeXsiMap.put(">", "\\>");
        escapeXsiMap.put("(", "\\(");
        escapeXsiMap.put(")", "\\)");
        escapeXsiMap.put("$", "\\$");
        escapeXsiMap.put("`", "\\`");
        escapeXsiMap.put("\\", "\\\\");
        escapeXsiMap.put("\"", "\\\"");
        escapeXsiMap.put("'", "\\'");
        escapeXsiMap.put(" ", "\\ ");
        escapeXsiMap.put("\t", "\\\t");
        escapeXsiMap.put("\r\n", "");
        escapeXsiMap.put("\n", "");
        escapeXsiMap.put("*", "\\*");
        escapeXsiMap.put("?", "\\?");
        escapeXsiMap.put("[", "\\[");
        escapeXsiMap.put("#", "\\#");
        escapeXsiMap.put("~", "\\~");
        escapeXsiMap.put("=", "\\=");
        escapeXsiMap.put("%", "\\%");
        ESCAPE_XSI = new LookupTranslator(
                Collections.unmodifiableMap(escapeXsiMap)
        );
    }

    /* UNESCAPE TRANSLATORS */

    /**
     * Translator object for unescaping escaped Java.
     *
     * While {@link #unescapeJava(String)} is the expected method of use, this
     * object allows the Java unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_JAVA;
    static {
        final Map<CharSequence, CharSequence> unescapeJavaMap = new HashMap<>();
        unescapeJavaMap.put("\\\\", "\\");
        unescapeJavaMap.put("\\\"", "\"");
        unescapeJavaMap.put("\\'", "'");
        unescapeJavaMap.put("\\", "");
        UNESCAPE_JAVA = new AggregateTranslator(
                new OctalUnescaper(),     // .between('\1', '\377'),
                new UnicodeUnescaper(),
                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE),
                new LookupTranslator(Collections.unmodifiableMap(unescapeJavaMap))
        );
    }

    /**
     * Translator object for unescaping escaped EcmaScript.
     *
     * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;

    /**
     * Translator object for unescaping escaped Json.
     *
     * While {@link #unescapeJson(String)} is the expected method of use, this
     * object allows the Json unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_JSON;
    static {
        // Exact inverse of the lookup map used by ESCAPE_JSON. Without it an escaped
        // backslash is not consumed as a unit, so the two-backslash-plus-'n' sequence
        // would be turned into a backslash followed by a newline, and escaped quotes
        // and slashes would never be restored.
        final Map<CharSequence, CharSequence> unescapeJsonMap = new HashMap<>();
        unescapeJsonMap.put("\\\\", "\\");
        unescapeJsonMap.put("\\\"", "\"");
        unescapeJsonMap.put("\\/", "/");
        UNESCAPE_JSON = new AggregateTranslator(
                new OctalUnescaper(),     // .between('\1', '\377'),
                new UnicodeUnescaper(),
                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE),
                new LookupTranslator(Collections.unmodifiableMap(unescapeJsonMap))
        );
    }

    /**
     * Translator object for unescaping escaped HTML 3.0.
     *
     * While {@link #unescapeHtml3(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_HTML3 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
                    new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
                    new NumericEntityUnescaper()
            );

    /**
     * Translator object for unescaping escaped HTML 4.0.
     *
     * While {@link #unescapeHtml4(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_HTML4 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
                    new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
                    new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE),
                    new NumericEntityUnescaper()
            );

    /**
     * Translator object for unescaping escaped XML.
     *
     * While {@link #unescapeXml(String)} is the expected method of use, this
     * object allows the XML unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_XML =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
                    new LookupTranslator(EntityArrays.APOS_UNESCAPE),
                    new NumericEntityUnescaper()
            );

    /**
     * Translator object for unescaping escaped Comma Separated Value entries.
     *
     * While {@link #unescapeCsv(String)} is the expected method of use, this
     * object allows the CSV unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvTranslators.CsvUnescaper();

    /**
     * Translator object for unescaping escaped XSI Value entries.
     *
     * While {@link #unescapeXSI(String)} is the expected method of use, this
     * object allows the XSI unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper();

    /**
     * Translator object for unescaping backslash escaped entries.
     *
     * <p>The whole input is processed in a single call: every backslash that acts
     * as an escape marker is dropped and the character following it is kept.</p>
     */
    static class XsiUnescaper extends CharSequenceTranslator {

        /**
         * Escaped backslash constant.
         */
        private static final char BACKSLASH = '\\';

        /**
         * Writes {@code input} to {@code out} with the escaping backslashes removed.
         *
         * @param input the text to unescape
         * @param index must be {@code 0}; the translator handles the complete input at once
         * @param out   the writer receiving the unescaped text
         * @return the number of code points in {@code input}, i.e. the whole input is
         *         reported as consumed
         * @throws IllegalStateException if {@code index} is not {@code 0}
         * @throws IOException           if writing to {@code out} fails
         */
        @Override
        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {

            if (index != 0) {
                throw new IllegalStateException("XsiUnescaper should never reach the [1] index");
            }

            final String s = input.toString();

            int segmentStart = 0;
            int searchOffset = 0;
            while (true) {
                final int pos = s.indexOf(BACKSLASH, searchOffset);
                if (pos == -1) {
                    // No further escapes: flush whatever remains.
                    if (segmentStart < s.length()) {
                        out.write(s.substring(segmentStart));
                    }
                    break;
                }
                if (pos > segmentStart) {
                    out.write(s.substring(segmentStart, pos));
                }
                // Keep the escaped character (pos + 1) as part of the next segment and
                // resume searching after it, so an escaped backslash is not itself
                // mistaken for an escape marker.
                segmentStart = pos + 1;
                searchOffset = pos + 2;
            }

            return Character.codePointCount(input, 0, input.length());
        }
    }

    /* Helper functions */

    /**
     * <p>{@code StringEscapeUtils} instances should NOT be constructed in
     * standard programming.</p>
     *
     * <p>Instead, the class should be used as:</p>
     * <pre>StringEscapeUtils.escapeJava("foo");</pre>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public StringEscapeUtils() {
        super();
    }

    /**
     * <p>Convenience wrapper for {@link java.lang.StringBuilder} providing escape methods.</p>
     *
     * <p>Instances are obtained through {@link StringEscapeUtils#builder(CharSequenceTranslator)}.</p>
     *
     * <p>Example:</p>
     * <pre>{@code
     * StringEscapeUtils.builder(StringEscapeUtils.ESCAPE_HTML4)
     *      .append("<p>")
     *      .escape("This is paragraph 1 and special chars like & get escaped.")
     *      .append("</p><p>")
     *      .escape("This is paragraph 2 & more...")
     *      .append("</p>")
     *      .toString()
     * }</pre>
     *
     */
    public static final class Builder {

        /**
         * StringBuilder to be used in the Builder class.
         */
        private final StringBuilder sb;

        /**
         * CharSequenceTranslator to be used in the Builder class.
         */
        private final CharSequenceTranslator translator;

        /**
         * Builder constructor.
         *
         * @param translator a CharSequenceTranslator.
         */
        private Builder(final CharSequenceTranslator translator) {
            this.sb = new StringBuilder();
            this.translator = translator;
        }

        /**
         * <p>Escape {@code input} according to the given {@link CharSequenceTranslator}.</p>
         *
         * @param input the String to escape
         * @return {@code this}, to enable chaining
         */
        public Builder escape(final String input) {
            sb.append(translator.translate(input));
            return this;
        }

        /**
         * Literal append, no escaping being done.
         *
         * @param input the String to append
         * @return {@code this}, to enable chaining
         */
        public Builder append(final String input) {
            sb.append(input);
            return this;
        }

        /**
         * <p>Return the escaped string.</p>
         *
         * @return the escaped string
         */
        @Override
        public String toString() {
            return sb.toString();
        }
    }

    /**
     * Get a {@link Builder}.
     * @param translator the text translator
     * @return {@link Builder}
     */
    public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) {
        return new Builder(translator);
    }

    // Java and JavaScript
    //--------------------------------------------------------------------------
    /**
     * <p>Escapes the characters in a {@code String} using Java String rules.</p>
     *
     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and JavaScript strings
     * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeJava(final String input) {
        return ESCAPE_JAVA.translate(input);
    }

    /**
     * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p>
     * <p>Escapes any values it finds into their EcmaScript String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and EcmaScript strings
     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. </p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn\'t say, \"Stop!\"
     * </pre>
     *
     * <b>Security Note.</b> We only provide backslash escaping in this method. For example, {@code '\"'} has the output
     * {@code '\\\"'} which could result in potential issues in the case where the string being escaped is being used
     * in an HTML tag like {@code <select onmouseover="..." />}. If you wish to have more rigorous string escaping, you
     * may consider the
     * <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI Libraries</a>.
     * Further, you can view the <a href="https://github.com/esapi">ESAPI GitHub Org</a>.
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeEcmaScript(final String input) {
        return ESCAPE_ECMASCRIPT.translate(input);
    }

    /**
     * <p>Escapes the characters in a {@code String} using Json String rules.</p>
     * <p>Escapes any values it finds into their Json String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and Json strings
     * is that in Json, forward-slash (/) is escaped.</p>
     *
     * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details. </p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeJson(final String input) {
        return ESCAPE_JSON.translate(input);
    }

    /**
     * <p>Unescapes any Java literals found in the {@code String}.
     * For example, it will turn a sequence of {@code '\'} and
     * {@code 'n'} into a newline character, unless the {@code '\'}
     * is preceded by another {@code '\'}.</p>
     *
     * @param input  the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeJava(final String input) {
        return UNESCAPE_JAVA.translate(input);
    }

    /**
     * <p>Unescapes any EcmaScript literals found in the {@code String}.</p>
     *
     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
     * into a newline character, unless the {@code '\'} is preceded by another
     * {@code '\'}.</p>
     *
     * @see #unescapeJava(String)
     * @param input  the {@code String} to unescape, may be null
     * @return A new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeEcmaScript(final String input) {
        return UNESCAPE_ECMASCRIPT.translate(input);
    }

    /**
     * <p>Unescapes any Json literals found in the {@code String}.</p>
     *
     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
     * into a newline character, unless the {@code '\'} is preceded by another
     * {@code '\'}. Escaped double quotes, backslashes and forward slashes are
     * turned back into the plain characters.</p>
     *
     * @see #unescapeJava(String)
     * @param input  the {@code String} to unescape, may be null
     * @return A new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeJson(final String input) {
        return UNESCAPE_JSON.translate(input);
    }

    // HTML and XML
    //--------------------------------------------------------------------------
    /**
     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
     *
     * <p>
     * For example:
     * </p>
     * <p>{@code "bread" & "butter"}</p>
     * becomes:
     * <p>
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Supports all known HTML 4.0 entities, including funky accents.
     * Note that the commonly used apostrophe escape character ({@code &apos;})
     * is not a legal entity and so is not supported. </p>
     *
     * @param input  the {@code String} to escape, may be null
     * @return a new escaped {@code String}, {@code null} if null string input
     *
     * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
     * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
     * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
     * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
     * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
     */
    public static final String escapeHtml4(final String input) {
        return ESCAPE_HTML4.translate(input);
    }

    /**
     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
     * <p>Supports only the HTML 3.0 entities. </p>
     *
     * @param input  the {@code String} to escape, may be null
     * @return a new escaped {@code String}, {@code null} if null string input
     */
    public static final String escapeHtml3(final String input) {
        return ESCAPE_HTML3.translate(input);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports HTML 4.0 entities.</p>
     *
     * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
     * will become <code>"&lt;Fran&ccedil;ais&gt;"</code></p>
     *
     * <p>If an entity is unrecognized, it is left alone, and inserted
     * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
     * become {@code ">&zzzz;x"}.</p>
     *
     * @param input  the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeHtml4(final String input) {
        return UNESCAPE_HTML4.translate(input);
    }

    /**
     * <p>Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports only HTML 3.0 entities.</p>
     *
     * @param input  the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeHtml3(final String input) {
        return UNESCAPE_HTML3.translate(input);
    }

    /**
     * <p>Escapes the characters in a {@code String} using XML entities.</p>
     *
     * <p>For example: {@code "bread" & "butter"} =&gt;
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Note that XML 1.0 is a text-only format: it cannot represent control
     * characters or unpaired Unicode surrogate codepoints, even after escaping.
     * {@code escapeXml10} will remove characters that do not fit in the
     * following ranges:</p>
     *
     * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
     *
     * <p>Though not strictly necessary, {@code escapeXml10} will escape
     * characters in the following ranges:</p>
     *
     * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
     *
     * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
     * document. If you want to allow more non-text characters in an XML 1.1
     * document, use {@link #escapeXml11(String)}.</p>
     *
     * @param input  the {@code String} to escape, may be null
     * @return a new escaped {@code String}, {@code null} if null string input
     * @see #unescapeXml(java.lang.String)
     */
    public static String escapeXml10(final String input) {
        return ESCAPE_XML10.translate(input);
    }

    /**
     * <p>Escapes the characters in a {@code String} using XML entities.</p>
     *
     * <p>For example: {@code "bread" & "butter"} =&gt;
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>XML 1.1 can represent certain control characters, but it cannot represent
     * the null byte or unpaired Unicode surrogate codepoints, even after escaping.
     * {@code escapeXml11} will remove characters that do not fit in the following
     * ranges:</p>
     *
     * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
     *
     * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
     *
     * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
     *
     * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
     * use it for XML 1.0 documents.</p>
     *
     * @param input  the {@code String} to escape, may be null
     * @return a new escaped {@code String}, {@code null} if null string input
     * @see #unescapeXml(java.lang.String)
     */
    public static String escapeXml11(final String input) {
        return ESCAPE_XML11.translate(input);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Unescapes a string containing XML entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes.</p>
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that numerical \\u Unicode codes are unescaped to their respective
     *    Unicode characters. This may change in future releases. </p>
     *
     * @param input  the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     * @see #escapeXml10(String)
     * @see #escapeXml11(String)
     */
    public static final String unescapeXml(final String input) {
        return UNESCAPE_XML.translate(input);
    }

    //-----------------------------------------------------------------------

    /**
     * <p>Returns a {@code String} value for a CSV column enclosed in double quotes,
     * if required.</p>
     *
     * <p>If the value contains a comma, newline or double quote, then the
     *    String value is returned enclosed in double quotes.</p>
     *
     * <p>Any double quote characters in the value are escaped with another double quote.</p>
     *
     * <p>If the value does not contain a comma, newline or double quote, then the
     *    String value is returned unchanged.</p>
     *
     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     *
     * @param input the input CSV column String, may be null
     * @return the input String, enclosed in double quotes if the value contains a comma,
     * newline or double quote, {@code null} if null string input
     */
    public static final String escapeCsv(final String input) {
        return ESCAPE_CSV.translate(input);
    }

    /**
     * <p>Returns a {@code String} value for an unescaped CSV column. </p>
     *
     * <p>If the value is enclosed in double quotes, and contains a comma, newline
     *    or double quote, then quotes are removed.
     * </p>
     *
     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
     *    to just one double quote. </p>
     *
     * <p>If the value is not enclosed in double quotes, or is and does not contain a
     *    comma, newline or double quote, then the String value is returned unchanged.</p>
     *
     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     *
     * @param input the input CSV column String, may be null
     * @return the input String, with enclosing double quotes removed and embedded double
     * quotes unescaped, {@code null} if null string input
     */
    public static final String unescapeCsv(final String input) {
        return UNESCAPE_CSV.translate(input);
    }

    /**
     * <p>Escapes the characters in a {@code String} using XSI rules.</p>
     *
     * <p><b>Beware!</b> In most cases you don't want to escape shell commands but use multi-argument
     * methods provided by {@link java.lang.ProcessBuilder} or {@link java.lang.Runtime#exec(String[])}
     * instead.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He\ didn\'t\ say,\ \"Stop!\"
     * </pre>
     *
     * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeXSI(final String input) {
        return ESCAPE_XSI.translate(input);
    }

    /**
     * <p>Unescapes the characters in a {@code String} using XSI rules.</p>
     *
     * @see StringEscapeUtils#escapeXSI(String)
     * @param input  the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeXSI(final String input) {
        return UNESCAPE_XSI.translate(input);
    }

}