/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Common constant strings utilities.
 * <p>
 * The methods of this class read JEXL string literals and handle escaping through the
 * backslash character. Escaping is used to neutralize string delimiters (the single
 * and double quotes) and to read Unicode characters encoded as hexadecimal digits.
 * </p>
 * <p>
 * The only escapable sequences are the single and double quotes,
 * a Unicode sequence made of a 'u' followed by 4 hexadecimal digits (0-9, a-f, A-F) and
 * the backslash character itself.
 * </p>
 * <p>
 * A sequence where a backslash occurs before any non-escapable character or sequence has no effect,
 * the sequence output being the same as the input.
 * </p>
 */
public class StringParser {
    /** Number of hexadecimal digits that follow the 'u' of a Unicode escape. */
    private static final int UNICODE_DIGITS = 4;
    /** Number of bits carried by one hexadecimal digit. */
    private static final int BITS_PER_DIGIT = 4;
    /** Numeric value of the hexadecimal digit 'a'. */
    private static final int HEX_LETTER_BASE = 10;
    /** First character code that {@link #escapeString(String)} writes as a Unicode escape. */
    private static final int FIRST_ESCAPED_CODE = 127;

    /** Default constructor. */
    public StringParser() {}

    /**
     * Builds a string, handles escaping through the backslash syntax.
     * <p>
     * When <code>eatsep</code> is true, the first character is taken as the separator and both the
     * first and the last characters of <code>str</code> are left out of the result; reading stops
     * early if an unescaped separator is met in between (that separator is copied to the result).
     * </p>
     * @param str the string to build from
     * @param eatsep whether the separator, the first character, should be considered
     * @return the built string
     * @throws IndexOutOfBoundsException if <code>eatsep</code> is true and <code>str</code> is empty
     */
    public static String buildString(CharSequence str, boolean eatsep) {
        final int length = str.length();
        final StringBuilder strb = new StringBuilder(length);
        if (eatsep) {
            // skip the opening delimiter and stop before the closing one
            read(strb, str, 1, length - 1, str.charAt(0));
        } else {
            // no delimiter: the char 0 stands for "no separator"
            read(strb, str, 0, length, (char) 0);
        }
        return strb.toString();
    }

    /**
     * Read the remainder of a string till a given separator,
     * handles escaping through the backslash syntax.
     * <p>
     * The separator that stops the read is itself appended to the destination buffer.
     * </p>
     * @param strb the destination buffer to copy characters into
     * @param str the origin
     * @param index the offset into the origin
     * @param sep the separator, single or double quote, marking end of string
     * @return the offset in origin of the separator that stopped the read,
     *         or the length of the origin if no unescaped separator was found
     */
    public static int readString(StringBuilder strb, CharSequence str, int index, char sep) {
        return read(strb, str, index, str.length(), sep);
    }

    /**
     * Read the remainder of a string till a given separator,
     * handles escaping through the backslash syntax.
     * <p>
     * A lone backslash that is the last character of the range is consumed without producing output.
     * </p>
     * @param strb the destination buffer to copy characters into
     * @param str the origin
     * @param begin the offset in str to begin reading (inclusive)
     * @param end the offset in str to end reading (exclusive)
     * @param sep the separator, single or double quote, marking end of string;
     *            when 0, both quote characters are treated as escapable
     * @return the offset of the unescaped separator that stopped the read, <code>end</code> otherwise
     */
    private static int read(StringBuilder strb, CharSequence str, int begin, int end, char sep) {
        boolean escape = false;
        int index = begin;
        for (; index < end; ++index) {
            final char c = str.charAt(index);
            if (escape) {
                escape = false;
                // the 4 digits sit at index+1 .. index+4, the last one must be inside the range
                if (c == 'u'
                        && (index + UNICODE_DIGITS) < end
                        && readUnicodeChar(strb, str, index + 1) > 0) {
                    index += UNICODE_DIGITS;
                } else {
                    // if c is not an escapable character, re-emit the backslash before it
                    final boolean quote = (sep == 0) ? (c == '\'' || c == '"') : (c == sep);
                    if (!quote && c != '\\') {
                        strb.append('\\');
                    }
                    strb.append(c);
                }
            } else if (c == '\\') {
                escape = true;
            } else {
                strb.append(c);
                if (c == sep) {
                    break;
                }
            }
        }
        return index;
    }

    /**
     * Reads a Unicode escape character.
     * <p>
     * Nothing is written to the builder unless all 4 characters are hexadecimal digits.
     * </p>
     * @param strb the builder to write the character to
     * @param str the sequence
     * @param begin the begin offset in sequence (after the backslash and the 'u')
     * @return 0 if char could not be read, 4 otherwise
     */
    private static int readUnicodeChar(StringBuilder strb, CharSequence str, int begin) {
        int code = 0;
        for (int offset = 0; offset < UNICODE_DIGITS; ++offset) {
            final int digit = hexValue(str.charAt(begin + offset));
            if (digit < 0) {
                return 0;
            }
            // most significant digit comes first
            code = (code << BITS_PER_DIGIT) | digit;
        }
        strb.append((char) code);
        return UNICODE_DIGITS;
    }

    /**
     * Converts an ASCII hexadecimal digit to its numeric value.
     * @param c the character to convert
     * @return the value between 0 and 15, or -1 if c is not one of 0-9, a-f, A-F
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + HEX_LETTER_BASE;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + HEX_LETTER_BASE;
        }
        return -1;
    }

    /**
     * Escapes a String representation, expands characters whose code is 127 or above
     * as Unicode escape sequences.
     * <p>
     * The result is enclosed in single quotes; single quotes and backslashes found in the
     * input are each preceded by a backslash. Other characters below code 127 are copied as is.
     * </p>
     * @param str the string to escape
     * @return the escaped representation, null if <code>str</code> is null
     */
    public static String escapeString(String str) {
        if (str == null) {
            return null;
        }
        final int length = str.length();
        final StringBuilder strb = new StringBuilder(length + 2);
        strb.append('\'');
        for (int i = 0; i < length; ++i) {
            final char c = str.charAt(i);
            if (c >= FIRST_ESCAPED_CODE) {
                // convert to Unicode escape sequence, left-padded with zeros to 4 digits
                final String hex = Integer.toHexString(c);
                strb.append('\\').append('u');
                for (int h = hex.length(); h < UNICODE_DIGITS; ++h) {
                    strb.append('0');
                }
                strb.append(hex);
            } else {
                if (c == '\'' || c == '\\') {
                    // escape quote and backslash
                    strb.append('\\');
                }
                strb.append(c);
            }
        }
        strb.append('\'');
        return strb.toString();
    }
}