001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.configuration2.convert; 018 019import java.util.Collection; 020import java.util.LinkedList; 021import java.util.List; 022 023import org.apache.commons.lang3.StringUtils; 024 025/** 026 * <p> 027 * The default implementation of the {@code ListDelimiterHandler} interface. 028 * </p> 029 * <p> 030 * This class supports list splitting and delimiter escaping using a delimiter 031 * character that can be specified when constructing an instance. Splitting of 032 * strings works by scanning the input for the list delimiter character. The 033 * list delimiter character can be escaped by a backslash. So, provided that a 034 * comma is configured as list delimiter, in the example {@code val1,val2,val3} 035 * three values are recognized. In {@code 3\,1415} the list delimiter is escaped 036 * so that only a single element is detected. (Note that when writing these 037 * examples in Java code, each backslash has to be doubled. This is also true 038 * for all other examples in this documentation.) 039 * </p> 040 * <p> 041 * Because the backslash has a special meaning as escaping character it is 042 * always treated in a special way. If it occurs as a normal character in a 043 * property value, it has to be escaped using another backslash (similar to the 044 * rules of the Java programming language). The following example shows the 045 * correct way to define windows network shares: {@code \\\\Server\\path}. Note 046 * that each backslash is doubled. When combining the list delimiter with 047 * backslashes the same escaping rules apply. For instance, in 048 * {@code C:\\Temp\\,D:\\data\\} the list delimiter is recognized; it is not 049 * escaped by the preceding backslash because this backslash is itself escaped. 050 * In contrast, {@code C:\\Temp\\\,D:\\data\\} defines a single element with a 051 * comma being part of the value; two backslashes after {@code Temp} result in a 052 * single one, the third backslash escapes the list delimiter. 053 * </p> 054 * <p> 055 * As can be seen, there are some constellations which are a bit tricky and 056 * cause a larger number of backslashes in sequence. Nevertheless, the escaping 057 * rules are consistent and do not cause ambiguous results. 058 * </p> 059 * <p> 060 * Implementation node: An instance of this class can safely be shared between 061 * multiple {@code Configuration} instances. 062 * </p> 063 * 064 * @version $Id: DefaultListDelimiterHandler.java 1842194 2018-09-27 22:24:23Z ggregory $ 065 * @since 2.0 066 */ 067public class DefaultListDelimiterHandler extends AbstractListDelimiterHandler 068{ 069 /** Constant for the escape character. */ 070 private static final char ESCAPE = '\\'; 071 072 /** 073 * Constant for a buffer size for escaping strings. When a character is 074 * escaped the string becomes longer. Therefore, the output buffer is longer 075 * than the original string length. But we assume, that there are not too 076 * many characters that need to be escaped. 077 */ 078 private static final int BUF_SIZE = 16; 079 080 /** Stores the list delimiter character. */ 081 private final char delimiter; 082 083 /** 084 * Creates a new instance of {@code DefaultListDelimiterHandler} and sets 085 * the list delimiter character. 086 * 087 * @param listDelimiter the list delimiter character 088 */ 089 public DefaultListDelimiterHandler(final char listDelimiter) 090 { 091 delimiter = listDelimiter; 092 } 093 094 /** 095 * Returns the list delimiter character used by this instance. 096 * 097 * @return the list delimiter character 098 */ 099 public char getDelimiter() 100 { 101 return delimiter; 102 } 103 104 @Override 105 public Object escapeList(final List<?> values, final ValueTransformer transformer) 106 { 107 final Object[] escapedValues = new String[values.size()]; 108 int idx = 0; 109 for (final Object v : values) 110 { 111 escapedValues[idx++] = escape(v, transformer); 112 } 113 return StringUtils.join(escapedValues, getDelimiter()); 114 } 115 116 @Override 117 protected String escapeString(final String s) 118 { 119 final StringBuilder buf = new StringBuilder(s.length() + BUF_SIZE); 120 for (int i = 0; i < s.length(); i++) 121 { 122 final char c = s.charAt(i); 123 if (c == getDelimiter() || c == ESCAPE) 124 { 125 buf.append(ESCAPE); 126 } 127 buf.append(c); 128 } 129 return buf.toString(); 130 } 131 132 /** 133 * {@inheritDoc} This implementation reverses the escaping done by the 134 * {@code escape()} methods of this class. However, it tries to be tolerant 135 * with unexpected escaping sequences: If after the escape character "\" no 136 * allowed character follows, both the backslash and the following character 137 * are output. 138 */ 139 @Override 140 protected Collection<String> splitString(final String s, final boolean trim) 141 { 142 final List<String> list = new LinkedList<>(); 143 StringBuilder token = new StringBuilder(); 144 boolean inEscape = false; 145 146 for (int i = 0; i < s.length(); i++) 147 { 148 final char c = s.charAt(i); 149 if (inEscape) 150 { 151 // last character was the escape marker 152 // can current character be escaped? 153 if (c != getDelimiter() && c != ESCAPE) 154 { 155 // no, also add escape character 156 token.append(ESCAPE); 157 } 158 token.append(c); 159 inEscape = false; 160 } 161 162 else 163 { 164 if (c == getDelimiter()) 165 { 166 // found a list delimiter -> add token and 167 // reset buffer 168 String t = token.toString(); 169 if (trim) 170 { 171 t = t.trim(); 172 } 173 list.add(t); 174 token = new StringBuilder(); 175 } 176 else if (c == ESCAPE) 177 { 178 // potentially escape next character 179 inEscape = true; 180 } 181 else 182 { 183 token.append(c); 184 } 185 } 186 } 187 188 // Trailing delimiter? 189 if (inEscape) 190 { 191 token.append(ESCAPE); 192 } 193 // Add last token 194 String t = token.toString(); 195 if (trim) 196 { 197 t = t.trim(); 198 } 199 list.add(t); 200 201 return list; 202 } 203}