001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.configuration2.convert;
018
019import java.util.Collection;
020import java.util.LinkedList;
021import java.util.List;
022
023import org.apache.commons.lang3.StringUtils;
024
025/**
026 * <p>
027 * The default implementation of the {@code ListDelimiterHandler} interface.
028 * </p>
029 * <p>
030 * This class supports list splitting and delimiter escaping using a delimiter
031 * character that can be specified when constructing an instance. Splitting of
032 * strings works by scanning the input for the list delimiter character. The
033 * list delimiter character can be escaped by a backslash. So, provided that a
034 * comma is configured as list delimiter, in the example {@code val1,val2,val3}
035 * three values are recognized. In {@code 3\,1415} the list delimiter is escaped
036 * so that only a single element is detected. (Note that when writing these
037 * examples in Java code, each backslash has to be doubled. This is also true
038 * for all other examples in this documentation.)
039 * </p>
040 * <p>
041 * Because the backslash has a special meaning as escaping character it is
042 * always treated in a special way. If it occurs as a normal character in a
043 * property value, it has to be escaped using another backslash (similar to the
044 * rules of the Java programming language). The following example shows the
045 * correct way to define windows network shares: {@code \\\\Server\\path}. Note
046 * that each backslash is doubled. When combining the list delimiter with
047 * backslashes the same escaping rules apply. For instance, in
048 * {@code C:\\Temp\\,D:\\data\\} the list delimiter is recognized; it is not
049 * escaped by the preceding backslash because this backslash is itself escaped.
050 * In contrast, {@code C:\\Temp\\\,D:\\data\\} defines a single element with a
051 * comma being part of the value; two backslashes after {@code Temp} result in a
052 * single one, the third backslash escapes the list delimiter.
053 * </p>
054 * <p>
055 * As can be seen, there are some constellations which are a bit tricky and
056 * cause a larger number of backslashes in sequence. Nevertheless, the escaping
057 * rules are consistent and do not cause ambiguous results.
058 * </p>
059 * <p>
060 * Implementation node: An instance of this class can safely be shared between
061 * multiple {@code Configuration} instances.
062 * </p>
063 *
064 * @version $Id: DefaultListDelimiterHandler.java 1790899 2017-04-10 21:56:46Z ggregory $
065 * @since 2.0
066 */
067public class DefaultListDelimiterHandler extends AbstractListDelimiterHandler
068{
069    /** Constant for the escape character. */
070    private static final char ESCAPE = '\\';
071
072    /**
073     * Constant for a buffer size for escaping strings. When a character is
074     * escaped the string becomes longer. Therefore, the output buffer is longer
075     * than the original string length. But we assume, that there are not too
076     * many characters that need to be escaped.
077     */
078    private static final int BUF_SIZE = 16;
079
080    /** Stores the list delimiter character. */
081    private final char delimiter;
082
083    /**
084     * Creates a new instance of {@code DefaultListDelimiterHandler} and sets
085     * the list delimiter character.
086     *
087     * @param listDelimiter the list delimiter character
088     */
089    public DefaultListDelimiterHandler(char listDelimiter)
090    {
091        delimiter = listDelimiter;
092    }
093
094    /**
095     * Returns the list delimiter character used by this instance.
096     *
097     * @return the list delimiter character
098     */
099    public char getDelimiter()
100    {
101        return delimiter;
102    }
103
104    @Override
105    public Object escapeList(List<?> values, ValueTransformer transformer)
106    {
107        Object[] escapedValues = new String[values.size()];
108        int idx = 0;
109        for (Object v : values)
110        {
111            escapedValues[idx++] = escape(v, transformer);
112        }
113        return StringUtils.join(escapedValues, getDelimiter());
114    }
115
116    @Override
117    protected String escapeString(String s)
118    {
119        StringBuilder buf = new StringBuilder(s.length() + BUF_SIZE);
120        for (int i = 0; i < s.length(); i++)
121        {
122            char c = s.charAt(i);
123            if (c == getDelimiter() || c == ESCAPE)
124            {
125                buf.append(ESCAPE);
126            }
127            buf.append(c);
128        }
129        return buf.toString();
130    }
131
132    /**
133     * {@inheritDoc} This implementation reverses the escaping done by the
134     * {@code escape()} methods of this class. However, it tries to be tolerant
135     * with unexpected escaping sequences: If after the escape character "\" no
136     * allowed character follows, both the backslash and the following character
137     * are output.
138     */
139    @Override
140    protected Collection<String> splitString(String s, boolean trim)
141    {
142        List<String> list = new LinkedList<>();
143        StringBuilder token = new StringBuilder();
144        boolean inEscape = false;
145
146        for (int i = 0; i < s.length(); i++)
147        {
148            char c = s.charAt(i);
149            if (inEscape)
150            {
151                // last character was the escape marker
152                // can current character be escaped?
153                if (c != getDelimiter() && c != ESCAPE)
154                {
155                    // no, also add escape character
156                    token.append(ESCAPE);
157                }
158                token.append(c);
159                inEscape = false;
160            }
161
162            else
163            {
164                if (c == getDelimiter())
165                {
166                    // found a list delimiter -> add token and
167                    // reset buffer
168                    String t = token.toString();
169                    if (trim)
170                    {
171                        t = t.trim();
172                    }
173                    list.add(t);
174                    token = new StringBuilder();
175                }
176                else if (c == ESCAPE)
177                {
178                    // potentially escape next character
179                    inEscape = true;
180                }
181                else
182                {
183                    token.append(c);
184                }
185            }
186        }
187
188        // Trailing delimiter?
189        if (inEscape)
190        {
191            token.append(ESCAPE);
192        }
193        // Add last token
194        String t = token.toString();
195        if (trim)
196        {
197            t = t.trim();
198        }
199        list.add(t);
200
201        return list;
202    }
203}