/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
017package org.apache.logging.log4j.core.layout;
018
019import java.io.UnsupportedEncodingException;
020import java.nio.charset.Charset;
021import java.nio.charset.StandardCharsets;
022
023import org.apache.logging.log4j.core.LogEvent;
024
025/**
026 * Abstract base class for Layouts that result in a String.
027 * <p>
028 * Since 2.4.1, this class has custom logic to convert ISO-8859-1 or US-ASCII Strings to byte[] arrays to improve
029 * performance: all characters are simply cast to bytes.
030 */
031/*
032 * Implementation note: prefer String.getBytes(String) to String.getBytes(Charset) for performance reasons. See
033 * https://issues.apache.org/jira/browse/LOG4J2-935 for details.
034 */
035public abstract class AbstractStringLayout extends AbstractLayout<String> {
036
037    /**
038     * Default length for new StringBuilder instances: {@value} .
039     */
040    protected static final int DEFAULT_STRING_BUILDER_SIZE = 1024;
041
042    private final static ThreadLocal<StringBuilder> threadLocal = new ThreadLocal<>();
043
044    private static final long serialVersionUID = 1L;
045
046    /**
047     * The charset for the formatted message.
048     */
049    // TODO: Charset is not serializable. Implement read/writeObject() ?
050    private final Charset charset;
051    private final String charsetName;
052    private final boolean useCustomEncoding;
053
054    protected AbstractStringLayout(final Charset charset) {
055        this(charset, null, null);
056    }
057
058    protected AbstractStringLayout(final Charset charset, final byte[] header, final byte[] footer) {
059        super(header, footer);
060        this.charset = charset == null ? StandardCharsets.UTF_8 : charset;
061        this.charsetName = this.charset.name();
062        useCustomEncoding = isPreJava8()
063                && (StandardCharsets.ISO_8859_1.equals(charset) || StandardCharsets.US_ASCII.equals(charset));
064    }
065
066    // LOG4J2-1151: If the built-in JDK 8 encoders are available we should use them.
067    private static boolean isPreJava8() {
068        final String version = System.getProperty("java.version");
069        final String[] parts = version.split("\\.");
070        try {
071            int major = Integer.parseInt(parts[1]);
072            return major < 8;
073        } catch (Exception ex) {
074            return true;
075        }
076    }
077
078    /**
079     * Converts a String to a byte[].
080     * 
081     * @param str if null, return null.
082     * @param charset if null, use the default charset.
083     * @return a byte[]
084     */
085    static byte[] toBytes(final String str, final Charset charset) {
086        if (str != null) {
087            if (StandardCharsets.ISO_8859_1.equals(charset)) {
088                return encodeSingleByteChars(str);
089            }
090            final Charset actual = charset != null ? charset : Charset.defaultCharset();
091            try { // LOG4J2-935: String.getBytes(String) gives better performance
092                return str.getBytes(actual.name());
093            } catch (UnsupportedEncodingException e) {
094                return str.getBytes(actual);
095            }
096        }
097        return null;
098    }
099
100    /**
101     * Returns a {@code StringBuilder} that this Layout implementation can use to write the formatted log event to.
102     * 
103     * @return a {@code StringBuilder}
104     */
105    protected StringBuilder getStringBuilder() {
106        StringBuilder result = threadLocal.get();
107        if (result == null) {
108            result = new StringBuilder(DEFAULT_STRING_BUILDER_SIZE);
109            threadLocal.set(result);
110        }
111        result.setLength(0);
112        return result;
113    }
114
115    protected byte[] getBytes(final String s) {
116        if (useCustomEncoding) { // rely on branch prediction to eliminate this check if false
117            return encodeSingleByteChars(s);
118        }
119        try { // LOG4J2-935: String.getBytes(String) gives better performance
120            return s.getBytes(charsetName);
121        } catch (UnsupportedEncodingException e) {
122            return s.getBytes(charset);
123        }
124    }
125
126    /**
127     * Encode the specified string by casting each character to a byte.
128     * 
129     * @param s the string to encode
130     * @return the encoded String
131     * @see https://issues.apache.org/jira/browse/LOG4J2-1151
132     */
133    private static byte[] encodeSingleByteChars(String s) {
134        final int length = s.length();
135        final byte[] result = new byte[length];
136        encodeString(s, 0, length, result);
137        return result;
138    }
139
140    // LOG4J2-1151
141    /*
142     * Implementation note: this is the fast path. If the char array contains only ISO-8859-1 characters, all the work
143     * will be done here.
144     */
145    private static int encodeIsoChars(String charArray, int charIndex, byte[] byteArray, int byteIndex, int length) {
146        int i = 0;
147        for (; i < length; i++) {
148            char c = charArray.charAt(charIndex++);
149            if (c > 255) {
150                break;
151            }
152            byteArray[(byteIndex++)] = ((byte) c);
153        }
154        return i;
155    }
156
157    // LOG4J2-1151
158    private static int encodeString(String charArray, int charOffset, int charLength, byte[] byteArray) {
159        int byteOffset = 0;
160        int length = Math.min(charLength, byteArray.length);
161        int charDoneIndex = charOffset + length;
162        while (charOffset < charDoneIndex) {
163            int done = encodeIsoChars(charArray, charOffset, byteArray, byteOffset, length);
164            charOffset += done;
165            byteOffset += done;
166            if (done != length) {
167                char c = charArray.charAt(charOffset++);
168                if ((Character.isHighSurrogate(c)) && (charOffset < charDoneIndex)
169                        && (Character.isLowSurrogate(charArray.charAt(charOffset)))) {
170                    if (charLength > byteArray.length) {
171                        charDoneIndex++;
172                        charLength--;
173                    }
174                    charOffset++;
175                }
176                byteArray[(byteOffset++)] = '?';
177                length = Math.min(charDoneIndex - charOffset, byteArray.length - byteOffset);
178            }
179        }
180        return byteOffset;
181    }
182
183    protected Charset getCharset() {
184        return charset;
185    }
186
187    /**
188     * @return The default content type for Strings.
189     */
190    @Override
191    public String getContentType() {
192        return "text/plain";
193    }
194
195    /**
196     * Formats the Log Event as a byte array.
197     *
198     * @param event The Log Event.
199     * @return The formatted event as a byte array.
200     */
201    @Override
202    public byte[] toByteArray(final LogEvent event) {
203        return getBytes(toSerializable(event));
204    }
205
206}