001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.output; 018 019import java.io.File; 020import java.io.FileNotFoundException; 021import java.io.FileOutputStream; 022import java.io.IOException; 023import java.io.OutputStream; 024import java.io.OutputStreamWriter; 025import java.io.StringWriter; 026import java.io.Writer; 027import java.nio.charset.Charset; 028import java.nio.charset.StandardCharsets; 029import java.util.Locale; 030import java.util.Objects; 031import java.util.regex.Matcher; 032 033import org.apache.commons.io.Charsets; 034import org.apache.commons.io.IOUtils; 035import org.apache.commons.io.build.AbstractStreamBuilder; 036import org.apache.commons.io.input.XmlStreamReader; 037 038/** 039 * Character stream that handles all the necessary work to figure out the 040 * charset encoding of the XML document written to the stream. 041 * 042 * @see XmlStreamReader 043 * @since 2.0 044 */ 045public class XmlStreamWriter extends Writer { 046 047 /** 048 * Builds a new {@link XmlStreamWriter} instance. 049 * <p> 050 * For example: 051 * </p> 052 * <pre>{@code 053 * WriterOutputStream w = WriterOutputStream.builder() 054 * .setPath(path) 055 * .setCharset(StandardCharsets.UTF_8) 056 * .get()} 057 * </pre> 058 * <p> 059 * @since 2.12.0 060 */ 061 public static class Builder extends AbstractStreamBuilder<XmlStreamWriter, Builder> { 062 063 public Builder() { 064 setCharsetDefault(StandardCharsets.UTF_8); 065 setCharset(StandardCharsets.UTF_8); 066 } 067 068 /** 069 * Constructs a new instance. 070 * 071 * @throws UnsupportedOperationException if the origin cannot be converted to an OutputStream. 072 */ 073 @SuppressWarnings("resource") 074 @Override 075 public XmlStreamWriter get() throws IOException { 076 return new XmlStreamWriter(getOrigin().getOutputStream(), getCharset()); 077 } 078 079 } 080 081 private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE; 082 083 /** 084 * Constructs a new {@link Builder}. 085 * 086 * @return a new {@link Builder}. 087 * @since 2.12.0 088 */ 089 public static Builder builder() { 090 return new Builder(); 091 } 092 093 private final OutputStream out; 094 095 private final Charset defaultCharset; 096 097 private StringWriter prologWriter = new StringWriter(BUFFER_SIZE); 098 099 private Writer writer; 100 101 private Charset charset; 102 103 /** 104 * Constructs a new XML stream writer for the specified file 105 * with a default encoding of UTF-8. 106 * 107 * @param file The file to write to 108 * @throws FileNotFoundException if there is an error creating or 109 * opening the file 110 * @deprecated Use {@link #builder()} 111 */ 112 @Deprecated 113 public XmlStreamWriter(final File file) throws FileNotFoundException { 114 this(file, null); 115 } 116 117 /** 118 * Constructs a new XML stream writer for the specified file 119 * with the specified default encoding. 120 * 121 * @param file The file to write to 122 * @param defaultEncoding The default encoding if not encoding could be detected 123 * @throws FileNotFoundException if there is an error creating or 124 * opening the file 125 * @deprecated Use {@link #builder()} 126 */ 127 @Deprecated 128 @SuppressWarnings("resource") 129 public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException { 130 this(new FileOutputStream(file), defaultEncoding); 131 } 132 133 /** 134 * Constructs a new XML stream writer for the specified output stream 135 * with a default encoding of UTF-8. 136 * 137 * @param out The output stream 138 * @deprecated Use {@link #builder()} 139 */ 140 @Deprecated 141 public XmlStreamWriter(final OutputStream out) { 142 this(out, StandardCharsets.UTF_8); 143 } 144 145 /** 146 * Constructs a new XML stream writer for the specified output stream 147 * with the specified default encoding. 148 * 149 * @param out The output stream 150 * @param defaultEncoding The default encoding if not encoding could be detected 151 */ 152 private XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) { 153 this.out = out; 154 this.defaultCharset = Objects.requireNonNull(defaultEncoding); 155 } 156 157 /** 158 * Constructs a new XML stream writer for the specified output stream 159 * with the specified default encoding. 160 * 161 * @param out The output stream 162 * @param defaultEncoding The default encoding if not encoding could be detected 163 * @deprecated Use {@link #builder()} 164 */ 165 @Deprecated 166 public XmlStreamWriter(final OutputStream out, final String defaultEncoding) { 167 this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8)); 168 } 169 170 /** 171 * Closes the underlying writer. 172 * 173 * @throws IOException if an error occurs closing the underlying writer 174 */ 175 @Override 176 public void close() throws IOException { 177 if (writer == null) { 178 charset = defaultCharset; 179 writer = new OutputStreamWriter(out, charset); 180 writer.write(prologWriter.toString()); 181 } 182 writer.close(); 183 } 184 185 /** 186 * Detects the encoding. 187 * 188 * @param cbuf the buffer to write the characters from 189 * @param off The start offset 190 * @param len The number of characters to write 191 * @throws IOException if an error occurs detecting the encoding 192 */ 193 private void detectEncoding(final char[] cbuf, final int off, final int len) 194 throws IOException { 195 int size = len; 196 final StringBuffer xmlProlog = prologWriter.getBuffer(); 197 if (xmlProlog.length() + len > BUFFER_SIZE) { 198 size = BUFFER_SIZE - xmlProlog.length(); 199 } 200 prologWriter.write(cbuf, off, size); 201 202 // try to determine encoding 203 if (xmlProlog.length() >= 5) { 204 if (xmlProlog.substring(0, 5).equals("<?xml")) { 205 // try to extract encoding from XML prolog 206 final int xmlPrologEnd = xmlProlog.indexOf("?>"); 207 if (xmlPrologEnd > 0) { 208 // ok, full XML prolog written: let's extract encoding 209 final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0, 210 xmlPrologEnd)); 211 if (m.find()) { 212 final String encName = m.group(1).toUpperCase(Locale.ROOT); 213 charset = Charset.forName(encName.substring(1, encName.length() - 1)); 214 } else { 215 // no encoding found in XML prolog: using default 216 // encoding 217 charset = defaultCharset; 218 } 219 } else if (xmlProlog.length() >= BUFFER_SIZE) { 220 // no encoding found in first characters: using default 221 // encoding 222 charset = defaultCharset; 223 } 224 } else { 225 // no XML prolog: using default encoding 226 charset = defaultCharset; 227 } 228 if (charset != null) { 229 // encoding has been chosen: let's do it 230 prologWriter = null; 231 writer = new OutputStreamWriter(out, charset); 232 writer.write(xmlProlog.toString()); 233 if (len > size) { 234 writer.write(cbuf, off + size, len - size); 235 } 236 } 237 } 238 } 239 240 /** 241 * Flushes the underlying writer. 242 * 243 * @throws IOException if an error occurs flushing the underlying writer 244 */ 245 @Override 246 public void flush() throws IOException { 247 if (writer != null) { 248 writer.flush(); 249 } 250 } 251 252 /** 253 * Returns the default encoding. 254 * 255 * @return the default encoding 256 */ 257 public String getDefaultEncoding() { 258 return defaultCharset.name(); 259 } 260 261 /** 262 * Returns the detected encoding. 263 * 264 * @return the detected encoding 265 */ 266 public String getEncoding() { 267 return charset.name(); 268 } 269 270 /** 271 * Writes the characters to the underlying writer, detecting encoding. 272 * 273 * @param cbuf the buffer to write the characters from 274 * @param off The start offset 275 * @param len The number of characters to write 276 * @throws IOException if an error occurs detecting the encoding 277 */ 278 @Override 279 public void write(final char[] cbuf, final int off, final int len) throws IOException { 280 if (prologWriter != null) { 281 detectEncoding(cbuf, off, len); 282 } else { 283 writer.write(cbuf, off, len); 284 } 285 } 286}