1    
2    /*
3     *  ====================================================================
4     *  The Apache Software License, Version 1.1
5     *
6     *  Copyright (c) 2002 The Apache Software Foundation.  All rights
7     *  reserved.
8     *
9     *  Redistribution and use in source and binary forms, with or without
10    *  modification, are permitted provided that the following conditions
11    *  are met:
12    *
13    *  1. Redistributions of source code must retain the above copyright
14    *  notice, this list of conditions and the following disclaimer.
15    *
16    *  2. Redistributions in binary form must reproduce the above copyright
17    *  notice, this list of conditions and the following disclaimer in
18    *  the documentation and/or other materials provided with the
19    *  distribution.
20    *
21    *  3. The end-user documentation included with the redistribution,
22    *  if any, must include the following acknowledgment:
23    *  "This product includes software developed by the
24    *  Apache Software Foundation (http://www.apache.org/)."
25    *  Alternately, this acknowledgment may appear in the software itself,
26    *  if and wherever such third-party acknowledgments normally appear.
27    *
28    *  4. The names "Apache" and "Apache Software Foundation" and
29    *  "Apache POI" must not be used to endorse or promote products
30    *  derived from this software without prior written permission. For
31    *  written permission, please contact apache@apache.org.
32    *
33    *  5. Products derived from this software may not be called "Apache",
34    *  "Apache POI", nor may "Apache" appear in their name, without
35    *  prior written permission of the Apache Software Foundation.
36    *
37    *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38    *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39    *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40    *  DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41    *  ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42    *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43    *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44    *  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45    *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46    *  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47    *  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48    *  SUCH DAMAGE.
49    *  ====================================================================
50    *
51    *  This software consists of voluntary contributions made by many
52    *  individuals on behalf of the Apache Software Foundation.  For more
53    *  information on the Apache Software Foundation, please see
54    *  <http://www.apache.org/>.
55    */
56   package org.apache.poi.util;
57   
58   import java.io.UnsupportedEncodingException;
59   
60   import java.text.NumberFormat;
61   import java.text.FieldPosition;
62   
63   /**
64    *  Title: String Utility Description: Collection of string handling utilities
65    * 
66    * Note that the naming is confusing: the matching method pairs, in which
67    * one method writes the data and the other reads it back, are:
68    * putUncompressedUnicodeHigh and getFromUnicode
69    * putUncompressedUnicode     and getFromUnicodeHigh
70    *
71    *@author     Andrew C. Oliver
72    *@author     Sergei Kozello (sergeikozello at mail.ru)
73    *@created    May 10, 2002
74    *@version    1.0
75    */
76   
public class StringUtil {
    /**
     *  Private constructor: this class only holds static helpers and is
     *  never instantiated.
     */
    private StringUtil() { }


    /**
     *  Validates an (offset, len) window of 16-bit characters against a byte
     *  array. Shared by both getFromUnicode variants so that they report
     *  errors identically.
     *
     *@param  string                              the byte array being read
     *@param  offset                              index of the first byte
     *@param  len                                 number of 16-bit characters
     *@exception  ArrayIndexOutOfBoundsException  if offset is negative or is
     *      greater than or equal to string.length
     *@exception  IllegalArgumentException        if len is negative or the
     *      array does not hold len two-byte characters starting at offset
     */
    private static void checkUnicodeRange(final byte[] string,
            final int offset, final int len) {
        if ((offset < 0) || (offset >= string.length)) {
            throw new ArrayIndexOutOfBoundsException("Illegal offset");
        }
        if ((len < 0) || (((string.length - offset) / 2) < len)) {
            throw new IllegalArgumentException("Illegal length");
        }
    }


    /**
     *  Given a byte array of 16-bit unicode characters stored low byte first
     *  (little endian), decode len characters and return them as a String.
     *  This is the reader for data written by putUncompressedUnicode.
     *
     * { 0x16, 0x00 } -> 0x16
     *
     *@param  string                              the byte array to be converted
     *@param  offset                              the initial offset into the
     *      byte array. it is assumed that string[ offset ] and string[ offset +
     *      1 ] contain the first 16-bit unicode character
     *@param  len                                 the length of the final string,
     *      in characters
     *@return                                     the converted string
     *@exception  ArrayIndexOutOfBoundsException  if offset is out of bounds for
     *      the byte array (i.e., is negative or is greater than or equal to
     *      string.length)
     *@exception  IllegalArgumentException        if len is negative or too
     *      large (i.e., there is not enough data in string to create a String
     *      of that length)
     */
    public static String getFromUnicodeHigh(final byte[] string,
            final int offset, final int len)
             throws ArrayIndexOutOfBoundsException, IllegalArgumentException {
        checkUnicodeRange(string, offset, len);

        char[] chars = new char[ len ];
        for ( int i = 0; i < len; i++ ) {
            // Mask both bytes: a Java byte is signed, so an unmasked value
            // >= 0x80 would be sign-extended when promoted to int.
            int low = string[ offset + ( 2 * i ) ] & 0xFF;
            int high = string[ offset + ( 2 * i ) + 1 ] & 0xFF;

            chars[ i ] = (char) ( ( high << 8 ) | low );
        }
        return new String( chars );
    }


    /**
     *  Given a byte array of 16-bit unicode characters stored low byte first
     *  (little endian), decode the whole array and return it as a String. A
     *  trailing odd byte is ignored.
     *
     * { 0x16, 0x00 } -> 0x16
     *
     *@param  string  the byte array to be converted
     *@return         the converted string; the empty string for an empty array
     */
    public static String getFromUnicodeHigh( final byte[] string ) {
        if ( string.length == 0 ) {
            // The three-argument form rejects offset 0 on an empty array.
            return "";
        }
        return getFromUnicodeHigh( string, 0, string.length / 2 );
    }


    /**
     *  Given a byte array of 16-bit unicode characters stored high byte first
     *  (big endian), decode len characters and return them as a String.
     *  This is the reader for data written by putUncompressedUnicodeHigh.
     *
     * { 0x00, 0x16 } -> 0x16
     *
     *@param  string                              the byte array to be converted
     *@param  offset                              the initial offset into the
     *      byte array. it is assumed that string[ offset ] and string[ offset +
     *      1 ] contain the first 16-bit unicode character
     *@param  len                                 the length of the final string,
     *      in characters
     *@return                                     the converted string
     *@exception  ArrayIndexOutOfBoundsException  if offset is out of bounds for
     *      the byte array (i.e., is negative or is greater than or equal to
     *      string.length)
     *@exception  IllegalArgumentException        if len is negative or too
     *      large (i.e., there is not enough data in string to create a String
     *      of that length)
     */
    public static String getFromUnicode(final byte[] string,
            final int offset, final int len)
             throws ArrayIndexOutOfBoundsException, IllegalArgumentException {
        checkUnicodeRange(string, offset, len);

        char[] chars = new char[ len ];
        for ( int i = 0; i < len; i++ ) {
            // Mask both bytes: without the mask a low byte >= 0x80 is
            // sign-extended and corrupts the high half of the character.
            int high = string[ offset + ( 2 * i ) ] & 0xFF;
            int low = string[ offset + ( 2 * i ) + 1 ] & 0xFF;

            chars[ i ] = (char) ( ( high << 8 ) | low );
        }
        return new String( chars );
    }


    /**
     *  Given a byte array of 16-bit unicode characters stored high byte first
     *  (big endian), decode the whole array and return it as a String. A
     *  trailing odd byte is ignored.
     *
     * { 0x00, 0x16 } -> 0x16
     *
     *@param  string  the byte array to be converted
     *@return         the converted string; the empty string for an empty array
     */
    public static String getFromUnicode(final byte[] string) {
        if (string.length == 0) {
            // The three-argument form rejects offset 0 on an empty array.
            return "";
        }
        return getFromUnicode(string, 0, string.length / 2);
    }


    /**
     *  Write compressed unicode: one byte per character. Only the low 8 bits
     *  of each character are stored; the high 8 bits are discarded.
     *
     *@param  input   the String containing the data to be written
     *@param  output  the byte array to which the data is to be written
     *@param  offset  an offset into the byte array at which the data starts
     *      when written
     */
    public static void putCompressedUnicode(final String input,
            final byte[] output,
            final int offset) {
        int strlen = input.length();

        for (int k = 0; k < strlen; k++) {
            output[offset + k] = (byte) input.charAt(k);
        }
    }


    /**
     *  Write uncompressed unicode, two bytes per character, low byte first
     *  (little endian). Read the data back with getFromUnicodeHigh.
     *
     *@param  input   the String containing the unicode data to be written
     *@param  output  the byte array to hold the uncompressed unicode
     *@param  offset  the offset to start writing into the byte array
     */
    public static void putUncompressedUnicode(final String input,
            final byte[] output,
            final int offset) {
        int strlen = input.length();

        for (int k = 0; k < strlen; k++) {
            char c = input.charAt(k);
            int pos = offset + (2 * k);

            output[pos] = (byte) c;
            output[pos + 1] = (byte) (c >> 8);
        }
    }


    /**
     *  Write uncompressed unicode, two bytes per character, high byte first
     *  (big endian). Read the data back with getFromUnicode.
     *
     *@param  input   the String containing the unicode data to be written
     *@param  output  the byte array to hold the uncompressed unicode
     *@param  offset  the offset to start writing into the byte array
     */
    public static void putUncompressedUnicodeHigh(final String input,
            final byte[] output,
            final int offset) {
        int strlen = input.length();

        for (int k = 0; k < strlen; k++) {
            char c = input.charAt(k);
            int pos = offset + (2 * k);

            output[pos] = (byte) (c >> 8);
            // The low byte belongs in the following slot; writing it to pos
            // again would overwrite the high byte just stored.
            output[pos + 1] = (byte) c;
        }
    }


    /**
     *  Builds a message by replacing each '%' in the template with the next
     *  entry of params.
     *  <ul>
     *  <li>A backslash followed by '%' produces a literal '%'.</li>
     *  <li>If the parameter is a Number and the '%' is not the last character
     *  of the message, the number is formatted with the default-locale
     *  NumberFormat, and the '%' may be followed by an optional format of the
     *  form "d", "d.d" or ".d" (d being a single digit): the first digit is
     *  the minimum number of integer digits, the digit after the '.' is the
     *  maximum number of fraction digits.</li>
     *  <li>Any other parameter is appended using its string form (null is
     *  appended as "null").</li>
     *  <li>If there are more '%' than parameters, "?missing data?" is
     *  appended instead.</li>
     *  </ul>
     *
     *@param  message  the template containing '%' place holders
     *@param  params   the values to substitute, in order; null is treated as
     *      an empty array
     *@return          the formatted message
     */
    public static String format(String message, Object[] params) {
        int paramCount = (params == null) ? 0 : params.length;
        int messageLength = message.length();
        int currentParamNumber = 0;
        StringBuffer formattedMessage = new StringBuffer();

        for (int i = 0; i < messageLength; i++) {
            char current = message.charAt(i);
            boolean hasNext = (i + 1 < messageLength);

            if (current == '%') {
                if (currentParamNumber >= paramCount) {
                    formattedMessage.append("?missing data?");
                } else if ((params[currentParamNumber] instanceof Number)
                        && hasNext) {
                    // Skip over whatever format characters were consumed.
                    i += matchOptionalFormatting(
                            (Number) params[currentParamNumber++],
                            message.substring(i + 1), formattedMessage);
                } else {
                    // append(Object) renders null as "null" rather than
                    // throwing a NullPointerException.
                    formattedMessage.append(params[currentParamNumber++]);
                }
            } else if ((current == '\\') && hasNext
                    && (message.charAt(i + 1) == '%')) {
                formattedMessage.append('%');
                i++;
            } else {
                formattedMessage.append(current);
            }
        }
        return formattedMessage.toString();
    }


    /**
     *  Formats a number using the optional format found at the start of
     *  formatting ("d", "d.d" or ".d", d being a single digit) and appends
     *  the result to outputTo. When no format is present the number is
     *  appended with the default settings of NumberFormat.getInstance().
     *
     *@param  number      the number to format
     *@param  formatting  the text following the '%' place holder
     *@param  outputTo    the buffer the formatted number is appended to
     *@return             the number of characters of formatting that made up
     *      the format and must be skipped by the caller; 0 if formatting does
     *      not start with a format
     */
    private static int matchOptionalFormatting(Number number,
            String formatting,
            StringBuffer outputTo) {
        NumberFormat numberFormat = NumberFormat.getInstance();
        int length = formatting.length();
        int consumed = 0;

        if ((length > 0) && Character.isDigit(formatting.charAt(0))) {
            numberFormat.setMinimumIntegerDigits(
                    Character.digit(formatting.charAt(0), 10));
            consumed = 1;
            if ((length > 2) && (formatting.charAt(1) == '.')
                    && Character.isDigit(formatting.charAt(2))) {
                numberFormat.setMaximumFractionDigits(
                        Character.digit(formatting.charAt(2), 10));
                consumed = 3;
            }
        } else if ((length > 1) && (formatting.charAt(0) == '.')
                && Character.isDigit(formatting.charAt(1))) {
            numberFormat.setMaximumFractionDigits(
                    Character.digit(formatting.charAt(1), 10));
            consumed = 2;
        }
        // A lone '.' or any other character is not a format: nothing is
        // consumed, so the caller emits that character as ordinary text.

        numberFormat.format(number, outputTo, new FieldPosition(0));
        return consumed;
    }
}
335