001    package org.apache.fulcrum.mimetype.util;
002    
003    
004    /*
005     * Licensed to the Apache Software Foundation (ASF) under one
006     * or more contributor license agreements.  See the NOTICE file
007     * distributed with this work for additional information
008     * regarding copyright ownership.  The ASF licenses this file
009     * to you under the Apache License, Version 2.0 (the
010     * "License"); you may not use this file except in compliance
011     * with the License.  You may obtain a copy of the License at
012     *
013     *   http://www.apache.org/licenses/LICENSE-2.0
014     *
015     * Unless required by applicable law or agreed to in writing,
016     * software distributed under the License is distributed on an
017     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
018     * KIND, either express or implied.  See the License for the
019     * specific language governing permissions and limitations
020     * under the License.
021     */
022    
023    
024    import java.util.Locale;
025    import java.util.Map;
026    import java.util.HashMap;
027    import java.util.Hashtable;
028    import java.util.Properties;
029    import java.io.File;
030    import java.io.InputStream;
031    import java.io.FileInputStream;
032    import java.io.IOException;
033    
/**
 * Maps locales to charset names by consulting a prioritized list of mappers.
 * Each mapper is a map from a locale key (for example <code>fi</code>,
 * <code>zh_TW</code> or <code>__variant</code>) to a charset name.
 * The mappers are consulted in the following order, the first hit wins:
 * <ol>
 * <li>a cache of already resolved keys,</li>
 * <li>application specific mappings given to a constructor or set with
 *     {@link #setCharSet(String, String)},</li>
 * <li><code>charset.properties</code> in the directory named by the
 *     <code>user.home</code> system property,</li>
 * <li><code>lib/charset.properties</code> in the directory named by the
 *     <code>java.home</code> system property,</li>
 * <li>the class path resource <code>/META-INF/charset.properties</code>,</li>
 * <li>the static default mappings built into this class.</li>
 * </ol>
 * Property files and resources that are missing or unreadable are
 * silently skipped.
 *
 * @author <a href="mailto:ilkka.priha@simsoft.fi">Ilkka Priha</a>
 * @version $Id: CharSetMap.java 826489 2009-10-18 18:54:59Z tv $
 */
public class CharSetMap
{
    /**
     * The default charset when nothing else is applicable.
     */
    public static final String DEFAULT_CHARSET = "ISO-8859-1";

    /**
     * The name for charset mapper resources.
     */
    public static final String CHARSET_RESOURCE = "charset.properties";

    /*
     * Indexes of the available mappers. Mappers are searched in ascending
     * index order, so a lower index means a higher priority.
     */

    /** The cache of already resolved keys. */
    private static final int MAP_CACHE = 0;

    /** The application specific (programmatic) mappings. */
    private static final int MAP_PROG = 1;

    /** The mappings loaded from the user's home directory. */
    private static final int MAP_HOME = 2;

    /** The mappings loaded from the Java home directory. */
    private static final int MAP_SYS = 3;

    /** The mappings loaded from the class path resource. */
    private static final int MAP_JAR = 4;

    /** The common built-in mappings. */
    private static final int MAP_COM = 5;

    /**
     * A common charset mapper for languages. It is only populated by the
     * static initializer below; instances that need to change it work on
     * a private copy (see {@link #setCommonCharSet(String, String)}).
     */
    private static final HashMap commonMapper = new HashMap();
    static
    {
        commonMapper.put("ar","ISO-8859-6");
        commonMapper.put("be","ISO-8859-5");
        commonMapper.put("bg","ISO-8859-5");
        commonMapper.put("ca","ISO-8859-1");
        commonMapper.put("cs","ISO-8859-2");
        commonMapper.put("da","ISO-8859-1");
        commonMapper.put("de","ISO-8859-1");
        commonMapper.put("el","ISO-8859-7");
        commonMapper.put("en","ISO-8859-1");
        commonMapper.put("es","ISO-8859-1");
        commonMapper.put("et","ISO-8859-1");
        commonMapper.put("fi","ISO-8859-1");
        commonMapper.put("fr","ISO-8859-1");
        commonMapper.put("hr","ISO-8859-2");
        commonMapper.put("hu","ISO-8859-2");
        commonMapper.put("is","ISO-8859-1");
        commonMapper.put("it","ISO-8859-1");
        commonMapper.put("iw","ISO-8859-8");
        commonMapper.put("ja","Shift_JIS");
        commonMapper.put("ko","EUC-KR");
        commonMapper.put("lt","ISO-8859-2");
        commonMapper.put("lv","ISO-8859-2");
        commonMapper.put("mk","ISO-8859-5");
        commonMapper.put("nl","ISO-8859-1");
        commonMapper.put("no","ISO-8859-1");
        commonMapper.put("pl","ISO-8859-2");
        commonMapper.put("pt","ISO-8859-1");
        commonMapper.put("ro","ISO-8859-2");
        commonMapper.put("ru","ISO-8859-5");
        commonMapper.put("sh","ISO-8859-5");
        commonMapper.put("sk","ISO-8859-2");
        commonMapper.put("sl","ISO-8859-2");
        commonMapper.put("sq","ISO-8859-2");
        commonMapper.put("sr","ISO-8859-5");
        commonMapper.put("sv","ISO-8859-1");
        commonMapper.put("tr","ISO-8859-9");
        commonMapper.put("uk","ISO-8859-5");
        commonMapper.put("zh","GB2312");
        commonMapper.put("zh_TW","Big5");
    }

    /**
     * An array of available charset mappers, indexed by the
     * <code>MAP_*</code> constants. Entries may be <code>null</code>
     * when the corresponding source is not available.
     */
    private final Map[] mappers = new Map[6];

    /**
     * Loads mappings from a stream. The stream is read in the
     * {@link Properties} format and is not closed by this method;
     * closing it remains the responsibility of the caller.
     *
     * @param input an input stream.
     * @return the mappings.
     * @throws IOException for an incorrect stream.
     */
    protected static Map loadStream(InputStream input)
        throws IOException
    {
        Properties props = new Properties();
        props.load(input);
        return new HashMap(props);
    }

    /**
     * Loads mappings from a file. The file is closed before
     * this method returns, whether or not loading succeeds.
     *
     * @param file a file.
     * @return the mappings.
     * @throws IOException for an incorrect file.
     */
    protected static Map loadFile(File file)
        throws IOException
    {
        InputStream input = new FileInputStream(file);
        try
        {
            return loadStream(input);
        }
        finally
        {
            // This method opened the stream, so it must release it.
            closeQuietly(input);
        }
    }

    /**
     * Loads mappings from a file path.
     *
     * @param path a file path.
     * @return the mappings.
     * @throws IOException for an incorrect file.
     */
    protected static Map loadPath(String path)
        throws IOException
    {
        return loadFile(new File(path));
    }

    /**
     * Loads mappings from a resource. The resource is looked up
     * relative to this class and its stream is closed before
     * this method returns.
     *
     * @param name a resource name.
     * @return the mappings, or <code>null</code> if the resource
     * does not exist or cannot be read.
     */
    protected static Map loadResource(String name)
    {
        InputStream input = CharSetMap.class.getResourceAsStream(name);
        if (input == null)
        {
            return null;
        }

        try
        {
            return loadStream(input);
        }
        catch (IOException x)
        {
            // Best effort: an unreadable resource is treated as absent.
            return null;
        }
        finally
        {
            closeQuietly(input);
        }
    }

    /**
     * Closes a stream, ignoring any failure to do so.
     *
     * @param input the stream to close.
     */
    private static void closeQuietly(InputStream input)
    {
        try
        {
            input.close();
        }
        catch (IOException ignored)
        {
            // Either the mappings were already read completely or a read
            // failure is already propagating; a close failure must not
            // replace that outcome.
        }
    }

    /**
     * Loads mappings from an optional file path.
     *
     * @param path a file path.
     * @return the mappings, or <code>null</code> if the file
     * does not exist or cannot be read.
     */
    private static Map loadOptionalPath(String path)
    {
        try
        {
            return loadPath(path);
        }
        catch (IOException ignored)
        {
            // The property files are optional, a missing
            // or broken one simply contributes no mappings.
            return null;
        }
    }

    /**
     * Constructs a new charset map with default mappers.
     */
    public CharSetMap()
    {
        // Check whether the user directory contains mappings.
        String userHome = System.getProperty("user.home");
        if (userHome != null)
        {
            mappers[MAP_HOME] = loadOptionalPath(
                userHome + File.separator + CHARSET_RESOURCE);
        }

        // Check whether the system directory contains mappings.
        String javaHome = System.getProperty("java.home");
        if (javaHome != null)
        {
            mappers[MAP_SYS] = loadOptionalPath(
                javaHome + File.separator + "lib" +
                File.separator + CHARSET_RESOURCE);
        }

        // Check whether the current class jar contains mappings.
        mappers[MAP_JAR] = loadResource("/META-INF/" + CHARSET_RESOURCE);

        // Set the common mapper to have the lowest priority.
        mappers[MAP_COM] = commonMapper;

        // Set the cache mapper to have the highest priority.
        mappers[MAP_CACHE] = new Hashtable();
    }

    /**
     * Constructs a charset map from properties.
     *
     * @param props charset mapping properties.
     */
    public CharSetMap(Properties props)
    {
        this();
        mappers[MAP_PROG] = new HashMap(props);
    }

    /**
     * Constructs a charset map read from a stream.
     * The stream is not closed by this constructor.
     *
     * @param input an input stream.
     * @throws IOException for an incorrect stream.
     */
    public CharSetMap(InputStream input)
        throws IOException
    {
        this();
        mappers[MAP_PROG] = loadStream(input);
    }

    /**
     * Constructs a charset map read from a property file.
     *
     * @param file a property file.
     * @throws IOException for an incorrect property file.
     */
    public CharSetMap(File file)
        throws IOException
    {
        this();
        mappers[MAP_PROG] = loadFile(file);
    }

    /**
     * Constructs a charset map read from a property file path.
     *
     * @param path a property file path.
     * @throws IOException for an incorrect property file.
     */
    public CharSetMap(String path)
        throws IOException
    {
        this();
        mappers[MAP_PROG] = loadPath(path);
    }

    /**
     * Sets a locale-charset mapping. The application specific mapper
     * is replaced by a modified copy and the cache is cleared so that
     * the new mapping takes effect for already resolved keys as well.
     *
     * @param key the key for the charset.
     * @param charset the corresponding charset.
     */
    public synchronized void setCharSet(String key,
                                        String charset)
    {
        Map current = mappers[MAP_PROG];
        Map mapper = current != null ? new HashMap(current) : new HashMap();
        mapper.put(key,charset);
        mappers[MAP_PROG] = mapper;
        mappers[MAP_CACHE].clear();
    }

    /**
     * Gets the charset for a locale. First a locale specific charset
     * is searched for, then a country specific one and lastly a language
     * specific one. If none is found, the default charset is returned.
     *
     * @param locale the locale.
     * @return the charset.
     * @throws NullPointerException if the locale is <code>null</code>.
     */
    public String getCharSet(Locale locale)
    {
        String key = locale.toString();
        if (key.length() == 0)
        {
            // The locale has neither a language nor a country,
            // only its variant (if any) can identify it.
            key = "__" + locale.getVariant();
            if (key.length() == 2)
            {
                return DEFAULT_CHARSET;
            }
        }

        return resolveCharSet(key,new String[]
        {
            locale.getLanguage(),
            locale.getCountry(),
            locale.getVariant()
        });
    }

    /**
     * Gets the charset for a locale with a variant. The search
     * is performed in the following order:
     * "lang"_"country"_"variant"="charset",
     * _"country"_"variant"="charset",
     * "lang"__"variant"="charset",
     * __"variant"="charset",
     * "lang"_"country"="charset",
     * _"country"="charset",
     * "lang"="charset".
     * If nothing of the above is found, the default charset is returned.
     * A <code>null</code> or empty variant makes this method behave
     * like {@link #getCharSet(Locale)}.
     *
     * @param locale the locale.
     * @param variant a variant field.
     * @return the charset.
     * @throws NullPointerException if the locale is <code>null</code>.
     */
    public String getCharSet(Locale locale,
                             String variant)
    {
        if ((variant == null) ||
            (variant.length() == 0))
        {
            return getCharSet(locale);
        }

        // Build the key identifying this locale and variant combination.
        String key = locale.toString();
        if (key.length() == 0)
        {
            key = "__" + locale.getVariant();
            if (key.length() > 2)
            {
                key += "_" + variant;
            }
            else
            {
                key += variant;
            }
        }
        else if (locale.getCountry().length() == 0)
        {
            key += "__" + variant;
        }
        else
        {
            key += "_" + variant;
        }

        return resolveCharSet(key,new String[]
        {
            locale.getLanguage(),
            locale.getCountry(),
            locale.getVariant(),
            variant
        });
    }

    /**
     * Gets the charset for a specified key.
     *
     * @param key the key for the charset.
     * @return the found charset or the default one.
     */
    public String getCharSet(String key)
    {
        String charset = searchCharSet(key);
        return charset.length() > 0 ? charset : DEFAULT_CHARSET;
    }

    /**
     * Gets the charset for a specified key.
     *
     * @param key the key for the charset.
     * @param def the default charset if none is found.
     * @return the found charset or the given default.
     */
    public String getCharSet(String key,
                             String def)
    {
        String charset = searchCharSet(key);
        return charset.length() > 0 ? charset : def;
    }

    /**
     * Resolves the charset for a locale key. The key is looked up
     * directly first (which also consults the cache). On a miss a full
     * search over the locale items is performed, falling back to the
     * default charset, and the outcome is cached under the key.
     *
     * @param key the non-empty key identifying the locale.
     * @param items the locale items from the most general (language)
     * to the most specific one.
     * @return the charset, never empty.
     */
    private String resolveCharSet(String key,
                                  String[] items)
    {
        String charset = searchCharSet(key);
        if (charset.length() == 0)
        {
            // Not found, perform a full search and update the cache.
            charset = searchCharSet(items);
            if (charset.length() == 0)
            {
                charset = DEFAULT_CHARSET;
            }
            mappers[MAP_CACHE].put(key,charset);
        }
        return charset;
    }

    /**
     * Searches for a charset for a specified locale. The search starts
     * with keys ending in the last item and drops one trailing item
     * on each round until a charset is found.
     *
     * @param items an array of locale items.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String[] items)
    {
        String charset;
        StringBuffer sb = new StringBuffer();
        for (int i = items.length; i > 0; i--)
        {
            charset = searchCharSet(items,sb,i);
            if (charset.length() > 0)
            {
                return charset;
            }
            sb.setLength(0);
        }
        return "";
    }

    /**
     * Searches recursively for a charset for a specified locale.
     * The item at index <code>count - 1</code> is prepended to the base
     * and the remaining leading items are tried in front of it, from the
     * nearest one down to the first, before the base alone is looked up.
     *
     * @param items an array of locale items.
     * @param base a buffer of base items.
     * @param count the number of items to go through.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String[] items,
                                 StringBuffer base,
                                 int count)
    {
        if ((--count >= 0) &&
            (items[count] != null) &&
            (items[count].length() > 0))
        {
            String charset;
            base.insert(0,items[count]);

            // The length of the base to restore after each attempt.
            int length = base.length();
            for (int i = count; i > 0; i--)
            {
                // A separator is added once for the adjacent item and
                // once more before the first item, so that skipped
                // items in between show up as an empty field.
                if ((i == count) ||
                    (i <= 1))
                {
                    base.insert(0,'_');
                    length++;
                }
                charset = searchCharSet(items,base,i);
                if (charset.length() > 0)
                {
                    return charset;
                }

                // Strip whatever the failed attempt prepended.
                base.delete(0,base.length() - length);
            }
            return searchCharSet(base.toString());
        }
        else
        {
            return "";
        }
    }

    /**
     * Searches for a charset for a specified key. The mappers are
     * consulted in priority order and the outcome is stored in the cache;
     * a miss is cached as an empty string.
     *
     * @param key the key for the charset.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String key)
    {
        if ((key != null) &&
            (key.length() > 0))
        {
            // Go through mappers.
            Map mapper;
            String charset;
            for (int i = 0; i < mappers.length; i++)
            {
                mapper = mappers[i];
                if (mapper != null)
                {
                    charset = (String) mapper.get(key);
                    if (charset != null)
                    {
                        // Update the cache.
                        if (i > MAP_CACHE)
                        {
                            mappers[MAP_CACHE].put(key,charset);
                        }
                        return charset;
                    }
                }
            }

            // Not found, add an empty string to the cache.
            mappers[MAP_CACHE].put(key,"");
        }
        return "";
    }

    /**
     * Sets a common locale-charset mapping. The common mapper of this
     * instance is replaced by a modified copy, so the shared static
     * defaults are left untouched, and the cache is cleared.
     *
     * @param key the key for the charset.
     * @param charset the corresponding charset.
     */
    protected synchronized void setCommonCharSet(String key,
                                                 String charset)
    {
        Map mapper = new HashMap(mappers[MAP_COM]);
        mapper.put(key,charset);
        mappers[MAP_COM] = mapper;
        mappers[MAP_CACHE].clear();
    }
}