001 package org.apache.fulcrum.mimetype.util; 002 003 004 /* 005 * Licensed to the Apache Software Foundation (ASF) under one 006 * or more contributor license agreements. See the NOTICE file 007 * distributed with this work for additional information 008 * regarding copyright ownership. The ASF licenses this file 009 * to you under the Apache License, Version 2.0 (the 010 * "License"); you may not use this file except in compliance 011 * with the License. You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, 016 * software distributed under the License is distributed on an 017 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 018 * KIND, either express or implied. See the License for the 019 * specific language governing permissions and limitations 020 * under the License. 021 */ 022 023 024 import java.util.Locale; 025 import java.util.Map; 026 import java.util.HashMap; 027 import java.util.Hashtable; 028 import java.util.Properties; 029 import java.io.File; 030 import java.io.InputStream; 031 import java.io.FileInputStream; 032 import java.io.IOException; 033 034 /** 035 * This class maintains a set of mappers defining mappings 036 * between locales and the corresponding charsets. The mappings 037 * are defined as properties between locale and charset names. 038 * The definitions can be listed in property files located in user's 039 * home directory, Java home directory or the current class jar. 040 * In addition, this class maintains static default mappings 041 * and constructors support application specific mappings. 042 * 043 * @author <a href="mailto:ilkka.priha@simsoft.fi">Ilkka Priha</a> 044 * @version $Id: CharSetMap.java 826489 2009-10-18 18:54:59Z tv $ 045 */ 046 public class CharSetMap 047 { 048 /** 049 * The default charset when nothing else is applicable. 050 */ 051 public static final String DEFAULT_CHARSET = "ISO-8859-1"; 052 053 /** 054 * The name for charset mapper resources. 055 */ 056 public static final String CHARSET_RESOURCE = "charset.properties"; 057 058 /** 059 * Priorities of available mappers. 060 */ 061 private static final int MAP_CACHE = 0; 062 private static final int MAP_PROG = 1; 063 private static final int MAP_HOME = 2; 064 private static final int MAP_SYS = 3; 065 private static final int MAP_JAR = 4; 066 private static final int MAP_COM = 5; 067 068 /** 069 * A common charset mapper for languages. 070 */ 071 private static HashMap commonMapper = new HashMap(); 072 static 073 { 074 commonMapper.put("ar","ISO-8859-6"); 075 commonMapper.put("be","ISO-8859-5"); 076 commonMapper.put("bg","ISO-8859-5"); 077 commonMapper.put("ca","ISO-8859-1"); 078 commonMapper.put("cs","ISO-8859-2"); 079 commonMapper.put("da","ISO-8859-1"); 080 commonMapper.put("de","ISO-8859-1"); 081 commonMapper.put("el","ISO-8859-7"); 082 commonMapper.put("en","ISO-8859-1"); 083 commonMapper.put("es","ISO-8859-1"); 084 commonMapper.put("et","ISO-8859-1"); 085 commonMapper.put("fi","ISO-8859-1"); 086 commonMapper.put("fr","ISO-8859-1"); 087 commonMapper.put("hr","ISO-8859-2"); 088 commonMapper.put("hu","ISO-8859-2"); 089 commonMapper.put("is","ISO-8859-1"); 090 commonMapper.put("it","ISO-8859-1"); 091 commonMapper.put("iw","ISO-8859-8"); 092 commonMapper.put("ja","Shift_JIS"); 093 commonMapper.put("ko","EUC-KR"); 094 commonMapper.put("lt","ISO-8859-2"); 095 commonMapper.put("lv","ISO-8859-2"); 096 commonMapper.put("mk","ISO-8859-5"); 097 commonMapper.put("nl","ISO-8859-1"); 098 commonMapper.put("no","ISO-8859-1"); 099 commonMapper.put("pl","ISO-8859-2"); 100 commonMapper.put("pt","ISO-8859-1"); 101 commonMapper.put("ro","ISO-8859-2"); 102 commonMapper.put("ru","ISO-8859-5"); 103 commonMapper.put("sh","ISO-8859-5"); 104 commonMapper.put("sk","ISO-8859-2"); 105 commonMapper.put("sl","ISO-8859-2"); 106 commonMapper.put("sq","ISO-8859-2"); 107 commonMapper.put("sr","ISO-8859-5"); 108 commonMapper.put("sv","ISO-8859-1"); 109 commonMapper.put("tr","ISO-8859-9"); 110 commonMapper.put("uk","ISO-8859-5"); 111 commonMapper.put("zh","GB2312"); 112 commonMapper.put("zh_TW","Big5"); 113 } 114 115 /** 116 * An array of available charset mappers. 117 */ 118 private Map mappers[] = new Map[6]; 119 120 /** 121 * Loads mappings from a stream. 122 * 123 * @param input an input stream. 124 * @return the mappings. 125 * @throws IOException for an incorrect stream. 126 */ 127 protected static Map loadStream(InputStream input) 128 throws IOException 129 { 130 Properties props = new Properties(); 131 props.load(input); 132 return new HashMap(props); 133 } 134 135 /** 136 * Loads mappings from a file. 137 * 138 * @param file a file. 139 * @return the mappings. 140 * @throws IOException for an incorrect file. 141 */ 142 protected static Map loadFile(File file) 143 throws IOException 144 { 145 return loadStream(new FileInputStream(file)); 146 } 147 148 /** 149 * Loads mappings from a file path. 150 * 151 * @param path a file path. 152 * @return the mappings. 153 * @throws IOException for an incorrect file. 154 */ 155 protected static Map loadPath(String path) 156 throws IOException 157 { 158 return loadFile(new File(path)); 159 } 160 161 /** 162 * Loads mappings from a resource. 163 * 164 * @param name a resource name. 165 * @return the mappings. 166 */ 167 protected static Map loadResource(String name) 168 { 169 InputStream input = CharSetMap.class.getResourceAsStream(name); 170 if (input != null) 171 { 172 try 173 { 174 return loadStream(input); 175 } 176 catch (IOException x) 177 { 178 return null; 179 } 180 } 181 else 182 { 183 return null; 184 } 185 } 186 187 /** 188 * Constructs a new charset map with default mappers. 189 */ 190 public CharSetMap() 191 { 192 String path; 193 try 194 { 195 // Check whether the user directory contains mappings. 196 path = System.getProperty("user.home"); 197 if (path != null) 198 { 199 path = path + File.separator + CHARSET_RESOURCE; 200 mappers[MAP_HOME] = loadPath(path); 201 } 202 } 203 catch (IOException x) 204 { 205 // ignore 206 } 207 208 try 209 { 210 // Check whether the system directory contains mappings. 211 path = System.getProperty("java.home") + 212 File.separator + "lib" + File.separator + CHARSET_RESOURCE; 213 mappers[MAP_SYS] = loadPath(path); 214 } 215 catch (IOException x) 216 { 217 // ignore 218 } 219 220 // Check whether the current class jar contains mappings. 221 mappers[MAP_JAR] = loadResource("/META-INF/" + CHARSET_RESOURCE); 222 223 // Set the common mapper to have the lowest priority. 224 mappers[MAP_COM] = commonMapper; 225 226 // Set the cache mapper to have the highest priority. 227 mappers[MAP_CACHE] = new Hashtable(); 228 } 229 230 /** 231 * Contructs a charset map from properties. 232 * 233 * @param props charset mapping propeties. 234 */ 235 public CharSetMap(Properties props) 236 { 237 this(); 238 mappers[MAP_PROG] = new HashMap(props); 239 } 240 241 /** 242 * Contructs a charset map read from a stream. 243 * 244 * @param input an input stream. 245 * @throws IOException for an incorrect stream. 246 */ 247 public CharSetMap(InputStream input) 248 throws IOException 249 { 250 this(); 251 mappers[MAP_PROG] = loadStream(input); 252 } 253 254 /** 255 * Contructs a charset map read from a property file. 256 * 257 * @param file a property file. 258 * @throws IOException for an incorrect property file. 259 */ 260 public CharSetMap(File file) 261 throws IOException 262 { 263 this(); 264 mappers[MAP_PROG] = loadFile(file); 265 } 266 267 /** 268 * Contructs a charset map read from a property file path. 269 * 270 * @param path a property file path. 271 * @throws IOException for an incorrect property file. 272 */ 273 public CharSetMap(String path) 274 throws IOException 275 { 276 this(); 277 mappers[MAP_PROG] = loadPath(path); 278 } 279 280 /** 281 * Sets a locale-charset mapping. 282 * 283 * @param key the key for the charset. 284 * @param charset the corresponding charset. 285 */ 286 public synchronized void setCharSet(String key, 287 String charset) 288 { 289 HashMap mapper = (HashMap) mappers[MAP_PROG]; 290 mapper = mapper != null ? 291 (HashMap) mapper.clone() : new HashMap(); 292 mapper.put(key,charset); 293 mappers[MAP_PROG] = mapper; 294 mappers[MAP_CACHE].clear(); 295 } 296 297 /** 298 * Gets the charset for a locale. First a locale specific charset 299 * is searched for, then a country specific one and lastly a language 300 * specific one. If none is found, the default charset is returned. 301 * 302 * @param locale the locale. 303 * @return the charset. 304 */ 305 public String getCharSet(Locale locale) 306 { 307 // Check the cache first. 308 String key = locale.toString(); 309 if (key.length() == 0) 310 { 311 key = "__" + locale.getVariant(); 312 if (key.length() == 2) 313 { 314 return DEFAULT_CHARSET; 315 } 316 } 317 String charset = searchCharSet(key); 318 if (charset.length() == 0) 319 { 320 // Not found, perform a full search and update the cache. 321 String[] items = new String[3]; 322 items[2] = locale.getVariant(); 323 items[1] = locale.getCountry(); 324 items[0] = locale.getLanguage(); 325 charset = searchCharSet(items); 326 if (charset.length() == 0) 327 { 328 charset = DEFAULT_CHARSET; 329 } 330 mappers[MAP_CACHE].put(key,charset); 331 } 332 return charset; 333 } 334 335 /** 336 * Gets the charset for a locale with a variant. The search 337 * is performed in the following order: 338 * "lang"_"country"_"variant"="charset", 339 * _"counry"_"variant"="charset", 340 * "lang"__"variant"="charset", 341 * __"variant"="charset", 342 * "lang"_"country"="charset", 343 * _"country"="charset", 344 * "lang"="charset". 345 * If nothing of the above is found, the default charset is returned. 346 * 347 * @param locale the locale. 348 * @param variant a variant field. 349 * @return the charset. 350 */ 351 public String getCharSet(Locale locale, 352 String variant) 353 { 354 // Check the cache first. 355 if ((variant != null) && 356 (variant.length() > 0)) 357 { 358 String key = locale.toString(); 359 if (key.length() == 0) 360 { 361 key = "__" + locale.getVariant(); 362 if (key.length() > 2) 363 { 364 key += '_' + variant; 365 } 366 else 367 { 368 key += variant; 369 } 370 } 371 else if (locale.getCountry().length() == 0) 372 { 373 key += "__" + variant; 374 } 375 else 376 { 377 key += '_' + variant; 378 } 379 String charset = searchCharSet(key); 380 if (charset.length() == 0) 381 { 382 // Not found, perform a full search and update the cache. 383 String[] items = new String[4]; 384 items[3] = variant; 385 items[2] = locale.getVariant(); 386 items[1] = locale.getCountry(); 387 items[0] = locale.getLanguage(); 388 charset = searchCharSet(items); 389 if (charset.length() == 0) 390 { 391 charset = DEFAULT_CHARSET; 392 } 393 mappers[MAP_CACHE].put(key,charset); 394 } 395 return charset; 396 } 397 else 398 { 399 return getCharSet(locale); 400 } 401 } 402 403 /** 404 * Gets the charset for a specified key. 405 * 406 * @param key the key for the charset. 407 * @return the found charset or the default one. 408 */ 409 public String getCharSet(String key) 410 { 411 String charset = searchCharSet(key); 412 return charset.length() > 0 ? charset : DEFAULT_CHARSET; 413 } 414 415 /** 416 * Gets the charset for a specified key. 417 * 418 * @param key the key for the charset. 419 * @param def the default charset if none is found. 420 * @return the found charset or the given default. 421 */ 422 public String getCharSet(String key, 423 String def) 424 { 425 String charset = searchCharSet(key); 426 return charset.length() > 0 ? charset : def; 427 } 428 429 /** 430 * Searches for a charset for a specified locale. 431 * 432 * @param items an array of locale items. 433 * @return the found charset or an empty string. 434 */ 435 private String searchCharSet(String[] items) 436 { 437 String charset; 438 StringBuffer sb = new StringBuffer(); 439 for (int i = items.length; i > 0; i--) 440 { 441 charset = searchCharSet(items,sb,i); 442 if (charset.length() > 0) 443 { 444 return charset; 445 } 446 sb.setLength(0); 447 } 448 return ""; 449 } 450 451 /** 452 * Searches recursively for a charset for a specified locale. 453 * 454 * @param items an array of locale items. 455 * @param base a buffer of base items. 456 * @param count the number of items to go through. 457 * @return the found charset or an empty string. 458 */ 459 private String searchCharSet(String[] items, 460 StringBuffer base, 461 int count) 462 { 463 if ((--count >= 0) && 464 (items[count] != null) && 465 (items[count].length() > 0)) 466 { 467 String charset; 468 base.insert(0,items[count]); 469 int length = base.length(); 470 for (int i = count; i > 0; i--) 471 { 472 if ((i == count) || 473 (i <= 1)) 474 { 475 base.insert(0,'_'); 476 length++; 477 } 478 charset = searchCharSet(items,base,i); 479 if (charset.length() > 0) 480 { 481 return charset; 482 } 483 base.delete(0,base.length() - length); 484 } 485 return searchCharSet(base.toString()); 486 } 487 else 488 { 489 return ""; 490 } 491 } 492 493 /** 494 * Searches for a charset for a specified key. 495 * 496 * @param key the key for the charset. 497 * @return the found charset or an empty string. 498 */ 499 private String searchCharSet(String key) 500 { 501 if ((key != null) && 502 (key.length() > 0)) 503 { 504 // Go through mappers. 505 Map mapper; 506 String charset; 507 for (int i = 0; i < mappers.length; i++) 508 { 509 mapper = mappers[i]; 510 if (mapper != null) 511 { 512 charset = (String) mapper.get(key); 513 if (charset != null) 514 { 515 // Update the cache. 516 if (i > MAP_CACHE) 517 { 518 mappers[MAP_CACHE].put(key,charset); 519 } 520 return charset; 521 } 522 } 523 } 524 525 // Not found, add an empty string to the cache. 526 mappers[MAP_CACHE].put(key,""); 527 } 528 return ""; 529 } 530 531 /** 532 * Sets a common locale-charset mapping. 533 * 534 * @param key the key for the charset. 535 * @param charset the corresponding charset. 536 */ 537 protected synchronized void setCommonCharSet(String key, 538 String charset) 539 { 540 HashMap mapper = (HashMap) ((HashMap) mappers[MAP_COM]).clone(); 541 mapper.put(key,charset); 542 mappers[MAP_COM] = mapper; 543 mappers[MAP_CACHE].clear(); 544 } 545 }