1 /* 2 * Copyright 2007-2009 Medsea Business Solutions S.L. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package eu.medsea.mimeutil; 17 18 import java.io.File; 19 import java.io.InputStream; 20 import java.io.UnsupportedEncodingException; 21 import java.net.URL; 22 import java.nio.ByteOrder; 23 import java.util.ArrayList; 24 import java.util.Collection; 25 import java.util.Collections; 26 import java.util.HashMap; 27 import java.util.Iterator; 28 import java.util.LinkedHashMap; 29 import java.util.List; 30 import java.util.Map; 31 import java.util.Set; 32 import java.util.TreeMap; 33 import java.util.TreeSet; 34 import java.util.regex.Pattern; 35 import java.util.zip.ZipException; 36 37 import org.slf4j.Logger; 38 import org.slf4j.LoggerFactory; 39 40 import eu.medsea.mimeutil.detector.MimeDetector; 41 import eu.medsea.util.EncodingGuesser; 42 import eu.medsea.util.StringUtil; 43 import eu.medsea.util.ZipJarUtil; 44 45 /** 46 * <p> 47 * The <code>MimeUtil2</code> is a utility class that allows applications to detect, work with and manipulate MIME types. 48 * </p> 49 * <p> 50 * A MIME or "Multipurpose Internet Mail Extension" type is an Internet standard that is important outside of just e-mail use. 51 * MIME is used extensively in other communications protocols such as HTTP for web communications. 52 * IANA "Internet Assigned Numbers Authority" is responsible for the standardisation and publication of MIME types. Basically any 53 * resource on any computer that can be located via a URL can be assigned a MIME type. So for instance, JPEG images have a MIME type 54 * of image/jpg. Some resources can have multiple MIME types associated with them such as files with an XML extension have the MIME types 55 * text/xml and application/xml and even specialised versions of xml such as image/svg+xml for SVG image files. 56 * </p> 57 * <p> 58 * To do this <code>MimeUtil2</code> uses registered <code>MimeDetector</code>(s) that are delegated too in sequence to actually 59 * perform the detection. There are several <code>MimeDetector</code> implementations that come with the utility and 60 * you can register and unregister them to perform detection based on file extensions, file globing and magic number detection.<br/> 61 * Their is also a fourth MimeDetector that is registered by default that detects text files and encodings. Unlike the other 62 * MimeDetector(s) or any MimeDetector(s) you may choose to implement, the TextMimeDetector cannot be registered or 63 * unregistered by your code. It is advisable that you read the java doc for the TextMimeDetector as it can be modified in 64 * several ways to make it perform better and or detect more specific types.<br/> 65 * 66 * Please refer to the java doc for each of these <code>MimeDetector</code>(s) for a description of how they 67 * actually perform their particular detection process. 68 * </p> 69 * <p> 70 * It is important to note that MIME matching is not an exact science, meaning 71 * that a positive match does not guarantee that the returned MIME type is actually correct. 72 * It is a best guess method of matching and the matched MIME types should be used with this in 73 * mind. 74 * </p> 75 * <p> 76 * New <code>MimeDetector</code>(s) can easily be created and registered with <code>MimeUtil2</code> to extend it's 77 * functionality beyond these initial detection strategies by extending the <code>AbstractMimeDetector</code> class. 78 * To see how to implement your own <code>MimeDetector</code> take a look 79 * at the java doc and source code for the {@link ExtensionMimeDetector}, {@link MagicMimeMimeDetector} and 80 * {@link OpendesktopMimeDetector} classes. To register and unregister MimeDetector(s) use the 81 * [un]registerMimeDetector(...) methods of this class. 82 * </p> 83 * <p> 84 * The order that the <code>MimeDetector</code>(s) are executed is defined by the order each <code>MimeDetector</code> is registered. 85 * </p> 86 * <p> 87 * The resulting <code>Collection</code> of mime types returned in response to a getMimeTypes(...) call is a normalised list of the 88 * accumulation of MIME types returned by each of the registered <code>MimeDetector</code>(s) that implement the specified getMimeTypesXXX(...) 89 * methods. 90 * </p> 91 * <p> 92 * All methods in this class that return a Collection object containing MimeType(s) actually return a {@link MimeTypeHashSet} 93 * that implements both the {@link Set} and {@link Collection} interfaces. 94 * </p> 95 * 96 * @author Steven McArdle. 97 * @since 2.1 98 * 99 */ 100 public class MimeUtil2 { 101 private static Logger log = LoggerFactory.getLogger(MimeUtil2.class); 102 103 /** 104 * Mime type used to identify a directory 105 */ 106 public static final MimeType DIRECTORY_MIME_TYPE = new MimeType("application/directory"); 107 /** 108 * Mime type used to identify an unknown MIME type 109 */ 110 public static final MimeType UNKNOWN_MIME_TYPE = new MimeType("application/octet-stream"); 111 112 private static final Pattern mimeSplitter = Pattern.compile("[/;]++"); 113 114 // All mime types known to the utility. This is synchronised for multi-threaded use 115 // and ALL instances of MimeUtil2 share this list. 116 private static Map mimeTypes = Collections.synchronizedMap(new HashMap()); 117 118 // the native byte order of the underlying OS. "BIG" or "little" Endian 119 private static ByteOrder nativeByteOrder = ByteOrder.nativeOrder(); 120 121 private MimeDetectorRegistry mimeDetectorRegistry = new MimeDetectorRegistry(); 122 123 /** 124 * While MimeType(s) are being loaded by the MimeDetector(s) they should be 125 * added to the list of known MIME types. It is not mandatory for MimeDetector(s) 126 * to do so but they should where possible so that the list is as complete as possible. 127 * You can add other MIME types to this list using this method. You can then use the 128 * isMimeTypeKnown(...) utility methods to see if a MIME type you have 129 * matches one that the utility has already seen. 130 * <p> 131 * This can be used to limit the mime types you work with i.e. if its not been loaded 132 * then don't bother using it as it won't match. This is no guarantee that a match will not 133 * be found as it is possible that a particular MimeDetector does not have an initialisation 134 * phase that loads all of the MIME types it will match. 135 * </p> 136 * <p> 137 * For instance if you had a MIME type of abc/xyz and passed this to 138 * isMimeTypeKnown(...) it would return false unless you specifically add 139 * this to the know MIME types using this method. 140 * </p> 141 * 142 * @param mimeType 143 * a MIME type you want to add to the known MIME types. 144 * Duplicates are ignored. 145 * @see #isMimeTypeKnown(String mimeType) 146 * @see #isMimeTypeKnown(MimeType mimetType) 147 */ 148 public static void addKnownMimeType(final MimeType mimeType) { 149 addKnownMimeType(mimeType.toString()); 150 } 151 152 153 /** 154 * While MimeType(s) are being loaded by the MimeDetector(s) they should be 155 * added to the list of known MIME types. It is not mandatory for MimeDetector(s) 156 * to do so but they should where possible so that the list is as complete as possible. 157 * You can add other MIME types to this list using this method. You can then use the 158 * isMimeTypeKnown(...) utility methods to see if a MIME type you have 159 * matches one that the utility has already seen. 160 * <p> 161 * This can be used to limit the mime types you work with i.e. if its not been loaded 162 * then don't bother using it as it won't match. This is no guarantee that a match will not 163 * be found as it is possible that a particular MimeDetector does not have an initialisation 164 * phase that loads all of the MIME types it will match. 165 * </p> 166 * <p> 167 * For instance if you had a MIME type of abc/xyz and passed this to 168 * isMimeTypeKnown(...) it would return false unless you specifically add 169 * this to the know MIME types using this method. 170 * </p> 171 * 172 * @param mimeType 173 * a MIME type you want to add to the known MIME types. 174 * Duplicates are ignored. 175 * @see #isMimeTypeKnown(String mimetype) 176 * @see #isMimeTypeKnown(MimeType mimetType) 177 */ 178 public static void addKnownMimeType(final String mimeType) { 179 try { 180 181 String key = getMediaType(mimeType); 182 Set s = (Set) mimeTypes.get(key); 183 if (s == null) { 184 s = new TreeSet(); 185 } 186 s.add(getSubType(mimeType)); 187 mimeTypes.put(key, s); 188 } catch (MimeException ignore) { 189 // A couple of entries in the magic mime file don't follow the rules 190 // so ignore them 191 } 192 } 193 194 /** 195 * Returns a copy of the Collection of currently known MIME types as strings that have been 196 * registered either by the initialisation methods of the MimeDetector(s) or by the user. 197 */ 198 public static Collection getKnownMimeTypes() { 199 Collection mimeTypes = new ArrayList(); 200 Iterator i = MimeUtil2.mimeTypes.keySet().iterator(); 201 while(i.hasNext()) { 202 // Iterate through each set and compose the MIME types 203 String mediaType = (String)i.next(); 204 Iterator it = ((Set)MimeUtil2.mimeTypes.get(mediaType)).iterator(); 205 while(it.hasNext()) { 206 mimeTypes.add(mediaType + "/" + (String)it.next()); 207 } 208 } 209 return mimeTypes; 210 } 211 212 /** 213 * Register a MimeDetector and add it to the MimeDetector registry. 214 * MimeDetector(s) are effectively singletons as they are keyed against their 215 * fully qualified class name. 216 * @param mimeDetector. This must be the fully qualified name of a concrete instance of an 217 * AbstractMimeDetector class. 218 * This enforces that all custom MimeDetector(s) extend the AbstractMimeDetector. 219 * @see MimeDetector 220 */ 221 public MimeDetector registerMimeDetector(final String mimeDetector) { 222 return mimeDetectorRegistry.registerMimeDetector(mimeDetector); 223 } 224 225 /** 226 * Get the extension part of a file name defined by the file parameter. 227 * 228 * @param file 229 * a file object 230 * @return the file extension or null if it does not have one. 231 */ 232 public static String getExtension(final File file) { 233 return getExtension(file.getName()); 234 } 235 236 /** 237 * Get the extension part of a file name defined by the fileName parameter. 238 * There may be no extension or it could be a single part extension such as 239 * .bat or a multi-part extension such as .tar.gz 240 * 241 * @param fileName 242 * a relative or absolute path to a file 243 * @return the file extension or null if it does not have one. 244 */ 245 public static String getExtension(final String fileName) { 246 if(fileName == null || fileName.length() == 0) { 247 return ""; 248 } 249 int index = fileName.indexOf("."); 250 return index < 0 ? "" : fileName.substring(index + 1); 251 } 252 253 /** 254 * Get the first in a comma separated list of mime types. Useful when using 255 * extension mapping that can return multiple mime types separate by commas 256 * and you only want the first one. 257 * 258 * @param mimeTypes 259 * comma separated list of mime types 260 * @return first in a comma separated list of mime types or null if the mimeTypes string is null or empty 261 */ 262 public static MimeType getFirstMimeType(final String mimeTypes) { 263 if (mimeTypes != null && mimeTypes.trim().length() != 0) { 264 return new MimeType(mimeTypes.split(",")[0].trim()); 265 } 266 return null; 267 } 268 269 /** 270 * Utility method to get the major or media part of a mime type i.e. the bit before 271 * the '/' character 272 * 273 * @param mimeType 274 * you want to get the media part from 275 * @return media type of the mime type 276 * @throws MimeException 277 * if you pass in an invalid mime type structure 278 */ 279 public static String getMediaType(final String mimeType) 280 throws MimeException { 281 return new MimeType(mimeType).getMediaType(); 282 } 283 284 /** 285 * 286 * Utility method to get the quality part of a mime type. If it does not 287 * exist then it is always set to q=1.0 unless it's a wild card. For the 288 * major component wild card the value is set to 0.01 For the minor 289 * component wild card the value is set to 0.02 290 * <p> 291 * Thanks to the Apache organisation for these settings. 292 * 293 * @param mimeType 294 * a valid mime type string with or without a valid q parameter 295 * @return the quality value of the mime type either calculated from the 296 * rules above or the actual value defined. 297 * @throws MimeException 298 * this is thrown if the mime type pattern is invalid. 299 */ 300 public static double getMimeQuality(final String mimeType) throws MimeException { 301 if (mimeType == null) { 302 throw new MimeException("Invalid MimeType [" + mimeType + "]."); 303 } 304 String[] parts = mimeSplitter.split(mimeType); 305 if (parts.length < 2) { 306 throw new MimeException("Invalid MimeType [" + mimeType + "]."); 307 } 308 if (parts.length > 2) { 309 for (int i = 2; i < parts.length; i++) { 310 if (parts[i].trim().startsWith("q=")) { 311 // Get the number part 312 try { 313 // Get the quality factor 314 double d = Double.parseDouble(parts[i].split("=")[1] 315 .trim()); 316 return d > 1.0 ? 1.0 : d; 317 } catch (NumberFormatException e) { 318 throw new MimeException( 319 "Invalid MIME quality indicator [" 320 + parts[i].trim() 321 + "]. Must be a valid double between 0 and 1"); 322 } catch (Exception e) { 323 throw new MimeException( 324 "Error parsing MIME quality indicator.", e); 325 } 326 } 327 } 328 } 329 // No quality indicator so always assume its 1 unless a wild card is used 330 if (StringUtil.contains(parts[0], "*")) { 331 return 0.01; 332 } else if (StringUtil.contains(parts[1], "*")) { 333 return 0.02; 334 } else { 335 // Assume q value of 1 336 return 1.0; 337 } 338 } 339 340 /** 341 * Get a registered MimeDetector by name. 342 * @param name the name of a registered MimeDetector. This is always the fully qualified 343 * name of the class implementing the MimeDetector. 344 * @return 345 */ 346 public MimeDetector getMimeDetector(final String name) { 347 return mimeDetectorRegistry.getMimeDetector(name); 348 } 349 350 /** 351 * Get a Collection of possible MimeType(s) that this byte array could represent 352 * according to the registered MimeDetector(s). If no MimeType(s) are detected 353 * then the returned Collection will contain only the UNKNOWN_MIME_TYPE 354 * @param data 355 * @return all matching MimeType(s) 356 * @throws MimeException 357 */ 358 public final Collection getMimeTypes(final byte [] data) throws MimeException 359 { 360 return getMimeTypes(data, UNKNOWN_MIME_TYPE); 361 } 362 363 /** 364 * Get a Collection of possible MimeType(s) that this byte array could represent 365 * according to the registered MimeDetector(s). If no MimeType(s) are detected 366 * then the returned Collection will contain only the passed in unknownMimeType 367 * @param data 368 * @param unknownMimeType used if the registered MimeDetector(s) fail to match any MimeType(s) 369 * @return all matching MimeType(s) 370 * @throws MimeException 371 */ 372 public final Collection getMimeTypes(final byte [] data, final MimeType unknownMimeType) throws MimeException 373 { 374 Collection mimeTypes = new MimeTypeHashSet(); 375 if(data == null) { 376 log.error("byte array cannot be null."); 377 } else { 378 if(log.isDebugEnabled()) { 379 try { 380 log.debug("Getting MIME types for byte array [" + StringUtil.getHexString(data)+ "]."); 381 }catch(UnsupportedEncodingException e) { 382 throw new MimeException(e); 383 } 384 } 385 mimeTypes.addAll(mimeDetectorRegistry.getMimeTypes(data)); 386 387 // We don't want the unknownMimeType added to the collection by MimeDetector(s) 388 mimeTypes.remove(unknownMimeType); 389 } 390 391 // If the collection is empty we want to add the unknownMimetype 392 if(mimeTypes.isEmpty()) { 393 mimeTypes.add(unknownMimeType); 394 } 395 if(log.isDebugEnabled()) { 396 log.debug("Retrieved MIME types [" + mimeTypes.toString() + "]"); 397 } 398 return mimeTypes; 399 } 400 401 /** 402 * Get all of the matching mime types for this file object. 403 * The method delegates down to each of the registered MimeHandler(s) and returns a 404 * normalised list of all matching mime types. If no matching mime types are found the returned 405 * Collection will contain the default UNKNOWN_MIME_TYPE 406 * @param file the File object to detect. 407 * @return collection of matching MimeType(s) 408 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s) 409 * executed. 410 */ 411 public final Collection getMimeTypes(final File file) throws MimeException 412 { 413 return getMimeTypes(file, UNKNOWN_MIME_TYPE); 414 } 415 416 /** 417 * Get all of the matching mime types for this file object. 418 * The method delegates down to each of the registered MimeHandler(s) and returns a 419 * normalised list of all matching mime types. If no matching mime types are found the returned 420 * Collection will contain the unknownMimeType passed in. 421 * @param file the File object to detect. 422 * @param unknownMimeType. 423 * @return the Collection of matching mime types. If the collection would be empty i.e. no matches then this will 424 * contain the passed in parameter unknownMimeType 425 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s) 426 * executed. 427 */ 428 public final Collection getMimeTypes(final File file, final MimeType unknownMimeType) throws MimeException 429 { 430 Collection mimeTypes = new MimeTypeHashSet(); 431 432 if(file == null) { 433 log.error("File reference cannot be null."); 434 } else { 435 436 if(log.isDebugEnabled()) { 437 log.debug("Getting MIME types for file [" + file.getAbsolutePath() + "]."); 438 } 439 440 if(file.isDirectory()) { 441 mimeTypes.add(MimeUtil2.DIRECTORY_MIME_TYPE); 442 } else { 443 // Defer this call to the file name and stream methods 444 mimeTypes.addAll(mimeDetectorRegistry.getMimeTypes(file)); 445 446 // We don't want the unknownMimeType added to the collection by MimeDetector(s) 447 mimeTypes.remove(unknownMimeType); 448 } 449 } 450 // If the collection is empty we want to add the unknownMimetype 451 if(mimeTypes.isEmpty()) { 452 mimeTypes.add(unknownMimeType); 453 } 454 if(log.isDebugEnabled()) { 455 log.debug("Retrieved MIME types [" + mimeTypes.toString() + "]"); 456 } 457 return mimeTypes; 458 } 459 460 /** 461 * Get all of the matching mime types for this InputStream object. 462 * The method delegates down to each of the registered MimeHandler(s) and returns a 463 * normalised list of all matching mime types. If no matching mime types are found the returned 464 * Collection will contain the default UNKNOWN_MIME_TYPE 465 * @param in InputStream to detect. 466 * @return 467 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s) 468 * executed. 469 */ 470 public final Collection getMimeTypes(final InputStream in) throws MimeException 471 { 472 return getMimeTypes(in, UNKNOWN_MIME_TYPE); 473 } 474 475 /** 476 * Get all of the matching mime types for this InputStream object. 477 * The method delegates down to each of the registered MimeHandler(s) and returns a 478 * normalised list of all matching mime types. If no matching mime types are found the returned 479 * Collection will contain the unknownMimeType passed in. 480 * @param in the InputStream object to detect. 481 * @param unknownMimeType. 482 * @return the Collection of matching mime types. If the collection would be empty i.e. no matches then this will 483 * contain the passed in parameter unknownMimeType 484 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s) 485 * executed. 486 */ 487 public final Collection getMimeTypes(final InputStream in, final MimeType unknownMimeType) throws MimeException 488 { 489 Collection mimeTypes = new MimeTypeHashSet(); 490 491 if(in == null) { 492 log.error("InputStream reference cannot be null."); 493 } else { 494 if (!in.markSupported()) { 495 throw new MimeException("InputStream must support the mark() and reset() methods."); 496 } 497 if(log.isDebugEnabled()) { 498 log.debug("Getting MIME types for InputSteam [" + in + "]."); 499 } 500 mimeTypes.addAll(mimeDetectorRegistry.getMimeTypes(in)); 501 502 // We don't want the unknownMimeType added to the collection by MimeDetector(s) 503 mimeTypes.remove(unknownMimeType); 504 } 505 // If the collection is empty we want to add the unknownMimetype 506 if(mimeTypes.isEmpty()) { 507 mimeTypes.add(unknownMimeType); 508 } 509 if(log.isDebugEnabled()) { 510 log.debug("Retrieved MIME types [" + mimeTypes.toString() + "]"); 511 } 512 return mimeTypes; 513 } 514 515 /** 516 * Get all of the matching mime types for this file name. 517 * The method delegates down to each of the registered MimeHandler(s) and returns a 518 * normalised list of all matching mime types. If no matching mime types are found the returned 519 * Collection will contain the default UNKNOWN_MIME_TYPE 520 * @param fileName the name of a file to detect. 521 * @return collection of matching MimeType(s) 522 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s) 523 * executed. 524 */ 525 public final Collection getMimeTypes(final String fileName) throws MimeException 526 { 527 return getMimeTypes(fileName, UNKNOWN_MIME_TYPE); 528 } 529 530 /** 531 * Get all of the matching mime types for this file name . 532 * The method delegates down to each of the registered MimeHandler(s) and returns a 533 * normalised list of all matching mime types. If no matching mime types are found the returned 534 * Collection will contain the unknownMimeType passed in. 535 * @param fileName the name of a file to detect. 536 * @param unknownMimeType. 537 * @return the Collection of matching mime types. If the collection would be empty i.e. no matches then this will 538 * contain the passed in parameter unknownMimeType 539 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s) 540 * executed. 541 */ 542 public final Collection getMimeTypes(final String fileName, final MimeType unknownMimeType) throws MimeException 543 { 544 Collection mimeTypes = new MimeTypeHashSet(); 545 546 if(fileName == null) { 547 log.error("fileName cannot be null."); 548 } else { 549 if(log.isDebugEnabled()) { 550 log.debug("Getting MIME types for file name [" + fileName + "]."); 551 } 552 553 // Test if this is a directory 554 File file = new File(fileName); 555 556 if(file.isDirectory()) { 557 mimeTypes.add(MimeUtil2.DIRECTORY_MIME_TYPE); 558 } else { 559 mimeTypes.addAll(mimeDetectorRegistry.getMimeTypes(fileName)); 560 561 // We don't want the unknownMimeType added to the collection by MimeDetector(s) 562 mimeTypes.remove(unknownMimeType); 563 } 564 } 565 // If the collection is empty we want to add the unknownMimetype 566 if(mimeTypes.isEmpty()) { 567 mimeTypes.add(unknownMimeType); 568 } 569 if(log.isDebugEnabled()) { 570 log.debug("Retrieved MIME types [" + mimeTypes.toString() + "]"); 571 } 572 return mimeTypes; 573 574 } 575 576 /** 577 * Get all of the matching mime types for this URL object. 578 * The method delegates down to each of the registered MimeHandler(s) and returns a 579 * normalised list of all matching mime types. If no matching mime types are found the returned 580 * Collection will contain the default UNKNOWN_MIME_TYPE 581 * @param url a URL to detect. 582 * @return Collection of matching MimeType(s) 583 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s) 584 * executed. 585 */ 586 public final Collection getMimeTypes(final URL url) throws MimeException 587 { 588 return getMimeTypes(url, UNKNOWN_MIME_TYPE); 589 } 590 591 public final Collection getMimeTypes(final URL url, final MimeType unknownMimeType) throws MimeException 592 { 593 Collection mimeTypes = new MimeTypeHashSet(); 594 595 if(url == null) { 596 log.error("URL reference cannot be null."); 597 } else { 598 if(log.isDebugEnabled()) { 599 log.debug("Getting MIME types for URL [" + url + "]."); 600 } 601 602 // Test if this is a directory 603 File file = new File(url.getPath()); 604 if(file.isDirectory()) { 605 mimeTypes.add(MimeUtil2.DIRECTORY_MIME_TYPE); 606 } else { 607 // defer these calls to the file name and stream methods 608 mimeTypes.addAll(mimeDetectorRegistry.getMimeTypes(url)); 609 610 // We don't want the unknownMimeType added to the collection by MimeDetector(s) 611 mimeTypes.remove(unknownMimeType); 612 } 613 } 614 // If the collection is empty we want to add the unknownMimetype 615 if(mimeTypes.isEmpty()) { 616 mimeTypes.add(unknownMimeType); 617 } 618 if(log.isDebugEnabled()) { 619 log.debug("Retrieved MIME types [" + mimeTypes.toString() + "]"); 620 } 621 return mimeTypes; 622 } 623 624 /** 625 * Get the native byte order of the OS on which you are running. It will be 626 * either big or little endian. This is used internally for the magic mime 627 * rules mapping. 628 * 629 * @return ByteOrder 630 */ 631 public static ByteOrder getNativeOrder() { 632 return MimeUtil2.nativeByteOrder; 633 } 634 635 /** 636 * Gives you the best match for your requirements. 637 * <p> 638 * You can pass the accept header from a browser request to this method 639 * along with a comma separated list of possible mime types returned from 640 * say getExtensionMimeTypes(...) and the best match according to the accept 641 * header will be returned. 642 * </p> 643 * <p> 644 * The following is typical of what may be specified in an HTTP Accept 645 * header: 646 * </p> 647 * <p> 648 * Accept: text/xml, application/xml, application/xhtml+xml, 649 * text/html;q=0.9, text/plain;q=0.8, video/x-mng, image/png, image/jpeg, 650 * image/gif;q=0.2, text/css, */*;q=0.1 651 * </p> 652 * <p> 653 * The quality parameter (q) indicates how well the user agent handles the 654 * MIME type. A value of 1 indicates the MIME type is understood perfectly, 655 * and a value of 0 indicates the MIME type isn't understood at all. 656 * </p> 657 * <p> 658 * The reason the image/gif MIME type contains a quality parameter of 0.2, 659 * is to indicate that PNG & JPEG are preferred over GIF if the server is 660 * using content negotiation to deliver either a PNG or a GIF to user 661 * agents. Similarly, the text/html quality parameter has been lowered a 662 * little, to ensure that the XML MIME types are given in preference if 663 * content negotiation is being used to serve an XHTML document. 664 * </p> 665 * 666 * @param accept 667 * is a comma separated list of mime types you can accept 668 * including QoS parameters. Can pass the Accept: header 669 * directly. 670 * @param canProvide 671 * is a comma separated list of mime types that can be provided 672 * such as that returned from a call to 673 * getExtensionMimeTypes(...) 674 * @return the best matching mime type possible. 675 */ 676 public static MimeType getPreferedMimeType(String accept, final String canProvide) { 677 if (canProvide == null || canProvide.trim().length() == 0) { 678 throw new MimeException( 679 "Must specify at least one MIME type that can be provided."); 680 } 681 if (accept == null || accept.trim().length() == 0) { 682 accept = "*/*"; 683 } 684 685 // If an accept header is passed in then lets remove the Accept part 686 if (accept.indexOf(":") > 0) { 687 accept = accept.substring(accept.indexOf(":") + 1); 688 } 689 690 // Remove any unwanted spaces from the wanted mime types for instance 691 // text/html; q=0.4 692 accept = accept.replaceAll(" ", ""); 693 694 return getBestMatch(accept, getList(canProvide)); 695 } 696 697 /** 698 * Get the most specific match of the Collection of mime types passed in. 699 * The Collection 700 * @param mimeTypes this should be the Collection of mime types returned 701 * from a getMimeTypes(...) call. 702 * @return the most specific MimeType. If more than one of the mime types in the Collection 703 * have the same value then the first one found with this value in the Collection is returned. 704 */ 705 public static MimeType getMostSpecificMimeType(final Collection mimeTypes) { 706 MimeType mimeType = null; 707 int specificity = 0; 708 for(Iterator it = mimeTypes.iterator(); it.hasNext();) { 709 MimeType mt = (MimeType)it.next(); 710 if(mt.getSpecificity() > specificity) { 711 mimeType = mt; 712 specificity = mimeType.getSpecificity(); 713 } 714 } 715 return mimeType; 716 } 717 718 /** 719 * Utility method to get the minor part of a mime type i.e. the bit after 720 * the '/' character 721 * 722 * @param mimeType 723 * you want to get the minor part from 724 * @return sub type of the mime type 725 * @throws MimeException 726 * if you pass in an invalid mime type structure 727 */ 728 public static String getSubType(final String mimeType) 729 throws MimeException { 730 return new MimeType(mimeType).getSubType(); 731 } 732 733 /** 734 * Check to see if this mime type is one of the types seen during 735 * initialisation or has been added at some later stage using 736 * addKnownMimeType(...) 737 * 738 * @param mimeType 739 * @return true if the mimeType is in the list else false is returned 740 * @see #addKnownMimeType(String mimetype) 741 */ 742 public static boolean isMimeTypeKnown(final MimeType mimeType) { 743 try { 744 Set s = (Set) mimeTypes.get(mimeType.getMediaType()); 745 if (s == null) { 746 return false; 747 } 748 return s.contains(mimeType.getSubType()); 749 } catch (MimeException e) { 750 return false; 751 } 752 } 753 754 /** 755 * Check to see if this mime type is one of the types seen during 756 * initialisation or has been added at some later stage using 757 * addKnownMimeType(...) 758 * 759 * @param mimeType 760 * @return true if the mimeType is in the list else false is returned 761 * @see #addKnownMimeType(String mimetype) 762 */ 763 public static boolean isMimeTypeKnown(final String mimeType) { 764 return isMimeTypeKnown(new MimeType(mimeType)); 765 } 766 767 /** 768 * Utility convenience method to check if a particular MimeType instance is actually a TextMimeType. 769 * Used when iterating over a collection of MimeType's to help with casting to enable access 770 * the the TextMimeType methods not available to a standard MimeType. Can also use instanceof. 771 * @param mimeType 772 * @return true if the passed in instance is a TextMimeType 773 * @see MimeType 774 * @see TextMimeType 775 */ 776 public static boolean isTextMimeType(final MimeType mimeType) { 777 return mimeType instanceof TextMimeType; 778 } 779 780 /** 781 * Remove a previously registered MimeDetector 782 * @param mimeDetector 783 * @return the MimeDetector that was removed from the registry else null. 784 */ 785 public MimeDetector unregisterMimeDetector(final MimeDetector mimeDetector) { 786 return mimeDetectorRegistry.unregisterMimeDetector(mimeDetector); 787 } 788 789 /** 790 * Remove a previously registered MimeDetector 791 * @param mimeDetector 792 * @return the MimeDetector that was removed from the registry else null. 793 */ 794 public MimeDetector unregisterMimeDetector(final String mimeDetector) { 795 return mimeDetectorRegistry.unregisterMimeDetector(mimeDetector); 796 } 797 798 /** 799 * Get the quality parameter of this mime type i.e. the <code>q=</code> property. 800 * This method implements a value system similar to that used by the apache server i.e. 801 * if the media type is a * then it's <code>q</code> value is set to 0.01 and if the sub type is 802 * a * then the <code>q</code> value is set to 0.02 unless a specific <code>q</code> 803 * value is specified. If a <code>q</code> property is set it is limited to a max value of 1.0 804 * 805 * @param mimeType 806 * @return the quality value as a double between 0.0 and 1.0 807 * @throws MimeException 808 */ 809 public static double getQuality(final String mimeType) throws MimeException 810 { 811 return getMimeQuality(mimeType); 812 } 813 814 // Check each entry in each of the wanted lists against the entries in the 815 // can provide list. 816 // We take into consideration the QoS indicator 817 private static MimeType getBestMatch(final String accept, final List canProvideList) { 818 819 if (canProvideList.size() == 1) { 820 // If we only have one mime type that can be provided then thats 821 // what we provide even if 822 // the wanted list does not contain this entry or it's the worst 823 // QoS. 824 // This will cover the majority of cases 825 return new MimeType((String) canProvideList.get(0)); 826 } 827 828 Map wantedMap = normaliseWantedMap(accept, canProvideList); 829 830 MimeType bestMatch = null; 831 double qos = 0.0; 832 Iterator it = wantedMap.keySet().iterator(); 833 while (it.hasNext()) { 834 List wantedList = (List) wantedMap.get(it.next()); 835 Iterator it2 = wantedList.iterator(); 836 while (it2.hasNext()) { 837 String mimeType = (String) it2.next(); 838 double q = getMimeQuality(mimeType); 839 String majorComponent = getMediaType(mimeType); 840 String minorComponent = getSubType(mimeType); 841 if (q > qos) { 842 qos = q; 843 bestMatch = new MimeType(majorComponent + "/" + minorComponent); 844 } 845 } 846 } 847 // Gone through all the wanted list and found the best match possible 848 return bestMatch; 849 } 850 851 // Turn a comma separated string into a list 852 private static List getList(final String options) { 853 List list = new ArrayList(); 854 String[] array = options.split(","); 855 for (int i = 0; i < array.length; i++) { 856 list.add(array[i].trim()); 857 } 858 return list; 859 } 860 861 // Turn a comma separated string of accepted mime types into a Map 862 // based on the list of mime types that can be provided 863 private static Map normaliseWantedMap(final String accept, final List canProvide) { 864 Map map = new LinkedHashMap(); 865 String[] array = accept.split(","); 866 867 for (int i = 0; i < array.length; i++) { 868 String mimeType = array[i].trim(); 869 String major = getMediaType(mimeType); 870 String minor = getSubType(mimeType); 871 double qos = getMimeQuality(mimeType); 872 873 if (StringUtil.contains(major ,"*")) { 874 // All canProvide types are acceptable with the qos defined OR 875 // 0.01 if not defined 876 Iterator it = canProvide.iterator(); 877 while (it.hasNext()) { 878 String mt = (String) it.next(); 879 List list = (List) map.get(getMediaType(mt)); 880 if (list == null) { 881 list = new ArrayList(); 882 } 883 list.add(mt + ";q=" + qos); 884 map.put(getMediaType(mt), list); 885 } 886 } else if (StringUtil.contains(minor, "*")) { 887 Iterator it = canProvide.iterator(); 888 while (it.hasNext()) { 889 String mt = (String) it.next(); 890 if (getMediaType(mt).equals(major)) { 891 List list = (List) map.get(major); 892 if (list == null) { 893 list = new ArrayList(); 894 } 895 list.add(major + "/" + getSubType(mt) + ";q=" 896 + qos); 897 map.put(major, list); 898 } 899 } 900 901 } else { 902 if (canProvide.contains(major + "/" + minor)) { 903 List list = (List) map.get(major); 904 if (list == null) { 905 list = new ArrayList(); 906 } 907 list.add(major + "/" + minor + ";q=" + qos); 908 map.put(major, list); 909 } 910 } 911 } 912 return map; 913 } 914 915 /** 916 * Utility method to get the InputStream from a URL. Handles several schemes, for instance, if the URL points to a jar 917 * entry it will get a proper usable stream from the URL 918 * @param url 919 * @return 920 */ 921 public static InputStream getInputStreamForURL(URL url) throws Exception { 922 try { 923 return url.openStream(); 924 }catch(ZipException e) { 925 return ZipJarUtil.getInputStreamForURL(url); 926 } 927 } 928 } 929 930 /** 931 * <p> 932 * All methods in this class that return a Collection object actually return a {@link MimeTypeHashSet} that implements both the {@link Set} and {@link Collection} 933 * interfaces. 934 * </p> 935 936 * @author Steven McArdle 937 * 938 */ 939 class MimeDetectorRegistry { 940 941 private static Logger log = LoggerFactory.getLogger(MimeDetectorRegistry.class); 942 943 /** 944 * This property holds an instance of the TextMimeDetector. 945 * This is the only pre-registerd MimeDetector and cannot be 946 * de-registered or registered by your code 947 */ 948 private TextMimeDetector TextMimeDetector = new TextMimeDetector(1); 949 950 951 private Map mimeDetectors = new TreeMap(); 952 953 /** 954 * Use the fully qualified name of a MimeDetector and try to instantiate it if 955 * it's not already registered. If it's already registered then log a warning and 956 * return the already registered MimeDetector 957 * @param mimeDetector 958 * @return MimeDetector registered under this name. Returns null if an exception occurs 959 */ 960 MimeDetector registerMimeDetector(final String mimeDetector) { 961 if(mimeDetectors.containsKey(mimeDetector)) { 962 log.warn("MimeDetector [" + mimeDetector + "] will not be registered as a MimeDetector with this name is already registered."); 963 return (MimeDetector)mimeDetectors.get(mimeDetector); 964 } 965 // Create the mime detector if we can 966 try { 967 MimeDetector md = (MimeDetector)Class.forName(mimeDetector).newInstance(); 968 md.init(); 969 if(log.isDebugEnabled()) { 970 log.debug("Registering MimeDetector with name [" + md.getName() + "] and description [" + md.getDescription() + "]"); 971 } 972 mimeDetectors.put(mimeDetector, md); 973 return md; 974 }catch(Exception e) { 975 log.error("Exception while registering MimeDetector [" + mimeDetector + "].", e); 976 } 977 // Failed to create an instance 978 return null; 979 } 980 981 MimeDetector getMimeDetector(final String name) { 982 return (MimeDetector)mimeDetectors.get(name); 983 } 984 985 Collection getMimeTypes(final byte [] data) throws MimeException 986 { 987 Collection mimeTypes = new ArrayList(); 988 try { 989 if(!EncodingGuesser.getSupportedEncodings().isEmpty()) { 990 mimeTypes = TextMimeDetector.getMimeTypes(data); 991 } 992 }catch(UnsupportedOperationException ignore) { 993 // The TextMimeDetector will throw this if it decides 994 // the content is not text 995 } 996 for(Iterator it = mimeDetectors.values().iterator();it.hasNext();) { 997 try { 998 MimeDetector md = (MimeDetector)it.next(); 999 mimeTypes.addAll(md.getMimeTypes(data)); 1000 }catch(UnsupportedOperationException ignore) { 1001 // We ignore this as it indicates that this MimeDetector does not support 1002 // Getting mime types from files 1003 }catch(Exception e) { 1004 log.error(e.getLocalizedMessage(), e); 1005 } 1006 } 1007 return mimeTypes; 1008 } 1009 1010 1011 1012 Collection getMimeTypes(final String fileName) throws MimeException 1013 { 1014 Collection mimeTypes = new ArrayList(); 1015 try { 1016 if(!EncodingGuesser.getSupportedEncodings().isEmpty()) { 1017 mimeTypes = TextMimeDetector.getMimeTypes(fileName); 1018 } 1019 }catch(UnsupportedOperationException ignore) { 1020 // The TextMimeDetector will throw this if it decides 1021 // the content is not text 1022 } 1023 for(Iterator it = mimeDetectors.values().iterator();it.hasNext();) { 1024 try { 1025 MimeDetector md = (MimeDetector)it.next(); 1026 mimeTypes.addAll(md.getMimeTypes(fileName)); 1027 }catch(UnsupportedOperationException usoe) { 1028 // We ignore this as it indicates that this MimeDetector does not support 1029 // Getting mime types from streams 1030 }catch(Exception e) { 1031 log.error(e.getLocalizedMessage(), e); 1032 } 1033 } 1034 return mimeTypes; 1035 } 1036 1037 Collection getMimeTypes(final File file) throws MimeException 1038 { 1039 Collection mimeTypes = new ArrayList(); 1040 try { 1041 if(!EncodingGuesser.getSupportedEncodings().isEmpty()) { 1042 mimeTypes = TextMimeDetector.getMimeTypes(file); 1043 } 1044 }catch(UnsupportedOperationException ignore) { 1045 // The TextMimeDetector will throw this if it decides 1046 // the content is not text 1047 } 1048 for(Iterator it = mimeDetectors.values().iterator();it.hasNext();) { 1049 try { 1050 MimeDetector md = (MimeDetector)it.next(); 1051 mimeTypes.addAll(md.getMimeTypes(file)); 1052 }catch(UnsupportedOperationException usoe) { 1053 // We ignore this as it indicates that this MimeDetector does not support 1054 // Getting mime types from streams 1055 }catch(Exception e) { 1056 log.error(e.getLocalizedMessage(), e); 1057 } 1058 } 1059 return mimeTypes; 1060 } 1061 1062 Collection getMimeTypes(final InputStream in) throws MimeException 1063 { 1064 Collection mimeTypes = new ArrayList(); 1065 try { 1066 if(!EncodingGuesser.getSupportedEncodings().isEmpty()) { 1067 mimeTypes = TextMimeDetector.getMimeTypes(in); 1068 } 1069 }catch(UnsupportedOperationException ignore) { 1070 // The TextMimeDetector will throw this if it decides 1071 // the content is not text 1072 } 1073 for(Iterator it = mimeDetectors.values().iterator();it.hasNext();) { 1074 try { 1075 MimeDetector md = (MimeDetector)it.next(); 1076 mimeTypes.addAll(md.getMimeTypes(in)); 1077 }catch(UnsupportedOperationException usoe) { 1078 // We ignore this as it indicates that this MimeDetector does not support 1079 // Getting mime types from streams 1080 }catch(Exception e) { 1081 log.error(e.getLocalizedMessage(), e); 1082 } 1083 } 1084 return mimeTypes; 1085 } 1086 1087 Collection getMimeTypes(final URL url) throws MimeException 1088 { 1089 Collection mimeTypes = new ArrayList(); 1090 try { 1091 if(!EncodingGuesser.getSupportedEncodings().isEmpty()) { 1092 mimeTypes = TextMimeDetector.getMimeTypes(url); 1093 } 1094 }catch(UnsupportedOperationException ignore) { 1095 // The TextMimeDetector will throw this if it decides 1096 // the content is not text 1097 } 1098 for(Iterator it = mimeDetectors.values().iterator();it.hasNext();) { 1099 try { 1100 MimeDetector md = (MimeDetector)it.next(); 1101 mimeTypes.addAll(md.getMimeTypes(url)); 1102 }catch(UnsupportedOperationException usoe) { 1103 // We ignore this as it indicates that this MimeDetector does not support 1104 // Getting mime types from streams 1105 }catch(Exception e) { 1106 log.error(e.getLocalizedMessage(), e); 1107 } 1108 } 1109 return mimeTypes; 1110 } 1111 1112 MimeDetector unregisterMimeDetector(final String mimeDetector) { 1113 if(mimeDetector == null) { 1114 return null; 1115 } 1116 if(log.isDebugEnabled()) { 1117 log.debug("Unregistering MimeDetector [" + mimeDetector + "] from registry."); 1118 } 1119 try { 1120 MimeDetector md = (MimeDetector)mimeDetectors.get(mimeDetector); 1121 if(md != null) { 1122 md.delete(); 1123 return (MimeDetector)mimeDetectors.remove(mimeDetector); 1124 } 1125 }catch(Exception e) { 1126 log.error("Exception while un-registering MimeDetector [" + mimeDetector + "].", e); 1127 } 1128 1129 // Shouldn't get here 1130 return null; 1131 } 1132 1133 /** 1134 * unregister the MimeDetector from the list. 1135 * @param mimeDetector the MimeDetector to unregister 1136 * @return MimeDetector unregistered or null if it was not registered 1137 */ 1138 MimeDetector unregisterMimeDetector(final MimeDetector mimeDetector) { 1139 if(mimeDetector == null) { 1140 return null; 1141 } 1142 return unregisterMimeDetector(mimeDetector.getName()); 1143 } 1144 }