1 /*
2 * Copyright 2007-2009 Medsea Business Solutions S.L.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 package eu.medsea.mimeutil;
17
18 import java.io.File;
19 import java.io.InputStream;
20 import java.io.UnsupportedEncodingException;
21 import java.net.URL;
22 import java.nio.ByteOrder;
23 import java.util.ArrayList;
24 import java.util.Collection;
25 import java.util.Collections;
26 import java.util.HashMap;
27 import java.util.Iterator;
28 import java.util.LinkedHashMap;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.Set;
32 import java.util.TreeMap;
33 import java.util.TreeSet;
34 import java.util.regex.Pattern;
35 import java.util.zip.ZipException;
36
37 import org.slf4j.Logger;
38 import org.slf4j.LoggerFactory;
39
40 import eu.medsea.mimeutil.detector.MimeDetector;
41 import eu.medsea.util.EncodingGuesser;
42 import eu.medsea.util.StringUtil;
43 import eu.medsea.util.ZipJarUtil;
44
45 /**
46 * <p>
47 * The <code>MimeUtil2</code> is a utility class that allows applications to detect, work with and manipulate MIME types.
48 * </p>
49 * <p>
50 * A MIME or "Multipurpose Internet Mail Extension" type is an Internet standard that is important outside of just e-mail use.
51 * MIME is used extensively in other communications protocols such as HTTP for web communications.
52 * IANA "Internet Assigned Numbers Authority" is responsible for the standardisation and publication of MIME types. Basically any
53 * resource on any computer that can be located via a URL can be assigned a MIME type. So for instance, JPEG images have a MIME type
54 * of image/jpg. Some resources can have multiple MIME types associated with them such as files with an XML extension have the MIME types
55 * text/xml and application/xml and even specialised versions of xml such as image/svg+xml for SVG image files.
56 * </p>
57 * <p>
 * To do this <code>MimeUtil2</code> uses registered <code>MimeDetector</code>(s) that are delegated to in sequence to actually
 * perform the detection. There are several <code>MimeDetector</code> implementations that come with the utility and
 * you can register and unregister them to perform detection based on file extensions, file globbing and magic number detection.<br/>
 * There is also a fourth MimeDetector that is registered by default that detects text files and encodings. Unlike the other
 * MimeDetector(s) or any MimeDetector(s) you may choose to implement, the TextMimeDetector cannot be registered or
 * unregistered by your code. It is advisable that you read the java doc for the TextMimeDetector as it can be modified in
 * several ways to make it perform better and/or detect more specific types.<br/>
65 *
66 * Please refer to the java doc for each of these <code>MimeDetector</code>(s) for a description of how they
67 * actually perform their particular detection process.
68 * </p>
69 * <p>
70 * It is important to note that MIME matching is not an exact science, meaning
71 * that a positive match does not guarantee that the returned MIME type is actually correct.
72 * It is a best guess method of matching and the matched MIME types should be used with this in
73 * mind.
74 * </p>
75 * <p>
 * New <code>MimeDetector</code>(s) can easily be created and registered with <code>MimeUtil2</code> to extend its
 * functionality beyond these initial detection strategies by extending the <code>AbstractMimeDetector</code> class.
78 * To see how to implement your own <code>MimeDetector</code> take a look
79 * at the java doc and source code for the {@link ExtensionMimeDetector}, {@link MagicMimeMimeDetector} and
80 * {@link OpendesktopMimeDetector} classes. To register and unregister MimeDetector(s) use the
81 * [un]registerMimeDetector(...) methods of this class.
82 * </p>
83 * <p>
84 * The order that the <code>MimeDetector</code>(s) are executed is defined by the order each <code>MimeDetector</code> is registered.
85 * </p>
86 * <p>
87 * The resulting <code>Collection</code> of mime types returned in response to a getMimeTypes(...) call is a normalised list of the
88 * accumulation of MIME types returned by each of the registered <code>MimeDetector</code>(s) that implement the specified getMimeTypesXXX(...)
89 * methods.
90 * </p>
91 * <p>
92 * All methods in this class that return a Collection object containing MimeType(s) actually return a {@link MimeTypeHashSet}
93 * that implements both the {@link Set} and {@link Collection} interfaces.
94 * </p>
95 *
96 * @author Steven McArdle.
97 * @since 2.1
98 *
99 */
100 public class MimeUtil2 {
101 private static Logger log = LoggerFactory.getLogger(MimeUtil2.class);
102
103 /**
104 * Mime type used to identify a directory
105 */
106 public static final MimeType DIRECTORY_MIME_TYPE = new MimeType("application/directory");
107 /**
108 * Mime type used to identify an unknown MIME type
109 */
110 public static final MimeType UNKNOWN_MIME_TYPE = new MimeType("application/octet-stream");
111
112 private static final Pattern mimeSplitter = Pattern.compile("[/;]++");
113
114 // All mime types known to the utility. This is synchronised for multi-threaded use
115 // and ALL instances of MimeUtil2 share this list.
116 private static Map mimeTypes = Collections.synchronizedMap(new HashMap());
117
118 // the native byte order of the underlying OS. "BIG" or "little" Endian
119 private static ByteOrder nativeByteOrder = ByteOrder.nativeOrder();
120
121 private MimeDetectorRegistry mimeDetectorRegistry = new MimeDetectorRegistry();
122
123 /**
124 * While MimeType(s) are being loaded by the MimeDetector(s) they should be
125 * added to the list of known MIME types. It is not mandatory for MimeDetector(s)
126 * to do so but they should where possible so that the list is as complete as possible.
127 * You can add other MIME types to this list using this method. You can then use the
128 * isMimeTypeKnown(...) utility methods to see if a MIME type you have
129 * matches one that the utility has already seen.
130 * <p>
131 * This can be used to limit the mime types you work with i.e. if its not been loaded
132 * then don't bother using it as it won't match. This is no guarantee that a match will not
133 * be found as it is possible that a particular MimeDetector does not have an initialisation
134 * phase that loads all of the MIME types it will match.
135 * </p>
136 * <p>
137 * For instance if you had a MIME type of abc/xyz and passed this to
138 * isMimeTypeKnown(...) it would return false unless you specifically add
139 * this to the know MIME types using this method.
140 * </p>
141 *
142 * @param mimeType
143 * a MIME type you want to add to the known MIME types.
144 * Duplicates are ignored.
145 * @see #isMimeTypeKnown(String mimeType)
146 * @see #isMimeTypeKnown(MimeType mimetType)
147 */
148 public static void addKnownMimeType(final MimeType mimeType) {
149 addKnownMimeType(mimeType.toString());
150 }
151
152
153 /**
154 * While MimeType(s) are being loaded by the MimeDetector(s) they should be
155 * added to the list of known MIME types. It is not mandatory for MimeDetector(s)
156 * to do so but they should where possible so that the list is as complete as possible.
157 * You can add other MIME types to this list using this method. You can then use the
158 * isMimeTypeKnown(...) utility methods to see if a MIME type you have
159 * matches one that the utility has already seen.
160 * <p>
161 * This can be used to limit the mime types you work with i.e. if its not been loaded
162 * then don't bother using it as it won't match. This is no guarantee that a match will not
163 * be found as it is possible that a particular MimeDetector does not have an initialisation
164 * phase that loads all of the MIME types it will match.
165 * </p>
166 * <p>
167 * For instance if you had a MIME type of abc/xyz and passed this to
168 * isMimeTypeKnown(...) it would return false unless you specifically add
169 * this to the know MIME types using this method.
170 * </p>
171 *
172 * @param mimeType
173 * a MIME type you want to add to the known MIME types.
174 * Duplicates are ignored.
175 * @see #isMimeTypeKnown(String mimetype)
176 * @see #isMimeTypeKnown(MimeType mimetType)
177 */
178 public static void addKnownMimeType(final String mimeType) {
179 try {
180
181 String key = getMediaType(mimeType);
182 Set s = (Set) mimeTypes.get(key);
183 if (s == null) {
184 s = new TreeSet();
185 }
186 s.add(getSubType(mimeType));
187 mimeTypes.put(key, s);
188 } catch (MimeException ignore) {
189 // A couple of entries in the magic mime file don't follow the rules
190 // so ignore them
191 }
192 }
193
194 /**
195 * Returns a copy of the Collection of currently known MIME types as strings that have been
196 * registered either by the initialisation methods of the MimeDetector(s) or by the user.
197 */
198 public static Collection getKnownMimeTypes() {
199 Collection mimeTypes = new ArrayList();
200 Iterator i = MimeUtil2.mimeTypes.keySet().iterator();
201 while(i.hasNext()) {
202 // Iterate through each set and compose the MIME types
203 String mediaType = (String)i.next();
204 Iterator it = ((Set)MimeUtil2.mimeTypes.get(mediaType)).iterator();
205 while(it.hasNext()) {
206 mimeTypes.add(mediaType + "/" + (String)it.next());
207 }
208 }
209 return mimeTypes;
210 }
211
212 /**
213 * Register a MimeDetector and add it to the MimeDetector registry.
214 * MimeDetector(s) are effectively singletons as they are keyed against their
215 * fully qualified class name.
216 * @param mimeDetector. This must be the fully qualified name of a concrete instance of an
217 * AbstractMimeDetector class.
218 * This enforces that all custom MimeDetector(s) extend the AbstractMimeDetector.
219 * @see MimeDetector
220 */
221 public MimeDetector registerMimeDetector(final String mimeDetector) {
222 return mimeDetectorRegistry.registerMimeDetector(mimeDetector);
223 }
224
225 /**
226 * Get the extension part of a file name defined by the file parameter.
227 *
228 * @param file
229 * a file object
230 * @return the file extension or null if it does not have one.
231 */
232 public static String getExtension(final File file) {
233 return getExtension(file.getName());
234 }
235
236 /**
237 * Get the extension part of a file name defined by the fileName parameter.
238 * There may be no extension or it could be a single part extension such as
239 * .bat or a multi-part extension such as .tar.gz
240 *
241 * @param fileName
242 * a relative or absolute path to a file
243 * @return the file extension or null if it does not have one.
244 */
245 public static String getExtension(final String fileName) {
246 if(fileName == null || fileName.length() == 0) {
247 return "";
248 }
249 int index = fileName.indexOf(".");
250 return index < 0 ? "" : fileName.substring(index + 1);
251 }
252
253 /**
254 * Get the first in a comma separated list of mime types. Useful when using
255 * extension mapping that can return multiple mime types separate by commas
256 * and you only want the first one.
257 *
258 * @param mimeTypes
259 * comma separated list of mime types
260 * @return first in a comma separated list of mime types or null if the mimeTypes string is null or empty
261 */
262 public static MimeType getFirstMimeType(final String mimeTypes) {
263 if (mimeTypes != null && mimeTypes.trim().length() != 0) {
264 return new MimeType(mimeTypes.split(",")[0].trim());
265 }
266 return null;
267 }
268
269 /**
270 * Utility method to get the major or media part of a mime type i.e. the bit before
271 * the '/' character
272 *
273 * @param mimeType
274 * you want to get the media part from
275 * @return media type of the mime type
276 * @throws MimeException
277 * if you pass in an invalid mime type structure
278 */
279 public static String getMediaType(final String mimeType)
280 throws MimeException {
281 return new MimeType(mimeType).getMediaType();
282 }
283
284 /**
285 *
286 * Utility method to get the quality part of a mime type. If it does not
287 * exist then it is always set to q=1.0 unless it's a wild card. For the
288 * major component wild card the value is set to 0.01 For the minor
289 * component wild card the value is set to 0.02
290 * <p>
291 * Thanks to the Apache organisation for these settings.
292 *
293 * @param mimeType
294 * a valid mime type string with or without a valid q parameter
295 * @return the quality value of the mime type either calculated from the
296 * rules above or the actual value defined.
297 * @throws MimeException
298 * this is thrown if the mime type pattern is invalid.
299 */
300 public static double getMimeQuality(final String mimeType) throws MimeException {
301 if (mimeType == null) {
302 throw new MimeException("Invalid MimeType [" + mimeType + "].");
303 }
304 String[] parts = mimeSplitter.split(mimeType);
305 if (parts.length < 2) {
306 throw new MimeException("Invalid MimeType [" + mimeType + "].");
307 }
308 if (parts.length > 2) {
309 for (int i = 2; i < parts.length; i++) {
310 if (parts[i].trim().startsWith("q=")) {
311 // Get the number part
312 try {
313 // Get the quality factor
314 double d = Double.parseDouble(parts[i].split("=")[1]
315 .trim());
316 return d > 1.0 ? 1.0 : d;
317 } catch (NumberFormatException e) {
318 throw new MimeException(
319 "Invalid MIME quality indicator ["
320 + parts[i].trim()
321 + "]. Must be a valid double between 0 and 1");
322 } catch (Exception e) {
323 throw new MimeException(
324 "Error parsing MIME quality indicator.", e);
325 }
326 }
327 }
328 }
329 // No quality indicator so always assume its 1 unless a wild card is used
330 if (StringUtil.contains(parts[0], "*")) {
331 return 0.01;
332 } else if (StringUtil.contains(parts[1], "*")) {
333 return 0.02;
334 } else {
335 // Assume q value of 1
336 return 1.0;
337 }
338 }
339
340 /**
341 * Get a registered MimeDetector by name.
342 * @param name the name of a registered MimeDetector. This is always the fully qualified
343 * name of the class implementing the MimeDetector.
344 * @return
345 */
346 public MimeDetector getMimeDetector(final String name) {
347 return mimeDetectorRegistry.getMimeDetector(name);
348 }
349
350 /**
351 * Get a Collection of possible MimeType(s) that this byte array could represent
352 * according to the registered MimeDetector(s). If no MimeType(s) are detected
353 * then the returned Collection will contain only the UNKNOWN_MIME_TYPE
354 * @param data
355 * @return all matching MimeType(s)
356 * @throws MimeException
357 */
358 public final Collection getMimeTypes(final byte [] data) throws MimeException
359 {
360 return getMimeTypes(data, UNKNOWN_MIME_TYPE);
361 }
362
363 /**
364 * Get a Collection of possible MimeType(s) that this byte array could represent
365 * according to the registered MimeDetector(s). If no MimeType(s) are detected
366 * then the returned Collection will contain only the passed in unknownMimeType
367 * @param data
368 * @param unknownMimeType used if the registered MimeDetector(s) fail to match any MimeType(s)
369 * @return all matching MimeType(s)
370 * @throws MimeException
371 */
372 public final Collection getMimeTypes(final byte [] data, final MimeType unknownMimeType) throws MimeException
373 {
374 Collection mimeTypes = new MimeTypeHashSet();
375 if(data == null) {
376 log.error("byte array cannot be null.");
377 } else {
378 if(log.isDebugEnabled()) {
379 try {
380 log.debug("Getting MIME types for byte array [" + StringUtil.getHexString(data)+ "].");
381 }catch(UnsupportedEncodingException e) {
382 throw new MimeException(e);
383 }
384 }
385 mimeTypes.addAll(mimeDetectorRegistry.getMimeTypes(data));
386
387 // We don't want the unknownMimeType added to the collection by MimeDetector(s)
388 mimeTypes.remove(unknownMimeType);
389 }
390
391 // If the collection is empty we want to add the unknownMimetype
392 if(mimeTypes.isEmpty()) {
393 mimeTypes.add(unknownMimeType);
394 }
395 if(log.isDebugEnabled()) {
396 log.debug("Retrieved MIME types [" + mimeTypes.toString() + "]");
397 }
398 return mimeTypes;
399 }
400
401 /**
402 * Get all of the matching mime types for this file object.
403 * The method delegates down to each of the registered MimeHandler(s) and returns a
404 * normalised list of all matching mime types. If no matching mime types are found the returned
405 * Collection will contain the default UNKNOWN_MIME_TYPE
406 * @param file the File object to detect.
407 * @return collection of matching MimeType(s)
408 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s)
409 * executed.
410 */
411 public final Collection getMimeTypes(final File file) throws MimeException
412 {
413 return getMimeTypes(file, UNKNOWN_MIME_TYPE);
414 }
415
416 /**
417 * Get all of the matching mime types for this file object.
418 * The method delegates down to each of the registered MimeHandler(s) and returns a
419 * normalised list of all matching mime types. If no matching mime types are found the returned
420 * Collection will contain the unknownMimeType passed in.
421 * @param file the File object to detect.
422 * @param unknownMimeType.
423 * @return the Collection of matching mime types. If the collection would be empty i.e. no matches then this will
424 * contain the passed in parameter unknownMimeType
425 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s)
426 * executed.
427 */
428 public final Collection getMimeTypes(final File file, final MimeType unknownMimeType) throws MimeException
429 {
430 Collection mimeTypes = new MimeTypeHashSet();
431
432 if(file == null) {
433 log.error("File reference cannot be null.");
434 } else {
435
436 if(log.isDebugEnabled()) {
437 log.debug("Getting MIME types for file [" + file.getAbsolutePath() + "].");
438 }
439
440 if(file.isDirectory()) {
441 mimeTypes.add(MimeUtil2.DIRECTORY_MIME_TYPE);
442 } else {
443 // Defer this call to the file name and stream methods
444 mimeTypes.addAll(mimeDetectorRegistry.getMimeTypes(file));
445
446 // We don't want the unknownMimeType added to the collection by MimeDetector(s)
447 mimeTypes.remove(unknownMimeType);
448 }
449 }
450 // If the collection is empty we want to add the unknownMimetype
451 if(mimeTypes.isEmpty()) {
452 mimeTypes.add(unknownMimeType);
453 }
454 if(log.isDebugEnabled()) {
455 log.debug("Retrieved MIME types [" + mimeTypes.toString() + "]");
456 }
457 return mimeTypes;
458 }
459
460 /**
461 * Get all of the matching mime types for this InputStream object.
462 * The method delegates down to each of the registered MimeHandler(s) and returns a
463 * normalised list of all matching mime types. If no matching mime types are found the returned
464 * Collection will contain the default UNKNOWN_MIME_TYPE
465 * @param in InputStream to detect.
466 * @return
467 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s)
468 * executed.
469 */
470 public final Collection getMimeTypes(final InputStream in) throws MimeException
471 {
472 return getMimeTypes(in, UNKNOWN_MIME_TYPE);
473 }
474
475 /**
476 * Get all of the matching mime types for this InputStream object.
477 * The method delegates down to each of the registered MimeHandler(s) and returns a
478 * normalised list of all matching mime types. If no matching mime types are found the returned
479 * Collection will contain the unknownMimeType passed in.
480 * @param in the InputStream object to detect.
481 * @param unknownMimeType.
482 * @return the Collection of matching mime types. If the collection would be empty i.e. no matches then this will
483 * contain the passed in parameter unknownMimeType
484 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s)
485 * executed.
486 */
487 public final Collection getMimeTypes(final InputStream in, final MimeType unknownMimeType) throws MimeException
488 {
489 Collection mimeTypes = new MimeTypeHashSet();
490
491 if(in == null) {
492 log.error("InputStream reference cannot be null.");
493 } else {
494 if (!in.markSupported()) {
495 throw new MimeException("InputStream must support the mark() and reset() methods.");
496 }
497 if(log.isDebugEnabled()) {
498 log.debug("Getting MIME types for InputSteam [" + in + "].");
499 }
500 mimeTypes.addAll(mimeDetectorRegistry.getMimeTypes(in));
501
502 // We don't want the unknownMimeType added to the collection by MimeDetector(s)
503 mimeTypes.remove(unknownMimeType);
504 }
505 // If the collection is empty we want to add the unknownMimetype
506 if(mimeTypes.isEmpty()) {
507 mimeTypes.add(unknownMimeType);
508 }
509 if(log.isDebugEnabled()) {
510 log.debug("Retrieved MIME types [" + mimeTypes.toString() + "]");
511 }
512 return mimeTypes;
513 }
514
515 /**
516 * Get all of the matching mime types for this file name.
517 * The method delegates down to each of the registered MimeHandler(s) and returns a
518 * normalised list of all matching mime types. If no matching mime types are found the returned
519 * Collection will contain the default UNKNOWN_MIME_TYPE
520 * @param fileName the name of a file to detect.
521 * @return collection of matching MimeType(s)
522 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s)
523 * executed.
524 */
525 public final Collection getMimeTypes(final String fileName) throws MimeException
526 {
527 return getMimeTypes(fileName, UNKNOWN_MIME_TYPE);
528 }
529
530 /**
531 * Get all of the matching mime types for this file name .
532 * The method delegates down to each of the registered MimeHandler(s) and returns a
533 * normalised list of all matching mime types. If no matching mime types are found the returned
534 * Collection will contain the unknownMimeType passed in.
535 * @param fileName the name of a file to detect.
536 * @param unknownMimeType.
537 * @return the Collection of matching mime types. If the collection would be empty i.e. no matches then this will
538 * contain the passed in parameter unknownMimeType
539 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s)
540 * executed.
541 */
542 public final Collection getMimeTypes(final String fileName, final MimeType unknownMimeType) throws MimeException
543 {
544 Collection mimeTypes = new MimeTypeHashSet();
545
546 if(fileName == null) {
547 log.error("fileName cannot be null.");
548 } else {
549 if(log.isDebugEnabled()) {
550 log.debug("Getting MIME types for file name [" + fileName + "].");
551 }
552
553 // Test if this is a directory
554 File file = new File(fileName);
555
556 if(file.isDirectory()) {
557 mimeTypes.add(MimeUtil2.DIRECTORY_MIME_TYPE);
558 } else {
559 mimeTypes.addAll(mimeDetectorRegistry.getMimeTypes(fileName));
560
561 // We don't want the unknownMimeType added to the collection by MimeDetector(s)
562 mimeTypes.remove(unknownMimeType);
563 }
564 }
565 // If the collection is empty we want to add the unknownMimetype
566 if(mimeTypes.isEmpty()) {
567 mimeTypes.add(unknownMimeType);
568 }
569 if(log.isDebugEnabled()) {
570 log.debug("Retrieved MIME types [" + mimeTypes.toString() + "]");
571 }
572 return mimeTypes;
573
574 }
575
576 /**
577 * Get all of the matching mime types for this URL object.
578 * The method delegates down to each of the registered MimeHandler(s) and returns a
579 * normalised list of all matching mime types. If no matching mime types are found the returned
580 * Collection will contain the default UNKNOWN_MIME_TYPE
581 * @param url a URL to detect.
582 * @return Collection of matching MimeType(s)
583 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s)
584 * executed.
585 */
586 public final Collection getMimeTypes(final URL url) throws MimeException
587 {
588 return getMimeTypes(url, UNKNOWN_MIME_TYPE);
589 }
590
591 public final Collection getMimeTypes(final URL url, final MimeType unknownMimeType) throws MimeException
592 {
593 Collection mimeTypes = new MimeTypeHashSet();
594
595 if(url == null) {
596 log.error("URL reference cannot be null.");
597 } else {
598 if(log.isDebugEnabled()) {
599 log.debug("Getting MIME types for URL [" + url + "].");
600 }
601
602 // Test if this is a directory
603 File file = new File(url.getPath());
604 if(file.isDirectory()) {
605 mimeTypes.add(MimeUtil2.DIRECTORY_MIME_TYPE);
606 } else {
607 // defer these calls to the file name and stream methods
608 mimeTypes.addAll(mimeDetectorRegistry.getMimeTypes(url));
609
610 // We don't want the unknownMimeType added to the collection by MimeDetector(s)
611 mimeTypes.remove(unknownMimeType);
612 }
613 }
614 // If the collection is empty we want to add the unknownMimetype
615 if(mimeTypes.isEmpty()) {
616 mimeTypes.add(unknownMimeType);
617 }
618 if(log.isDebugEnabled()) {
619 log.debug("Retrieved MIME types [" + mimeTypes.toString() + "]");
620 }
621 return mimeTypes;
622 }
623
624 /**
625 * Get the native byte order of the OS on which you are running. It will be
626 * either big or little endian. This is used internally for the magic mime
627 * rules mapping.
628 *
629 * @return ByteOrder
630 */
631 public static ByteOrder getNativeOrder() {
632 return MimeUtil2.nativeByteOrder;
633 }
634
635 /**
636 * Gives you the best match for your requirements.
637 * <p>
638 * You can pass the accept header from a browser request to this method
639 * along with a comma separated list of possible mime types returned from
640 * say getExtensionMimeTypes(...) and the best match according to the accept
641 * header will be returned.
642 * </p>
643 * <p>
644 * The following is typical of what may be specified in an HTTP Accept
645 * header:
646 * </p>
647 * <p>
648 * Accept: text/xml, application/xml, application/xhtml+xml,
649 * text/html;q=0.9, text/plain;q=0.8, video/x-mng, image/png, image/jpeg,
650 * image/gif;q=0.2, text/css, */*;q=0.1
651 * </p>
652 * <p>
653 * The quality parameter (q) indicates how well the user agent handles the
654 * MIME type. A value of 1 indicates the MIME type is understood perfectly,
655 * and a value of 0 indicates the MIME type isn't understood at all.
656 * </p>
657 * <p>
658 * The reason the image/gif MIME type contains a quality parameter of 0.2,
659 * is to indicate that PNG & JPEG are preferred over GIF if the server is
660 * using content negotiation to deliver either a PNG or a GIF to user
661 * agents. Similarly, the text/html quality parameter has been lowered a
662 * little, to ensure that the XML MIME types are given in preference if
663 * content negotiation is being used to serve an XHTML document.
664 * </p>
665 *
666 * @param accept
667 * is a comma separated list of mime types you can accept
668 * including QoS parameters. Can pass the Accept: header
669 * directly.
670 * @param canProvide
671 * is a comma separated list of mime types that can be provided
672 * such as that returned from a call to
673 * getExtensionMimeTypes(...)
674 * @return the best matching mime type possible.
675 */
676 public static MimeType getPreferedMimeType(String accept, final String canProvide) {
677 if (canProvide == null || canProvide.trim().length() == 0) {
678 throw new MimeException(
679 "Must specify at least one MIME type that can be provided.");
680 }
681 if (accept == null || accept.trim().length() == 0) {
682 accept = "*/*";
683 }
684
685 // If an accept header is passed in then lets remove the Accept part
686 if (accept.indexOf(":") > 0) {
687 accept = accept.substring(accept.indexOf(":") + 1);
688 }
689
690 // Remove any unwanted spaces from the wanted mime types for instance
691 // text/html; q=0.4
692 accept = accept.replaceAll(" ", "");
693
694 return getBestMatch(accept, getList(canProvide));
695 }
696
697 /**
698 * Get the most specific match of the Collection of mime types passed in.
699 * The Collection
700 * @param mimeTypes this should be the Collection of mime types returned
701 * from a getMimeTypes(...) call.
702 * @return the most specific MimeType. If more than one of the mime types in the Collection
703 * have the same value then the first one found with this value in the Collection is returned.
704 */
705 public static MimeType getMostSpecificMimeType(final Collection mimeTypes) {
706 MimeType mimeType = null;
707 int specificity = 0;
708 for(Iterator it = mimeTypes.iterator(); it.hasNext();) {
709 MimeType mt = (MimeType)it.next();
710 if(mt.getSpecificity() > specificity) {
711 mimeType = mt;
712 specificity = mimeType.getSpecificity();
713 }
714 }
715 return mimeType;
716 }
717
718 /**
719 * Utility method to get the minor part of a mime type i.e. the bit after
720 * the '/' character
721 *
722 * @param mimeType
723 * you want to get the minor part from
724 * @return sub type of the mime type
725 * @throws MimeException
726 * if you pass in an invalid mime type structure
727 */
728 public static String getSubType(final String mimeType)
729 throws MimeException {
730 return new MimeType(mimeType).getSubType();
731 }
732
733 /**
734 * Check to see if this mime type is one of the types seen during
735 * initialisation or has been added at some later stage using
736 * addKnownMimeType(...)
737 *
738 * @param mimeType
739 * @return true if the mimeType is in the list else false is returned
740 * @see #addKnownMimeType(String mimetype)
741 */
742 public static boolean isMimeTypeKnown(final MimeType mimeType) {
743 try {
744 Set s = (Set) mimeTypes.get(mimeType.getMediaType());
745 if (s == null) {
746 return false;
747 }
748 return s.contains(mimeType.getSubType());
749 } catch (MimeException e) {
750 return false;
751 }
752 }
753
754 /**
755 * Check to see if this mime type is one of the types seen during
756 * initialisation or has been added at some later stage using
757 * addKnownMimeType(...)
758 *
759 * @param mimeType
760 * @return true if the mimeType is in the list else false is returned
761 * @see #addKnownMimeType(String mimetype)
762 */
763 public static boolean isMimeTypeKnown(final String mimeType) {
764 return isMimeTypeKnown(new MimeType(mimeType));
765 }
766
767 /**
768 * Utility convenience method to check if a particular MimeType instance is actually a TextMimeType.
769 * Used when iterating over a collection of MimeType's to help with casting to enable access
770 * the the TextMimeType methods not available to a standard MimeType. Can also use instanceof.
771 * @param mimeType
772 * @return true if the passed in instance is a TextMimeType
773 * @see MimeType
774 * @see TextMimeType
775 */
776 public static boolean isTextMimeType(final MimeType mimeType) {
777 return mimeType instanceof TextMimeType;
778 }
779
780 /**
781 * Remove a previously registered MimeDetector
782 * @param mimeDetector
783 * @return the MimeDetector that was removed from the registry else null.
784 */
785 public MimeDetector unregisterMimeDetector(final MimeDetector mimeDetector) {
786 return mimeDetectorRegistry.unregisterMimeDetector(mimeDetector);
787 }
788
789 /**
790 * Remove a previously registered MimeDetector
791 * @param mimeDetector
792 * @return the MimeDetector that was removed from the registry else null.
793 */
794 public MimeDetector unregisterMimeDetector(final String mimeDetector) {
795 return mimeDetectorRegistry.unregisterMimeDetector(mimeDetector);
796 }
797
798 /**
799 * Get the quality parameter of this mime type i.e. the <code>q=</code> property.
800 * This method implements a value system similar to that used by the apache server i.e.
801 * if the media type is a * then it's <code>q</code> value is set to 0.01 and if the sub type is
802 * a * then the <code>q</code> value is set to 0.02 unless a specific <code>q</code>
803 * value is specified. If a <code>q</code> property is set it is limited to a max value of 1.0
804 *
805 * @param mimeType
806 * @return the quality value as a double between 0.0 and 1.0
807 * @throws MimeException
808 */
809 public static double getQuality(final String mimeType) throws MimeException
810 {
811 return getMimeQuality(mimeType);
812 }
813
814 // Check each entry in each of the wanted lists against the entries in the
815 // can provide list.
816 // We take into consideration the QoS indicator
817 private static MimeType getBestMatch(final String accept, final List canProvideList) {
818
819 if (canProvideList.size() == 1) {
820 // If we only have one mime type that can be provided then thats
821 // what we provide even if
822 // the wanted list does not contain this entry or it's the worst
823 // QoS.
824 // This will cover the majority of cases
825 return new MimeType((String) canProvideList.get(0));
826 }
827
828 Map wantedMap = normaliseWantedMap(accept, canProvideList);
829
830 MimeType bestMatch = null;
831 double qos = 0.0;
832 Iterator it = wantedMap.keySet().iterator();
833 while (it.hasNext()) {
834 List wantedList = (List) wantedMap.get(it.next());
835 Iterator it2 = wantedList.iterator();
836 while (it2.hasNext()) {
837 String mimeType = (String) it2.next();
838 double q = getMimeQuality(mimeType);
839 String majorComponent = getMediaType(mimeType);
840 String minorComponent = getSubType(mimeType);
841 if (q > qos) {
842 qos = q;
843 bestMatch = new MimeType(majorComponent + "/" + minorComponent);
844 }
845 }
846 }
847 // Gone through all the wanted list and found the best match possible
848 return bestMatch;
849 }
850
851 // Turn a comma separated string into a list
852 private static List getList(final String options) {
853 List list = new ArrayList();
854 String[] array = options.split(",");
855 for (int i = 0; i < array.length; i++) {
856 list.add(array[i].trim());
857 }
858 return list;
859 }
860
861 // Turn a comma separated string of accepted mime types into a Map
862 // based on the list of mime types that can be provided
863 private static Map normaliseWantedMap(final String accept, final List canProvide) {
864 Map map = new LinkedHashMap();
865 String[] array = accept.split(",");
866
867 for (int i = 0; i < array.length; i++) {
868 String mimeType = array[i].trim();
869 String major = getMediaType(mimeType);
870 String minor = getSubType(mimeType);
871 double qos = getMimeQuality(mimeType);
872
873 if (StringUtil.contains(major ,"*")) {
874 // All canProvide types are acceptable with the qos defined OR
875 // 0.01 if not defined
876 Iterator it = canProvide.iterator();
877 while (it.hasNext()) {
878 String mt = (String) it.next();
879 List list = (List) map.get(getMediaType(mt));
880 if (list == null) {
881 list = new ArrayList();
882 }
883 list.add(mt + ";q=" + qos);
884 map.put(getMediaType(mt), list);
885 }
886 } else if (StringUtil.contains(minor, "*")) {
887 Iterator it = canProvide.iterator();
888 while (it.hasNext()) {
889 String mt = (String) it.next();
890 if (getMediaType(mt).equals(major)) {
891 List list = (List) map.get(major);
892 if (list == null) {
893 list = new ArrayList();
894 }
895 list.add(major + "/" + getSubType(mt) + ";q="
896 + qos);
897 map.put(major, list);
898 }
899 }
900
901 } else {
902 if (canProvide.contains(major + "/" + minor)) {
903 List list = (List) map.get(major);
904 if (list == null) {
905 list = new ArrayList();
906 }
907 list.add(major + "/" + minor + ";q=" + qos);
908 map.put(major, list);
909 }
910 }
911 }
912 return map;
913 }
914
915 /**
916 * Utility method to get the InputStream from a URL. Handles several schemes, for instance, if the URL points to a jar
917 * entry it will get a proper usable stream from the URL
918 * @param url
919 * @return
920 */
921 public static InputStream getInputStreamForURL(URL url) throws Exception {
922 try {
923 return url.openStream();
924 }catch(ZipException e) {
925 return ZipJarUtil.getInputStreamForURL(url);
926 }
927 }
928 }
929
930 /**
931 * <p>
932 * All methods in this class that return a Collection object actually return a {@link MimeTypeHashSet} that implements both the {@link Set} and {@link Collection}
933 * interfaces.
934 * </p>
935
936 * @author Steven McArdle
937 *
938 */
939 class MimeDetectorRegistry {
940
941 private static Logger log = LoggerFactory.getLogger(MimeDetectorRegistry.class);
942
943 /**
944 * This property holds an instance of the TextMimeDetector.
945 * This is the only pre-registerd MimeDetector and cannot be
946 * de-registered or registered by your code
947 */
948 private TextMimeDetector TextMimeDetector = new TextMimeDetector(1);
949
950
951 private Map mimeDetectors = new TreeMap();
952
953 /**
954 * Use the fully qualified name of a MimeDetector and try to instantiate it if
955 * it's not already registered. If it's already registered then log a warning and
956 * return the already registered MimeDetector
957 * @param mimeDetector
958 * @return MimeDetector registered under this name. Returns null if an exception occurs
959 */
960 MimeDetector registerMimeDetector(final String mimeDetector) {
961 if(mimeDetectors.containsKey(mimeDetector)) {
962 log.warn("MimeDetector [" + mimeDetector + "] will not be registered as a MimeDetector with this name is already registered.");
963 return (MimeDetector)mimeDetectors.get(mimeDetector);
964 }
965 // Create the mime detector if we can
966 try {
967 MimeDetector md = (MimeDetector)Class.forName(mimeDetector).newInstance();
968 md.init();
969 if(log.isDebugEnabled()) {
970 log.debug("Registering MimeDetector with name [" + md.getName() + "] and description [" + md.getDescription() + "]");
971 }
972 mimeDetectors.put(mimeDetector, md);
973 return md;
974 }catch(Exception e) {
975 log.error("Exception while registering MimeDetector [" + mimeDetector + "].", e);
976 }
977 // Failed to create an instance
978 return null;
979 }
980
981 MimeDetector getMimeDetector(final String name) {
982 return (MimeDetector)mimeDetectors.get(name);
983 }
984
985 Collection getMimeTypes(final byte [] data) throws MimeException
986 {
987 Collection mimeTypes = new ArrayList();
988 try {
989 if(!EncodingGuesser.getSupportedEncodings().isEmpty()) {
990 mimeTypes = TextMimeDetector.getMimeTypes(data);
991 }
992 }catch(UnsupportedOperationException ignore) {
993 // The TextMimeDetector will throw this if it decides
994 // the content is not text
995 }
996 for(Iterator it = mimeDetectors.values().iterator();it.hasNext();) {
997 try {
998 MimeDetector md = (MimeDetector)it.next();
999 mimeTypes.addAll(md.getMimeTypes(data));
1000 }catch(UnsupportedOperationException ignore) {
1001 // We ignore this as it indicates that this MimeDetector does not support
1002 // Getting mime types from files
1003 }catch(Exception e) {
1004 log.error(e.getLocalizedMessage(), e);
1005 }
1006 }
1007 return mimeTypes;
1008 }
1009
1010
1011
1012 Collection getMimeTypes(final String fileName) throws MimeException
1013 {
1014 Collection mimeTypes = new ArrayList();
1015 try {
1016 if(!EncodingGuesser.getSupportedEncodings().isEmpty()) {
1017 mimeTypes = TextMimeDetector.getMimeTypes(fileName);
1018 }
1019 }catch(UnsupportedOperationException ignore) {
1020 // The TextMimeDetector will throw this if it decides
1021 // the content is not text
1022 }
1023 for(Iterator it = mimeDetectors.values().iterator();it.hasNext();) {
1024 try {
1025 MimeDetector md = (MimeDetector)it.next();
1026 mimeTypes.addAll(md.getMimeTypes(fileName));
1027 }catch(UnsupportedOperationException usoe) {
1028 // We ignore this as it indicates that this MimeDetector does not support
1029 // Getting mime types from streams
1030 }catch(Exception e) {
1031 log.error(e.getLocalizedMessage(), e);
1032 }
1033 }
1034 return mimeTypes;
1035 }
1036
1037 Collection getMimeTypes(final File file) throws MimeException
1038 {
1039 Collection mimeTypes = new ArrayList();
1040 try {
1041 if(!EncodingGuesser.getSupportedEncodings().isEmpty()) {
1042 mimeTypes = TextMimeDetector.getMimeTypes(file);
1043 }
1044 }catch(UnsupportedOperationException ignore) {
1045 // The TextMimeDetector will throw this if it decides
1046 // the content is not text
1047 }
1048 for(Iterator it = mimeDetectors.values().iterator();it.hasNext();) {
1049 try {
1050 MimeDetector md = (MimeDetector)it.next();
1051 mimeTypes.addAll(md.getMimeTypes(file));
1052 }catch(UnsupportedOperationException usoe) {
1053 // We ignore this as it indicates that this MimeDetector does not support
1054 // Getting mime types from streams
1055 }catch(Exception e) {
1056 log.error(e.getLocalizedMessage(), e);
1057 }
1058 }
1059 return mimeTypes;
1060 }
1061
1062 Collection getMimeTypes(final InputStream in) throws MimeException
1063 {
1064 Collection mimeTypes = new ArrayList();
1065 try {
1066 if(!EncodingGuesser.getSupportedEncodings().isEmpty()) {
1067 mimeTypes = TextMimeDetector.getMimeTypes(in);
1068 }
1069 }catch(UnsupportedOperationException ignore) {
1070 // The TextMimeDetector will throw this if it decides
1071 // the content is not text
1072 }
1073 for(Iterator it = mimeDetectors.values().iterator();it.hasNext();) {
1074 try {
1075 MimeDetector md = (MimeDetector)it.next();
1076 mimeTypes.addAll(md.getMimeTypes(in));
1077 }catch(UnsupportedOperationException usoe) {
1078 // We ignore this as it indicates that this MimeDetector does not support
1079 // Getting mime types from streams
1080 }catch(Exception e) {
1081 log.error(e.getLocalizedMessage(), e);
1082 }
1083 }
1084 return mimeTypes;
1085 }
1086
1087 Collection getMimeTypes(final URL url) throws MimeException
1088 {
1089 Collection mimeTypes = new ArrayList();
1090 try {
1091 if(!EncodingGuesser.getSupportedEncodings().isEmpty()) {
1092 mimeTypes = TextMimeDetector.getMimeTypes(url);
1093 }
1094 }catch(UnsupportedOperationException ignore) {
1095 // The TextMimeDetector will throw this if it decides
1096 // the content is not text
1097 }
1098 for(Iterator it = mimeDetectors.values().iterator();it.hasNext();) {
1099 try {
1100 MimeDetector md = (MimeDetector)it.next();
1101 mimeTypes.addAll(md.getMimeTypes(url));
1102 }catch(UnsupportedOperationException usoe) {
1103 // We ignore this as it indicates that this MimeDetector does not support
1104 // Getting mime types from streams
1105 }catch(Exception e) {
1106 log.error(e.getLocalizedMessage(), e);
1107 }
1108 }
1109 return mimeTypes;
1110 }
1111
1112 MimeDetector unregisterMimeDetector(final String mimeDetector) {
1113 if(mimeDetector == null) {
1114 return null;
1115 }
1116 if(log.isDebugEnabled()) {
1117 log.debug("Unregistering MimeDetector [" + mimeDetector + "] from registry.");
1118 }
1119 try {
1120 MimeDetector md = (MimeDetector)mimeDetectors.get(mimeDetector);
1121 if(md != null) {
1122 md.delete();
1123 return (MimeDetector)mimeDetectors.remove(mimeDetector);
1124 }
1125 }catch(Exception e) {
1126 log.error("Exception while un-registering MimeDetector [" + mimeDetector + "].", e);
1127 }
1128
1129 // Shouldn't get here
1130 return null;
1131 }
1132
1133 /**
1134 * unregister the MimeDetector from the list.
1135 * @param mimeDetector the MimeDetector to unregister
1136 * @return MimeDetector unregistered or null if it was not registered
1137 */
1138 MimeDetector unregisterMimeDetector(final MimeDetector mimeDetector) {
1139 if(mimeDetector == null) {
1140 return null;
1141 }
1142 return unregisterMimeDetector(mimeDetector.getName());
1143 }
1144 }