View Javadoc

1   /*
2    * Copyright 2007-2009 Medsea Business Solutions S.L.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package eu.medsea.mimeutil;
17  
18  import java.io.File;
19  import java.io.InputStream;
20  import java.io.UnsupportedEncodingException;
21  import java.net.URL;
22  import java.nio.ByteOrder;
23  import java.util.ArrayList;
24  import java.util.Collection;
25  import java.util.Collections;
26  import java.util.HashMap;
27  import java.util.Iterator;
28  import java.util.LinkedHashMap;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.Set;
32  import java.util.TreeMap;
33  import java.util.TreeSet;
34  import java.util.regex.Pattern;
35  import java.util.zip.ZipException;
36  
37  import org.slf4j.Logger;
38  import org.slf4j.LoggerFactory;
39  
40  import eu.medsea.mimeutil.detector.MimeDetector;
41  import eu.medsea.util.EncodingGuesser;
42  import eu.medsea.util.StringUtil;
43  import eu.medsea.util.ZipJarUtil;
44  
45  /**
46   * <p>
47   * The <code>MimeUtil2</code> is a utility class that allows applications to detect, work with and manipulate MIME types.
48   * </p>
49   * <p>
50   * A MIME or "Multipurpose Internet Mail Extension" type is an Internet standard that is important outside of just e-mail use.
51   * MIME is used extensively in other communications protocols such as HTTP for web communications.
52   * IANA "Internet Assigned Numbers Authority" is responsible for the standardisation and publication of MIME types. Basically any
53   * resource on any computer that can be located via a URL can be assigned a MIME type. So for instance, JPEG images have a MIME type
54   * of image/jpg. Some resources can have multiple MIME types associated with them such as files with an XML extension have the MIME types
55   * text/xml and application/xml and even specialised versions of xml such as image/svg+xml for SVG image files.
56   * </p>
57   * <p>
58   * To do this <code>MimeUtil2</code> uses registered <code>MimeDetector</code>(s) that are delegated to in sequence to actually
59   * perform the detection. There are several <code>MimeDetector</code> implementations that come with the utility and
60   * you can register and unregister them to perform detection based on file extensions, file globing and magic number detection.<br/>
61   * There is also a fourth MimeDetector that is registered by default that detects text files and encodings. Unlike the other
62   * MimeDetector(s) or any MimeDetector(s) you may choose to implement, the TextMimeDetector cannot be registered or
63   * unregistered by your code. It is advisable that you read the java doc for the TextMimeDetector as it can be modified in
64   * several ways to make it perform better and or detect more specific types.<br/>
65   *
66   * Please refer to the java doc for each of these <code>MimeDetector</code>(s) for a description of how they
67   * actually perform their particular detection process.
68   * </p>
69   * <p>
70   * It is important to note that MIME matching is not an exact science, meaning
71   * that a positive match does not guarantee that the returned MIME type is actually correct.
72   * It is a best guess method of matching and the matched MIME types should be used with this in
73   * mind.
74   * </p>
75   * <p>
76   * New <code>MimeDetector</code>(s) can easily be created and registered with <code>MimeUtil2</code> to extend its
77   * functionality beyond these initial detection strategies by extending the <code>AbstractMimeDetector</code> class.
78   * To see how to implement your own <code>MimeDetector</code> take a look
79   * at the java doc and source code for the {@link ExtensionMimeDetector}, {@link MagicMimeMimeDetector} and
80   * {@link OpendesktopMimeDetector} classes. To register and unregister MimeDetector(s) use the
81   * [un]registerMimeDetector(...) methods of this class.
82   * </p>
83   * <p>
84   * The order that the <code>MimeDetector</code>(s) are executed is defined by the order each <code>MimeDetector</code> is registered.
85   * </p>
86   * <p>
87   * The resulting <code>Collection</code> of mime types returned in response to a getMimeTypes(...) call is a normalised list of the
88   * accumulation of MIME types returned by each of the registered <code>MimeDetector</code>(s) that implement the specified getMimeTypesXXX(...)
89   * methods.
90   * </p>
91   * <p>
92   * All methods in this class that return a Collection object containing MimeType(s) actually return a {@link MimeTypeHashSet}
93   * that implements both the {@link Set} and {@link Collection} interfaces.
94   * </p>
95   *
96   * @author Steven McArdle.
97   * @since 2.1
98   *
99   */
100 public class MimeUtil2 {
101 	private static Logger log = LoggerFactory.getLogger(MimeUtil2.class);
102 
103 	/**
104 	 * Mime type used to identify a directory
105 	 */
106 	public static final MimeType DIRECTORY_MIME_TYPE = new MimeType("application/directory");
107 	/**
108 	 * Mime type used to identify an unknown MIME type
109 	 */
110 	public static final MimeType UNKNOWN_MIME_TYPE = new MimeType("application/octet-stream");
111 
112 	private static final Pattern mimeSplitter = Pattern.compile("[/;]++");
113 
114 	// All mime types known to the utility. This is synchronised for multi-threaded use
115 	// and ALL instances of MimeUtil2 share this list.
116 	private static Map mimeTypes = Collections.synchronizedMap(new HashMap());
117 
118 	// the native byte order of the underlying OS. "BIG" or "little" Endian
119 	private static ByteOrder nativeByteOrder = ByteOrder.nativeOrder();
120 
121 	private MimeDetectorRegistry mimeDetectorRegistry = new MimeDetectorRegistry();
122 
123 	/**
124 	 * While MimeType(s) are being loaded by the MimeDetector(s) they should be
125 	 * added to the list of known MIME types. It is not mandatory for MimeDetector(s)
126 	 * to do so but they should where possible so that the list is as complete as possible.
127 	 * You can add other MIME types to this list using this method. You can then use the
128 	 * isMimeTypeKnown(...) utility methods to see if a MIME type you have
129 	 * matches one that the utility has already seen.
130 	 * <p>
131 	 * This can be used to limit the mime types you work with i.e. if its not been loaded
132 	 * then don't bother using it as it won't match. This is no guarantee that a match will not
133 	 * be found as it is possible that a particular MimeDetector does not have an initialisation
134 	 * phase that loads all of the MIME types it will match.
135 	 * </p>
136 	 * <p>
137 	 * For instance if you had a MIME type of abc/xyz and passed this to
138 	 * isMimeTypeKnown(...) it would return false unless you specifically add
139 	 * this to the known MIME types using this method.
140 	 * </p>
141 	 *
142 	 * @param mimeType
143 	 *            a MIME type you want to add to the known MIME types.
144 	 *            Duplicates are ignored.
145 	 * @see #isMimeTypeKnown(String mimeType)
146 	 * @see #isMimeTypeKnown(MimeType mimetType)
147 	 */
148 	public static void addKnownMimeType(final MimeType mimeType) {
149 		addKnownMimeType(mimeType.toString());
150 	}
151 
152 
153 	/**
154 	 * While MimeType(s) are being loaded by the MimeDetector(s) they should be
155 	 * added to the list of known MIME types. It is not mandatory for MimeDetector(s)
156 	 * to do so but they should where possible so that the list is as complete as possible.
157 	 * You can add other MIME types to this list using this method. You can then use the
158 	 * isMimeTypeKnown(...) utility methods to see if a MIME type you have
159 	 * matches one that the utility has already seen.
160 	 * <p>
161 	 * This can be used to limit the mime types you work with i.e. if its not been loaded
162 	 * then don't bother using it as it won't match. This is no guarantee that a match will not
163 	 * be found as it is possible that a particular MimeDetector does not have an initialisation
164 	 * phase that loads all of the MIME types it will match.
165 	 * </p>
166 	 * <p>
167 	 * For instance if you had a MIME type of abc/xyz and passed this to
168 	 * isMimeTypeKnown(...) it would return false unless you specifically add
169 	 * this to the known MIME types using this method.
170 	 * </p>
171 	 *
172 	 * @param mimeType
173 	 *            a MIME type you want to add to the known MIME types.
174 	 *            Duplicates are ignored.
175 	 * @see #isMimeTypeKnown(String mimetype)
176 	 * @see #isMimeTypeKnown(MimeType mimetType)
177 	 */
178 	public static void addKnownMimeType(final String mimeType) {
179 		try {
180 
181 			String key = getMediaType(mimeType);
182 			Set s = (Set) mimeTypes.get(key);
183 			if (s == null) {
184 				s = new TreeSet();
185 			}
186 			s.add(getSubType(mimeType));
187 			mimeTypes.put(key, s);
188 		} catch (MimeException ignore) {
189 			// A couple of entries in the magic mime file don't follow the rules
190 			// so ignore them
191 		}
192 	}
193 	
194 	/**
195 	 * Returns a copy of the Collection of currently known MIME types as strings that have been 
196 	 * registered either by the initialisation methods of the MimeDetector(s) or by the user.
197 	 */
198 	public static Collection getKnownMimeTypes() {
199 		Collection mimeTypes = new ArrayList();
200 		Iterator i = MimeUtil2.mimeTypes.keySet().iterator();
201 		while(i.hasNext()) {
202 			// Iterate through each set and compose the MIME types
203 			String mediaType = (String)i.next();
204 			Iterator it = ((Set)MimeUtil2.mimeTypes.get(mediaType)).iterator();
205 			while(it.hasNext()) {
206 				mimeTypes.add(mediaType + "/" + (String)it.next());
207 			}
208 		}
209 		return mimeTypes;
210 	}
211 
212 	/**
213 	 * Register a MimeDetector and add it to the MimeDetector registry.
214 	 * MimeDetector(s) are effectively singletons as they are keyed against their
215 	 * fully qualified class name.
216 	 * @param mimeDetector. This must be the fully qualified name of a concrete instance of an
217 	 * AbstractMimeDetector class.
218 	 * This enforces that all custom MimeDetector(s) extend the AbstractMimeDetector.
219 	 * @see MimeDetector
220 	 */
221 	public MimeDetector registerMimeDetector(final String mimeDetector) {
222 		return mimeDetectorRegistry.registerMimeDetector(mimeDetector);
223 	}
224 
225 	/**
226 	 * Get the extension part of a file name defined by the file parameter.
227 	 *
228 	 * @param file
229 	 *            a file object
230 	 * @return the file extension or an empty string if it does not have one.
231 	 */
232 	public static String getExtension(final File file) {
233 		return getExtension(file.getName());
234 	}
235 
236 	/**
237 	 * Get the extension part of a file name defined by the fileName parameter.
238 	 * There may be no extension or it could be a single part extension such as
239 	 * .bat or a multi-part extension such as .tar.gz
240 	 *
241 	 * @param fileName
242 	 *            a relative or absolute path to a file
243 	 * @return the file extension or an empty string if it does not have one.
244 	 */
245 	public static String getExtension(final String fileName) {
246 		if(fileName == null || fileName.length() == 0) {
247 			return "";
248 		}
249 		int index = fileName.indexOf(".");
250 		return index < 0 ? "" : fileName.substring(index + 1);
251 	}
252 
253 	/**
254 	 * Get the first in a comma separated list of mime types. Useful when using
255 	 * extension mapping that can return multiple mime types separate by commas
256 	 * and you only want the first one.
257 	 *
258 	 * @param mimeTypes
259 	 *            comma separated list of mime types
260 	 * @return first in a comma separated list of mime types or null if the mimeTypes string is null or empty
261 	 */
262 	public static MimeType getFirstMimeType(final String mimeTypes) {
263 		if (mimeTypes != null && mimeTypes.trim().length() != 0) {
264 			return new MimeType(mimeTypes.split(",")[0].trim());
265 		}
266 		return null;
267 	}
268 
269 	/**
270 	 * Utility method to get the major or media part of a mime type i.e. the bit before
271 	 * the '/' character
272 	 *
273 	 * @param mimeType
274 	 *            you want to get the media part from
275 	 * @return media type of the mime type
276 	 * @throws MimeException
277 	 *             if you pass in an invalid mime type structure
278 	 */
279 	public static String getMediaType(final String mimeType)
280 			throws MimeException {
281 		return new MimeType(mimeType).getMediaType();
282 	}
283 
284 	/**
285 	 *
286 	 * Utility method to get the quality part of a mime type. If it does not
287 	 * exist then it is always set to q=1.0 unless it's a wild card. For the
288 	 * major component wild card the value is set to 0.01 For the minor
289 	 * component wild card the value is set to 0.02
290 	 * <p>
291 	 * Thanks to the Apache organisation for these settings.
292 	 *
293 	 * @param mimeType
294 	 *            a valid mime type string with or without a valid q parameter
295 	 * @return the quality value of the mime type either calculated from the
296 	 *         rules above or the actual value defined.
297 	 * @throws MimeException
298 	 *             this is thrown if the mime type pattern is invalid.
299 	 */
300 	public static double getMimeQuality(final String mimeType) throws MimeException {
301 		if (mimeType == null) {
302 			throw new MimeException("Invalid MimeType [" + mimeType + "].");
303 		}
304 		String[] parts = mimeSplitter.split(mimeType);
305 		if (parts.length < 2) {
306 			throw new MimeException("Invalid MimeType [" + mimeType + "].");
307 		}
308 		if (parts.length > 2) {
309 			for (int i = 2; i < parts.length; i++) {
310 				if (parts[i].trim().startsWith("q=")) {
311 					// Get the number part
312 					try {
313 						// Get the quality factor
314 						double d = Double.parseDouble(parts[i].split("=")[1]
315 								.trim());
316 						return d > 1.0 ? 1.0 : d;
317 					} catch (NumberFormatException e) {
318 						throw new MimeException(
319 								"Invalid MIME quality indicator ["
320 										+ parts[i].trim()
321 										+ "]. Must be a valid double between 0 and 1");
322 					} catch (Exception e) {
323 						throw new MimeException(
324 								"Error parsing MIME quality indicator.", e);
325 					}
326 				}
327 			}
328 		}
329 		// No quality indicator so always assume its 1 unless a wild card is used
330 		if (StringUtil.contains(parts[0], "*")) {
331 			return 0.01;
332 		} else if (StringUtil.contains(parts[1], "*")) {
333 			return 0.02;
334 		} else {
335 			// Assume q value of 1
336 			return 1.0;
337 		}
338 	}
339 
340 	/**
341 	 * Get a registered MimeDetector by name.
342 	 * @param name the name of a registered MimeDetector. This is always the fully qualified
343 	 * name of the class implementing the MimeDetector.
344 	 * @return
345 	 */
346 	public MimeDetector getMimeDetector(final String name) {
347 		return mimeDetectorRegistry.getMimeDetector(name);
348 	}
349 
350 	/**
351 	 * Get a Collection of possible MimeType(s) that this byte array could represent
352 	 * according to the registered MimeDetector(s). If no MimeType(s) are detected
353 	 * then the returned Collection will contain only the UNKNOWN_MIME_TYPE
354 	 * @param data
355 	 * @return all matching MimeType(s)
356 	 * @throws MimeException
357 	 */
358 	public final Collection getMimeTypes(final byte [] data) throws MimeException
359 	{
360 		return getMimeTypes(data, UNKNOWN_MIME_TYPE);
361 	}
362 
363 	/**
364 	 * Get a Collection of possible MimeType(s) that this byte array could represent
365 	 * according to the registered MimeDetector(s). If no MimeType(s) are detected
366 	 * then the returned Collection will contain only the passed in unknownMimeType
367 	 * @param data
368 	 * @param unknownMimeType used if the registered MimeDetector(s) fail to match any MimeType(s)
369 	 * @return all matching MimeType(s)
370 	 * @throws MimeException
371 	 */
372 	public final Collection getMimeTypes(final byte [] data, final MimeType unknownMimeType) throws MimeException
373 	{
374 		Collection mimeTypes = new MimeTypeHashSet();
375 		if(data == null) {
376 			log.error("byte array cannot be null.");
377 		} else {
378 			if(log.isDebugEnabled()) {
379 				try {
380 					log.debug("Getting MIME types for byte array [" + StringUtil.getHexString(data)+ "].");
381 				}catch(UnsupportedEncodingException e) {
382 					throw new MimeException(e);
383 				}
384 			}
385 			mimeTypes.addAll(mimeDetectorRegistry.getMimeTypes(data));
386 
387 			// We don't want the unknownMimeType added to the collection by MimeDetector(s)
388 			mimeTypes.remove(unknownMimeType);
389 		}
390 
391 		// If the collection is empty we want to add the unknownMimetype
392 		if(mimeTypes.isEmpty()) {
393 			mimeTypes.add(unknownMimeType);
394 		}
395 		if(log.isDebugEnabled()) {
396 			log.debug("Retrieved MIME types [" + mimeTypes.toString() + "]");
397 		}
398 		return mimeTypes;
399 	}
400 
401 	/**
402 	 * Get all of the matching mime types for this file object.
403 	 * The method delegates down to each of the registered MimeHandler(s) and returns a
404 	 * normalised list of all matching mime types. If no matching mime types are found the returned
405 	 * Collection will contain the default UNKNOWN_MIME_TYPE
406 	 * @param file the File object to detect.
407 	 * @return collection of matching MimeType(s)
408 	 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s)
409 	 * executed.
410 	 */
411 	public final Collection getMimeTypes(final File file) throws MimeException
412 	{
413 		return getMimeTypes(file, UNKNOWN_MIME_TYPE);
414 	}
415 
416 	/**
417 	 * Get all of the matching mime types for this file object.
418 	 * The method delegates down to each of the registered MimeHandler(s) and returns a
419 	 * normalised list of all matching mime types. If no matching mime types are found the returned
420 	 * Collection will contain the unknownMimeType passed in.
421 	 * @param file the File object to detect.
422 	 * @param unknownMimeType.
423 	 * @return the Collection of matching mime types. If the collection would be empty i.e. no matches then this will
424 	 * contain the passed in parameter unknownMimeType
425 	 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s)
426 	 * executed.
427 	 */
428 	public final Collection getMimeTypes(final File file, final MimeType unknownMimeType) throws MimeException
429 	{
430 		Collection mimeTypes = new MimeTypeHashSet();
431 
432 		if(file == null) {
433 			log.error("File reference cannot be null.");
434 		} else {
435 
436 			if(log.isDebugEnabled()) {
437 				log.debug("Getting MIME types for file [" + file.getAbsolutePath() + "].");
438 			}
439 
440 			if(file.isDirectory()) {
441 				mimeTypes.add(MimeUtil2.DIRECTORY_MIME_TYPE);
442 			} else {
443 				// Defer this call to the file name and stream methods
444 				mimeTypes.addAll(mimeDetectorRegistry.getMimeTypes(file));
445 
446 				// We don't want the unknownMimeType added to the collection by MimeDetector(s)
447 				mimeTypes.remove(unknownMimeType);
448 			}
449 		}
450 		// If the collection is empty we want to add the unknownMimetype
451 		if(mimeTypes.isEmpty()) {
452 			mimeTypes.add(unknownMimeType);
453 		}
454 		if(log.isDebugEnabled()) {
455 			log.debug("Retrieved MIME types [" + mimeTypes.toString() + "]");
456 		}
457 		return mimeTypes;
458 	}
459 
460 	/**
461 	 * Get all of the matching mime types for this InputStream object.
462 	 * The method delegates down to each of the registered MimeHandler(s) and returns a
463 	 * normalised list of all matching mime types. If no matching mime types are found the returned
464 	 * Collection will contain the default UNKNOWN_MIME_TYPE
465 	 * @param in InputStream to detect.
466 	 * @return
467 	 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s)
468 	 * executed.
469 	 */
470 	public final Collection getMimeTypes(final InputStream in) throws MimeException
471 	{
472 		return getMimeTypes(in, UNKNOWN_MIME_TYPE);
473 	}
474 
475 	/**
476 	 * Get all of the matching mime types for this InputStream object.
477 	 * The method delegates down to each of the registered MimeHandler(s) and returns a
478 	 * normalised list of all matching mime types. If no matching mime types are found the returned
479 	 * Collection will contain the unknownMimeType passed in.
480 	 * @param in the InputStream object to detect.
481 	 * @param unknownMimeType.
482 	 * @return the Collection of matching mime types. If the collection would be empty i.e. no matches then this will
483 	 * contain the passed in parameter unknownMimeType
484 	 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s)
485 	 * executed.
486 	 */
487 	public final Collection getMimeTypes(final InputStream in, final MimeType unknownMimeType) throws MimeException
488 	{
489 		Collection mimeTypes = new MimeTypeHashSet();
490 
491 		if(in == null) {
492 			log.error("InputStream reference cannot be null.");
493 		} else {
494 			if (!in.markSupported()) {
495 				throw new MimeException("InputStream must support the mark() and reset() methods.");
496 			}
497 			if(log.isDebugEnabled()) {
498 				log.debug("Getting MIME types for InputSteam [" + in + "].");
499 			}
500 			mimeTypes.addAll(mimeDetectorRegistry.getMimeTypes(in));
501 
502 			// We don't want the unknownMimeType added to the collection by MimeDetector(s)
503 			mimeTypes.remove(unknownMimeType);
504 		}
505 		// If the collection is empty we want to add the unknownMimetype
506 		if(mimeTypes.isEmpty()) {
507 			mimeTypes.add(unknownMimeType);
508 		}
509 		if(log.isDebugEnabled()) {
510 			log.debug("Retrieved MIME types [" + mimeTypes.toString() + "]");
511 		}
512 		return mimeTypes;
513 	}
514 
515 	/**
516 	 * Get all of the matching mime types for this file name.
517 	 * The method delegates down to each of the registered MimeHandler(s) and returns a
518 	 * normalised list of all matching mime types. If no matching mime types are found the returned
519 	 * Collection will contain the default UNKNOWN_MIME_TYPE
520 	 * @param fileName the name of a file to detect.
521 	 * @return collection of matching MimeType(s)
522 	 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s)
523 	 * executed.
524 	 */
525 	public final Collection getMimeTypes(final String fileName) throws MimeException
526 	{
527 		return getMimeTypes(fileName, UNKNOWN_MIME_TYPE);
528 	}
529 
530 	/**
531 	 * Get all of the matching mime types for this file name .
532 	 * The method delegates down to each of the registered MimeHandler(s) and returns a
533 	 * normalised list of all matching mime types. If no matching mime types are found the returned
534 	 * Collection will contain the unknownMimeType passed in.
535 	 * @param fileName the name of a file to detect.
536 	 * @param unknownMimeType.
537 	 * @return the Collection of matching mime types. If the collection would be empty i.e. no matches then this will
538 	 * contain the passed in parameter unknownMimeType
539 	 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s)
540 	 * executed.
541 	 */
542 	public final Collection getMimeTypes(final String fileName, final MimeType unknownMimeType) throws MimeException
543 	{
544 		Collection mimeTypes = new MimeTypeHashSet();
545 
546 		if(fileName == null) {
547 			log.error("fileName cannot be null.");
548 		} else {
549 			if(log.isDebugEnabled()) {
550 				log.debug("Getting MIME types for file name [" + fileName + "].");
551 			}
552 
553 			// Test if this is a directory
554 			File file = new File(fileName);
555 
556 			if(file.isDirectory()) {
557 				mimeTypes.add(MimeUtil2.DIRECTORY_MIME_TYPE);
558 			} else {
559 				mimeTypes.addAll(mimeDetectorRegistry.getMimeTypes(fileName));
560 
561 				// We don't want the unknownMimeType added to the collection by MimeDetector(s)
562 				mimeTypes.remove(unknownMimeType);
563 			}
564 		}
565 		// If the collection is empty we want to add the unknownMimetype
566 		if(mimeTypes.isEmpty()) {
567 			mimeTypes.add(unknownMimeType);
568 		}
569 		if(log.isDebugEnabled()) {
570 			log.debug("Retrieved MIME types [" + mimeTypes.toString() + "]");
571 		}
572 		return mimeTypes;
573 
574 	}
575 
576 	/**
577 	 * Get all of the matching mime types for this URL object.
578 	 * The method delegates down to each of the registered MimeHandler(s) and returns a
579 	 * normalised list of all matching mime types. If no matching mime types are found the returned
580 	 * Collection will contain the default UNKNOWN_MIME_TYPE
581 	 * @param url a URL to detect.
582 	 * @return Collection of matching MimeType(s)
583 	 * @throws MimeException if there are problems such as reading files generated when the MimeHandler(s)
584 	 * executed.
585 	 */
586 	public final Collection getMimeTypes(final URL url) throws MimeException
587 	{
588 		return getMimeTypes(url, UNKNOWN_MIME_TYPE);
589 	}
590 
591 	public final Collection getMimeTypes(final URL url, final MimeType unknownMimeType) throws MimeException
592 	{
593 		Collection mimeTypes = new MimeTypeHashSet();
594 
595 		if(url == null) {
596 			log.error("URL reference cannot be null.");
597 		} else {
598 			if(log.isDebugEnabled()) {
599 				log.debug("Getting MIME types for URL [" + url + "].");
600 			}
601 
602 			// Test if this is a directory
603 			File file = new File(url.getPath());
604 			if(file.isDirectory()) {
605 				mimeTypes.add(MimeUtil2.DIRECTORY_MIME_TYPE);
606 			} else {
607 				// defer these calls to the file name and stream methods
608 				mimeTypes.addAll(mimeDetectorRegistry.getMimeTypes(url));
609 
610 				// We don't want the unknownMimeType added to the collection by MimeDetector(s)
611 				mimeTypes.remove(unknownMimeType);
612 			}
613 		}
614 		// If the collection is empty we want to add the unknownMimetype
615 		if(mimeTypes.isEmpty()) {
616 			mimeTypes.add(unknownMimeType);
617 		}
618 		if(log.isDebugEnabled()) {
619 			log.debug("Retrieved MIME types [" + mimeTypes.toString() + "]");
620 		}
621 		return mimeTypes;
622 	}
623 
624 	/**
625 	 * Get the native byte order of the OS on which you are running. It will be
626 	 * either big or little endian. This is used internally for the magic mime
627 	 * rules mapping.
628 	 *
629 	 * @return ByteOrder
630 	 */
631 	public static ByteOrder getNativeOrder() {
632 		return MimeUtil2.nativeByteOrder;
633 	}
634 
635 	/**
636 	 * Gives you the best match for your requirements.
637 	 * <p>
638 	 * You can pass the accept header from a browser request to this method
639 	 * along with a comma separated list of possible mime types returned from
640 	 * say getExtensionMimeTypes(...) and the best match according to the accept
641 	 * header will be returned.
642 	 * </p>
643 	 * <p>
644 	 * The following is typical of what may be specified in an HTTP Accept
645 	 * header:
646 	 * </p>
647 	 * <p>
648 	 * Accept: text/xml, application/xml, application/xhtml+xml,
649 	 * text/html;q=0.9, text/plain;q=0.8, video/x-mng, image/png, image/jpeg,
650 	 * image/gif;q=0.2, text/css, *&#47;*;q=0.1
651 	 * </p>
652 	 * <p>
653 	 * The quality parameter (q) indicates how well the user agent handles the
654 	 * MIME type. A value of 1 indicates the MIME type is understood perfectly,
655 	 * and a value of 0 indicates the MIME type isn't understood at all.
656 	 * </p>
657 	 * <p>
658 	 * The reason the image/gif MIME type contains a quality parameter of 0.2,
659 	 * is to indicate that PNG & JPEG are preferred over GIF if the server is
660 	 * using content negotiation to deliver either a PNG or a GIF to user
661 	 * agents. Similarly, the text/html quality parameter has been lowered a
662 	 * little, to ensure that the XML MIME types are given in preference if
663 	 * content negotiation is being used to serve an XHTML document.
664 	 * </p>
665 	 *
666 	 * @param accept
667 	 *            is a comma separated list of mime types you can accept
668 	 *            including QoS parameters. Can pass the Accept: header
669 	 *            directly.
670 	 * @param canProvide
671 	 *            is a comma separated list of mime types that can be provided
672 	 *            such as that returned from a call to
673 	 *            getExtensionMimeTypes(...)
674 	 * @return the best matching mime type possible.
675 	 */
676 	public static MimeType getPreferedMimeType(String accept, final String canProvide) {
677 		if (canProvide == null || canProvide.trim().length() == 0) {
678 			throw new MimeException(
679 					"Must specify at least one MIME type that can be provided.");
680 		}
681 		if (accept == null || accept.trim().length() == 0) {
682 			accept = "*/*";
683 		}
684 
685 		// If an accept header is passed in then lets remove the Accept part
686 		if (accept.indexOf(":") > 0) {
687 			accept = accept.substring(accept.indexOf(":") + 1);
688 		}
689 
690 		// Remove any unwanted spaces from the wanted mime types for instance
691 		// text/html; q=0.4
692 		accept = accept.replaceAll(" ", "");
693 
694 		return getBestMatch(accept, getList(canProvide));
695 	}
696 
697 	/**
698 	 * Get the most specific match of the Collection of mime types passed in.
699 	 * The Collection
700 	 * @param mimeTypes this should be the Collection of mime types returned
701 	 * from a getMimeTypes(...) call.
702 	 * @return the most specific MimeType. If more than one of the mime types in the Collection
703 	 * have the same value then the first one found with this value in the Collection is returned.
704 	 */
705 	public static MimeType getMostSpecificMimeType(final Collection mimeTypes) {
706 		MimeType mimeType = null;
707 		int specificity = 0;
708 		for(Iterator it = mimeTypes.iterator(); it.hasNext();) {
709 			MimeType mt = (MimeType)it.next();
710 			if(mt.getSpecificity() > specificity) {
711 				mimeType = mt;
712 				specificity = mimeType.getSpecificity();
713 			}
714 		}
715 		return mimeType;
716 	}
717 
718 	/**
719 	 * Utility method to get the minor part of a mime type i.e. the bit after
720 	 * the '/' character
721 	 *
722 	 * @param mimeType
723 	 *            you want to get the minor part from
724 	 * @return sub type of the mime type
725 	 * @throws MimeException
726 	 *             if you pass in an invalid mime type structure
727 	 */
728 	public static String getSubType(final String mimeType)
729 			throws MimeException {
730 		return new MimeType(mimeType).getSubType();
731 	}
732 
733 	/**
734 	 * Check to see if this mime type is one of the types seen during
735 	 * initialisation or has been added at some later stage using
736 	 * addKnownMimeType(...)
737 	 *
738 	 * @param mimeType
739 	 * @return true if the mimeType is in the list else false is returned
740 	 * @see #addKnownMimeType(String mimetype)
741 	 */
742 	public static boolean isMimeTypeKnown(final MimeType mimeType) {
743 		try {
744 			Set s = (Set) mimeTypes.get(mimeType.getMediaType());
745 			if (s == null) {
746 				return false;
747 			}
748 			return s.contains(mimeType.getSubType());
749 		} catch (MimeException e) {
750 			return false;
751 		}
752 	}
753 
754 	/**
755 	 * Check to see if this mime type is one of the types seen during
756 	 * initialisation or has been added at some later stage using
757 	 * addKnownMimeType(...)
758 	 *
759 	 * @param mimeType
760 	 * @return true if the mimeType is in the list else false is returned
761 	 * @see #addKnownMimeType(String mimetype)
762 	 */
763 	public static boolean isMimeTypeKnown(final String mimeType) {
764 		return isMimeTypeKnown(new MimeType(mimeType));
765 	}
766 
767 	/**
768 	 * Utility convenience method to check if a particular MimeType instance is actually a TextMimeType.
769 	 * Used when iterating over a collection of MimeType's to help with casting to enable access
770 	 * the the TextMimeType methods not available to a standard MimeType. Can also use instanceof.
771 	 * @param mimeType
772 	 * @return true if the passed in instance is a TextMimeType
773 	 * @see MimeType
774 	 * @see TextMimeType
775 	 */
776 	public static boolean isTextMimeType(final MimeType mimeType) {
777 		return mimeType instanceof TextMimeType;
778 	}
779 
780 	/**
781 	 * Remove a previously registered MimeDetector
782 	 * @param mimeDetector
783 	 * @return the MimeDetector that was removed from the registry else null.
784 	 */
785 	public MimeDetector unregisterMimeDetector(final MimeDetector mimeDetector) {
786 		return mimeDetectorRegistry.unregisterMimeDetector(mimeDetector);
787 	}
788 
789 	/**
790 	 * Remove a previously registered MimeDetector
791 	 * @param mimeDetector
792 	 * @return the MimeDetector that was removed from the registry else null.
793 	 */
794 	public MimeDetector unregisterMimeDetector(final String mimeDetector) {
795 		return mimeDetectorRegistry.unregisterMimeDetector(mimeDetector);
796 	}
797 
798 	/**
799 	 * Get the quality parameter of this mime type i.e. the <code>q=</code> property.
800 	 * This method implements a value system similar to that used by the apache server i.e.
801 	 * if the media type is a * then it's <code>q</code> value is set to 0.01 and if the sub type is
802 	 * a * then the <code>q</code> value is set to 0.02 unless a specific <code>q</code>
803 	 * value is specified. If a <code>q</code> property is set it is limited to a max value of 1.0
804 	 *
805 	 * @param mimeType
806 	 * @return the quality value as a double between 0.0 and 1.0
807 	 * @throws MimeException
808 	 */
809 	public static double getQuality(final String mimeType) throws MimeException
810 	{
811 		return getMimeQuality(mimeType);
812 	}
813 
814 	// Check each entry in each of the wanted lists against the entries in the
815 	// can provide list.
816 	// We take into consideration the QoS indicator
817 	private static MimeType getBestMatch(final String accept, final List canProvideList) {
818 
819 		if (canProvideList.size() == 1) {
820 			// If we only have one mime type that can be provided then thats
821 			// what we provide even if
822 			// the wanted list does not contain this entry or it's the worst
823 			// QoS.
824 			// This will cover the majority of cases
825 			return new MimeType((String) canProvideList.get(0));
826 		}
827 
828 		Map wantedMap = normaliseWantedMap(accept, canProvideList);
829 
830 		MimeType bestMatch = null;
831 		double qos = 0.0;
832 		Iterator it = wantedMap.keySet().iterator();
833 		while (it.hasNext()) {
834 			List wantedList = (List) wantedMap.get(it.next());
835 			Iterator it2 = wantedList.iterator();
836 			while (it2.hasNext()) {
837 				String mimeType = (String) it2.next();
838 				double q = getMimeQuality(mimeType);
839 				String majorComponent = getMediaType(mimeType);
840 				String minorComponent = getSubType(mimeType);
841 				if (q > qos) {
842 					qos = q;
843 					bestMatch = new MimeType(majorComponent + "/" + minorComponent);
844 				}
845 			}
846 		}
847 		// Gone through all the wanted list and found the best match possible
848 		return bestMatch;
849 	}
850 
851 	// Turn a comma separated string into a list
852 	private static List getList(final String options) {
853 		List list = new ArrayList();
854 		String[] array = options.split(",");
855 		for (int i = 0; i < array.length; i++) {
856 			list.add(array[i].trim());
857 		}
858 		return list;
859 	}
860 
861 	// Turn a comma separated string of accepted mime types into a Map
862 	// based on the list of mime types that can be provided
863 	private static Map normaliseWantedMap(final String accept, final List canProvide) {
864 		Map map = new LinkedHashMap();
865 		String[] array = accept.split(",");
866 
867 		for (int i = 0; i < array.length; i++) {
868 			String mimeType = array[i].trim();
869 			String major = getMediaType(mimeType);
870 			String minor = getSubType(mimeType);
871 			double qos = getMimeQuality(mimeType);
872 
873 			if (StringUtil.contains(major ,"*")) {
874 				// All canProvide types are acceptable with the qos defined OR
875 				// 0.01 if not defined
876 				Iterator it = canProvide.iterator();
877 				while (it.hasNext()) {
878 					String mt = (String) it.next();
879 					List list = (List) map.get(getMediaType(mt));
880 					if (list == null) {
881 						list = new ArrayList();
882 					}
883 					list.add(mt + ";q=" + qos);
884 					map.put(getMediaType(mt), list);
885 				}
886 			} else if (StringUtil.contains(minor, "*")) {
887 				Iterator it = canProvide.iterator();
888 				while (it.hasNext()) {
889 					String mt = (String) it.next();
890 					if (getMediaType(mt).equals(major)) {
891 						List list = (List) map.get(major);
892 						if (list == null) {
893 							list = new ArrayList();
894 						}
895 						list.add(major + "/" + getSubType(mt) + ";q="
896 								+ qos);
897 						map.put(major, list);
898 					}
899 				}
900 
901 			} else {
902 				if (canProvide.contains(major + "/" + minor)) {
903 					List list = (List) map.get(major);
904 					if (list == null) {
905 						list = new ArrayList();
906 					}
907 					list.add(major + "/" + minor + ";q=" + qos);
908 					map.put(major, list);
909 				}
910 			}
911 		}
912 		return map;
913 	}
914 
915 	/**
916 	 * Utility method to get the InputStream from a URL. Handles several schemes, for instance, if the URL points to a jar
917 	 * entry it will get a proper usable stream from the URL
918 	 * @param url
919 	 * @return
920 	 */
921 	public static InputStream getInputStreamForURL(URL url) throws Exception {
922 		try {
923 			return url.openStream();
924 		}catch(ZipException e) {
925 			return ZipJarUtil.getInputStreamForURL(url);
926 		}
927 	}
928 }
929 
930 /**
931  * <p>
932  * All methods in this class that return a Collection object actually return a {@link MimeTypeHashSet} that implements both the {@link Set} and {@link Collection}
933  * interfaces.
934  * </p>
935 
936  * @author Steven McArdle
937  *
938  */
939 class MimeDetectorRegistry {
940 
941 	private static Logger log = LoggerFactory.getLogger(MimeDetectorRegistry.class);
942 
943 	/**
944 	 * This property holds an instance of the TextMimeDetector.
945 	 * This is the only pre-registerd MimeDetector and cannot be
946 	 * de-registered or registered by your code
947 	 */
948 	private TextMimeDetector TextMimeDetector = new TextMimeDetector(1);
949 
950 
951 	private Map mimeDetectors = new TreeMap();
952 
953 	/**
954 	 * Use the fully qualified name of a MimeDetector and try to instantiate it if
955 	 * it's not already registered. If it's already registered then log a warning and
956 	 * return the already registered MimeDetector
957 	 * @param mimeDetector
958 	 * @return MimeDetector registered under this name. Returns null if an exception occurs
959 	 */
960 	MimeDetector registerMimeDetector(final String mimeDetector) {
961 		if(mimeDetectors.containsKey(mimeDetector)) {
962 			log.warn("MimeDetector [" + mimeDetector + "] will not be registered as a MimeDetector with this name is already registered.");
963 			return (MimeDetector)mimeDetectors.get(mimeDetector);
964 		}
965 		// Create the mime detector if we can
966 		try {
967 			MimeDetector md = (MimeDetector)Class.forName(mimeDetector).newInstance();
968 			md.init();
969 			if(log.isDebugEnabled()) {
970 				log.debug("Registering MimeDetector with name [" + md.getName() + "] and description [" + md.getDescription() + "]");
971 			}
972 			mimeDetectors.put(mimeDetector, md);
973 			return md;
974 		}catch(Exception e) {
975 			log.error("Exception while registering MimeDetector [" + mimeDetector + "].", e);
976 		}
977 		// Failed to create an instance
978 		return null;
979 	}
980 
981 	MimeDetector getMimeDetector(final String name) {
982 		return (MimeDetector)mimeDetectors.get(name);
983 	}
984 
985 	Collection getMimeTypes(final byte [] data) throws MimeException
986 	{
987 		Collection mimeTypes = new ArrayList();
988 		try {
989 			if(!EncodingGuesser.getSupportedEncodings().isEmpty()) {
990 				mimeTypes = TextMimeDetector.getMimeTypes(data);
991 			}
992 		}catch(UnsupportedOperationException ignore) {
993 			// The TextMimeDetector will throw this if it decides
994 			// the content is not text
995 		}
996 		for(Iterator it  = mimeDetectors.values().iterator();it.hasNext();) {
997 			try {
998 				MimeDetector md = (MimeDetector)it.next();
999 				mimeTypes.addAll(md.getMimeTypes(data));
1000 			}catch(UnsupportedOperationException ignore) {
1001 				// We ignore this as it indicates that this MimeDetector does not support
1002 				// Getting mime types from files
1003 			}catch(Exception e) {
1004 				log.error(e.getLocalizedMessage(), e);
1005 			}
1006 		}
1007 		return mimeTypes;
1008 	}
1009 
1010 
1011 
1012 	Collection getMimeTypes(final String fileName) throws MimeException
1013 	{
1014 		Collection mimeTypes = new ArrayList();
1015 		try {
1016 			if(!EncodingGuesser.getSupportedEncodings().isEmpty()) {
1017 				mimeTypes = TextMimeDetector.getMimeTypes(fileName);
1018 			}
1019 		}catch(UnsupportedOperationException ignore) {
1020 			// The TextMimeDetector will throw this if it decides
1021 			// the content is not text
1022 		}
1023 		for(Iterator it  = mimeDetectors.values().iterator();it.hasNext();) {
1024 			try {
1025 				MimeDetector md = (MimeDetector)it.next();
1026 				mimeTypes.addAll(md.getMimeTypes(fileName));
1027 			}catch(UnsupportedOperationException usoe) {
1028 				// We ignore this as it indicates that this MimeDetector does not support
1029 				// Getting mime types from streams
1030 			}catch(Exception e) {
1031 				log.error(e.getLocalizedMessage(), e);
1032 			}
1033 		}
1034 		return mimeTypes;
1035 	}
1036 
1037 	Collection getMimeTypes(final File file) throws MimeException
1038 	{
1039 		Collection mimeTypes = new ArrayList();
1040 		try {
1041 			if(!EncodingGuesser.getSupportedEncodings().isEmpty()) {
1042 				mimeTypes = TextMimeDetector.getMimeTypes(file);
1043 			}
1044 		}catch(UnsupportedOperationException ignore) {
1045 			// The TextMimeDetector will throw this if it decides
1046 			// the content is not text
1047 		}
1048 		for(Iterator it  = mimeDetectors.values().iterator();it.hasNext();) {
1049 			try {
1050 				MimeDetector md = (MimeDetector)it.next();
1051 				mimeTypes.addAll(md.getMimeTypes(file));
1052 			}catch(UnsupportedOperationException usoe) {
1053 				// We ignore this as it indicates that this MimeDetector does not support
1054 				// Getting mime types from streams
1055 			}catch(Exception e) {
1056 				log.error(e.getLocalizedMessage(), e);
1057 			}
1058 		}
1059 		return mimeTypes;
1060 	}
1061 
1062 	Collection getMimeTypes(final InputStream in) throws MimeException
1063 	{
1064 		Collection mimeTypes = new ArrayList();
1065 		try {
1066 			if(!EncodingGuesser.getSupportedEncodings().isEmpty()) {
1067 				mimeTypes = TextMimeDetector.getMimeTypes(in);
1068 			}
1069 		}catch(UnsupportedOperationException ignore) {
1070 			// The TextMimeDetector will throw this if it decides
1071 			// the content is not text
1072 		}
1073 		for(Iterator it  = mimeDetectors.values().iterator();it.hasNext();) {
1074 			try {
1075 				MimeDetector md = (MimeDetector)it.next();
1076 				mimeTypes.addAll(md.getMimeTypes(in));
1077 			}catch(UnsupportedOperationException usoe) {
1078 				// We ignore this as it indicates that this MimeDetector does not support
1079 				// Getting mime types from streams
1080 			}catch(Exception e) {
1081 				log.error(e.getLocalizedMessage(), e);
1082 			}
1083 		}
1084 		return mimeTypes;
1085 	}
1086 
1087 	Collection getMimeTypes(final URL url) throws MimeException
1088 	{
1089 		Collection mimeTypes = new ArrayList();
1090 		try {
1091 			if(!EncodingGuesser.getSupportedEncodings().isEmpty()) {
1092 				mimeTypes = TextMimeDetector.getMimeTypes(url);
1093 			}
1094 		}catch(UnsupportedOperationException ignore) {
1095 			// The TextMimeDetector will throw this if it decides
1096 			// the content is not text
1097 		}
1098 		for(Iterator it  = mimeDetectors.values().iterator();it.hasNext();) {
1099 			try {
1100 				MimeDetector md = (MimeDetector)it.next();
1101 				mimeTypes.addAll(md.getMimeTypes(url));
1102 			}catch(UnsupportedOperationException usoe) {
1103 				// We ignore this as it indicates that this MimeDetector does not support
1104 				// Getting mime types from streams
1105 			}catch(Exception e) {
1106 				log.error(e.getLocalizedMessage(), e);
1107 			}
1108 		}
1109 		return mimeTypes;
1110 	}
1111 
1112 	MimeDetector unregisterMimeDetector(final String mimeDetector) {
1113 		if(mimeDetector == null) {
1114 			return null;
1115 		}
1116 		if(log.isDebugEnabled()) {
1117 			log.debug("Unregistering MimeDetector [" + mimeDetector + "] from registry.");
1118 		}
1119 		try {
1120 			MimeDetector md = (MimeDetector)mimeDetectors.get(mimeDetector);
1121 			if(md != null) {
1122 				md.delete();
1123 				return (MimeDetector)mimeDetectors.remove(mimeDetector);
1124 			}
1125 		}catch(Exception e) {
1126 			log.error("Exception while un-registering MimeDetector [" + mimeDetector + "].", e);
1127 		}
1128 
1129 		// Shouldn't get here
1130 		return null;
1131 	}
1132 
1133 	/**
1134 	 * unregister the MimeDetector from the list.
1135 	 * @param mimeDetector the MimeDetector to unregister
1136 	 * @return MimeDetector unregistered or null if it was not registered
1137 	 */
1138 	MimeDetector unregisterMimeDetector(final MimeDetector mimeDetector) {
1139 		if(mimeDetector == null) {
1140 			return null;
1141 		}
1142 		return unregisterMimeDetector(mimeDetector.getName());
1143 	}
1144 }