1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package eu.medsea.mimeutil.detector;
18
19 import java.io.IOException;
20 import java.io.InputStream;
21 import java.io.RandomAccessFile;
22 import java.nio.ByteBuffer;
23 import java.nio.ByteOrder;
24 import java.util.ArrayList;
25 import java.util.Collections;
26 import java.util.Iterator;
27 import java.util.List;
28 import java.util.Vector;
29
30 import eu.medsea.mimeutil.MimeType;
31 import eu.medsea.util.StringUtil;
32
33
34
35
36
37
38
39
40 class MagicMimeEntry {
41
// Type codes assigned by getType(String) from the second token of a rule line.

/** Rule type whose type token starts with "string". */
public static final int STRING_TYPE = 1;
/** Rule type whose type token starts with "belong"; matched as 4 bytes, big-endian. */
public static final int BELONG_TYPE = 2;
/** Rule type whose type token is exactly "short"; matched as 2 bytes, big-endian. */
public static final int SHORT_TYPE = 3;
/** Rule type whose type token starts with "lelong"; matched as 4 bytes, little-endian. */
public static final int LELONG_TYPE = 4;
/** Rule type whose type token starts with "beshort"; matched as 2 bytes, big-endian. */
public static final int BESHORT_TYPE = 5;
/** Rule type whose type token starts with "leshort"; matched as 2 bytes, little-endian. */
public static final int LESHORT_TYPE = 6;
/** Rule type whose type token is exactly "byte"; matched as a single byte. */
public static final int BYTE_TYPE = 7;
/** Returned by getType(String) for any type token it does not recognise; such entries never match. */
public static final int UNKNOWN_TYPE = 20;

/** Child entries; each child registers itself here from the private constructor. */
private ArrayList subEntries = new ArrayList();
/** Offset at which the test data starts, parsed from the first token (decimal or 0x-prefixed hex). */
private int checkBytesFrom;
/** One of the *_TYPE constants, derived from typeStr. */
private int type;
/** Raw second token of the rule line (type name, possibly with a "&mask" or ">n" suffix). */
private String typeStr;
/** Third token after the leading operation character is removed and escapes are substituted; "" if absent. */
private String content;
/** Numeric value of content for the numeric types; 0 otherwise. */
private long contentNumber;
/** MIME type built from the fourth token, or null when the line has none. */
private MimeType mimeType;
/** Fifth token of the rule line, or null when the line has none. */
private String mimeEnc;
/** Entry this one is nested under, or null for a top-level entry. */
private MagicMimeEntry parent;

/** Comparison to apply; stays EQUALS unless addEntry derives another one from the content token. */
private MagicMimeEntryOperation operation = MagicMimeEntryOperation.EQUALS;

/** Set to true while sizing/reading a string entry whose type token contains '>'; matchString then searches the buffer for content. */
boolean isBetween;
64
/**
 * Creates a top-level entry (level 0, no parent) from the given rule lines.
 * The first line becomes this entry; directly following lines with more
 * leading '&gt;' characters become nested sub entries. Consumed lines are
 * removed from {@code entries}.
 *
 * @param entries rule lines (Strings); nothing is parsed when null or empty
 * @throws InvalidMagicMimeEntryException if a line cannot be parsed
 */
public MagicMimeEntry(ArrayList entries)
        throws InvalidMagicMimeEntryException {
    this(0, null, entries);
}

/**
 * Recursive worker: parses the head of {@code entries} as this entry and
 * then every following line that is nested deeper than {@code level}.
 *
 * @param level   number of leading '&gt;' characters of this entry's own line
 * @param parent  entry to attach to, or null
 * @param entries remaining rule lines; the consumed ones are removed
 * @throws InvalidMagicMimeEntryException wrapping any failure while parsing the head line
 */
private MagicMimeEntry(int level, MagicMimeEntry parent, ArrayList entries)
        throws InvalidMagicMimeEntryException {
    if (entries == null || entries.isEmpty()) {
        return;
    }

    this.parent = parent;
    if (parent != null) {
        // Registration happens before parsing, exactly as the nesting is discovered.
        parent.subEntries.add(this);
    }

    try {
        addEntry((String) entries.get(0));
    } catch (Exception failure) {
        // Any failure (including runtime ones) is reported together with the lines.
        throw new InvalidMagicMimeEntryException(entries, failure);
    }
    entries.remove(0);

    // Each nested constructor call consumes at least the current head line.
    while (!entries.isEmpty()) {
        int nextLevel = howManyGreaterThans((String) entries.get(0));
        if (nextLevel <= level) {
            // Sibling or ancestor line: leave it for the caller.
            break;
        }
        new MagicMimeEntry(nextLevel, this, entries);
    }
}
97
98 public String toString() {
99 return "MimeMagicType: " + checkBytesFrom + ", " + type + ", "
100 + content + ", " + mimeType + ", " + mimeEnc;
101 }
102
103 public void traverseAndPrint(String tabs) {
104 System.out.println(tabs + toString());
105 int len = subEntries.size();
106 for (int i = 0; i < len; i++) {
107 MagicMimeEntry me = (MagicMimeEntry) subEntries.get(i);
108 me.traverseAndPrint(tabs + "\t");
109 }
110 }
111
112 private int howManyGreaterThans(String aLine) {
113 if (aLine == null) {
114 return -1;
115 }
116 int i = 0;
117 int len = aLine.length();
118 while (i < len) {
119 if (aLine.charAt(i) == '>') {
120 i++;
121 } else {
122 break;
123 }
124 }
125 return i;
126 }
127
128
129
130
131
132
133
134 void addEntry(String aLine) throws InvalidMagicMimeEntryException {
135
136
137
138
139
140
141
142 String trimmed = aLine.replaceAll("[\\\\][ ]", "<##>").replaceAll(
143 "^>*", "").replaceAll("\\s+", "\t")
144 .replaceAll("[\t]{2,}", "\t").replaceAll("<##>", "\\\\ ");
145 String[] tokens = trimmed.split("\t");
146
147
148 Vector v = new Vector();
149 for (int i = 0; i < tokens.length; i++) {
150 if (!"".equals(tokens[i])) {
151 v.add(tokens[i]);
152 }
153 }
154 tokens = new String[v.size()];
155 tokens = (String[]) v.toArray(tokens);
156
157 if (tokens.length > 0) {
158 String tok = tokens[0].trim();
159 try {
160 if (tok.startsWith("0x")) {
161 checkBytesFrom = Integer.parseInt(tok.substring(2), 16);
162 } else {
163 checkBytesFrom = Integer.parseInt(tok);
164 }
165 } catch (NumberFormatException e) {
166 throw new InvalidMagicMimeEntryException(Collections
167 .singletonList(this), e);
168 }
169 }
170 if (tokens.length > 1) {
171 typeStr = tokens[1].trim();
172 type = getType(typeStr);
173 }
174 if (tokens.length > 2) {
175
176 content = ltrim(tokens[2]);
177
178
179
180 switch (type) {
181 case BYTE_TYPE:
182 case SHORT_TYPE:
183 case BESHORT_TYPE:
184 case LESHORT_TYPE:
185 case LELONG_TYPE:
186 case BELONG_TYPE:
187 operation = MagicMimeEntryOperation
188 .getOperationForNumberField(content);
189 break;
190 default:
191 operation = MagicMimeEntryOperation
192 .getOperationForStringField(content);
193 }
194
195 if (content.length() > 0
196 && content.charAt(0) == operation.getOperationID())
197 content = content.substring(1);
198
199 content = stringWithEscapeSubstitutions(content);
200 } else
201 content = "";
202
203
204 if (tokens.length > 3) {
205 mimeType = new MimeType(tokens[3].trim());
206 }
207 if (tokens.length > 4) {
208 mimeEnc = tokens[4].trim();
209 }
210
211 initContentNumber();
212 }
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231 private void initContentNumber() {
232 contentNumber = 0;
233 if (content.length() == 0)
234 return;
235
236
237
238
239
240 switch (type) {
241 case BYTE_TYPE:
242 case SHORT_TYPE:
243 case BESHORT_TYPE:
244 case LESHORT_TYPE:
245 case LELONG_TYPE:
246 case BELONG_TYPE: {
247 if (content.startsWith("0x")) {
248 contentNumber = Long.parseLong(content.substring(2).trim(), 16);
249
250
251
252
253
254
255
256
257
258
259
260
261
262 } else if (content.startsWith("0")) {
263 contentNumber = Long.parseLong(content.trim(), 8);
264 } else {
265 contentNumber = Long.parseLong(content.trim());
266 }
267 }
268 }
269 }
270
271 private String ltrim(String s) {
272 for (int i = 0; i < s.length(); i++) {
273 if (s.charAt(i) != ' ') {
274 return s.substring(i);
275 }
276 }
277 return s;
278 }
279
280 private int getType(String tok) {
281 if (tok.startsWith("string")) {
282 return STRING_TYPE;
283 } else if (tok.startsWith("belong")) {
284 return BELONG_TYPE;
285 } else if (tok.equals("short")) {
286 return SHORT_TYPE;
287 } else if (tok.startsWith("lelong")) {
288 return LELONG_TYPE;
289 } else if (tok.startsWith("beshort")) {
290 return BESHORT_TYPE;
291 } else if (tok.startsWith("leshort")) {
292 return LESHORT_TYPE;
293 } else if (tok.equals("byte")) {
294 return BYTE_TYPE;
295 }
296
297 return UNKNOWN_TYPE;
298 }
299
300 public int getCheckBytesFrom() {
301 return checkBytesFrom;
302 }
303
304 public int getType() {
305 return type;
306 }
307
308 public String getContent() {
309 return content;
310 }
311
312 public MimeType getMimeType() {
313 return mimeType;
314 }
315
316 MagicMimeEntry getMatch(InputStream in) throws IOException {
317 int bytesToRead = getInputStreamMarkLength();
318 in.mark(bytesToRead);
319 try {
320 byte[] content = new byte[bytesToRead];
321
322
323
324
325
326 int offset = 0;
327 int restBytesToRead = bytesToRead;
328 while (restBytesToRead > 0) {
329 int bytesRead = in.read(content, offset, restBytesToRead);
330 if (bytesRead < 0)
331 break;
332
333 offset += bytesRead;
334 restBytesToRead -= bytesRead;
335 }
336
337 return getMatch(content);
338 } finally {
339 in.reset();
340 }
341 }
342
343 MagicMimeEntry getMatch(byte[] content) throws IOException {
344 ByteBuffer buf = readBuffer(content);
345 if (buf == null)
346 return null;
347
348 buf.position(0);
349 boolean matches = match(buf);
350 if (matches) {
351 int subLen = subEntries.size();
352 MimeType mimeType = getMimeType();
353 if (subLen > 0) {
354 for (int k = 0; k < subLen; k++) {
355 MagicMimeEntry me = (MagicMimeEntry) subEntries.get(k);
356 MagicMimeEntry matchingEntry = me.getMatch(content);
357 if (matchingEntry != null) {
358 return matchingEntry;
359 }
360 }
361 if (mimeType != null) {
362 return this;
363 }
364 } else {
365 if (mimeType != null)
366 return this;
367 }
368 }
369
370 return null;
371 }
372
373 MagicMimeEntry getMatch(RandomAccessFile raf) throws IOException {
374 ByteBuffer buf = readBuffer(raf);
375 if (buf == null) {
376 return null;
377 }
378 boolean matches = match(buf);
379 if (matches) {
380 MimeType mimeType = getMimeType();
381 if (subEntries.size() > 0) {
382 for (int i = 0; i < subEntries.size(); i++) {
383 MagicMimeEntry me = (MagicMimeEntry) subEntries.get(i);
384 MagicMimeEntry matchingEntry = me.getMatch(raf);
385 if (matchingEntry != null) {
386 return matchingEntry;
387 }
388 }
389 if (mimeType != null) {
390 return this;
391 }
392 } else {
393 if (mimeType != null)
394 return this;
395 }
396 }
397
398 return null;
399 }
400
401
402
403
404 private ByteBuffer readBuffer(byte[] content) throws IOException {
405 int startPos = getCheckBytesFrom();
406 if (content == null || startPos > content.length) {
407 return null;
408 }
409
410 ByteBuffer buf = null;
411 try {
412 switch (getType()) {
413 case MagicMimeEntry.STRING_TYPE: {
414 int len = 0;
415
416
417
418
419
420
421 int index = typeStr.indexOf(">");
422 if (index != -1) {
423 len = Integer.parseInt(typeStr.substring(index + 1, typeStr
424 .length() - 1));
425 isBetween = true;
426 } else {
427 len = getContent().length();
428 }
429 buf = ByteBuffer.allocate(len);
430 buf.put(content, startPos, len);
431 break;
432 }
433
434 case MagicMimeEntry.SHORT_TYPE:
435 case MagicMimeEntry.LESHORT_TYPE:
436 case MagicMimeEntry.BESHORT_TYPE: {
437 buf = ByteBuffer.allocate(2);
438 buf.put(content, startPos, 2);
439 break;
440 }
441
442 case MagicMimeEntry.LELONG_TYPE:
443 case MagicMimeEntry.BELONG_TYPE: {
444 buf = ByteBuffer.allocate(4);
445 buf.put(content, startPos, 4);
446 break;
447 }
448
449 case MagicMimeEntry.BYTE_TYPE: {
450 buf = ByteBuffer.allocate(1);
451 buf.put(content, startPos, 1);
452 break;
453 }
454
455 default: {
456 buf = null;
457 break;
458 }
459 }
460 } catch (IndexOutOfBoundsException iobe) {
461
462 return null;
463 }
464 return buf;
465 }
466
467 private ByteBuffer readBuffer(RandomAccessFile raf) throws IOException {
468 int startPos = getCheckBytesFrom();
469 if (startPos > raf.length())
470 return null;
471 raf.seek(startPos);
472 ByteBuffer buf;
473 switch (getType()) {
474 case MagicMimeEntry.STRING_TYPE: {
475 int len = 0;
476
477
478
479
480
481 int index = typeStr.indexOf(">");
482 if (index != -1) {
483 len = Integer.parseInt(typeStr.substring(index + 1, typeStr
484 .length() - 1));
485 isBetween = true;
486 } else {
487 len = getContent().length();
488 }
489 buf = ByteBuffer.allocate(len);
490 raf.read(buf.array(), 0, len);
491 break;
492 }
493
494 case MagicMimeEntry.SHORT_TYPE:
495 case MagicMimeEntry.LESHORT_TYPE:
496 case MagicMimeEntry.BESHORT_TYPE: {
497 buf = ByteBuffer.allocate(2);
498 raf.read(buf.array(), 0, 2);
499 break;
500 }
501
502 case MagicMimeEntry.LELONG_TYPE:
503 case MagicMimeEntry.BELONG_TYPE: {
504 buf = ByteBuffer.allocate(4);
505 raf.read(buf.array(), 0, 4);
506 break;
507 }
508
509 case MagicMimeEntry.BYTE_TYPE: {
510 buf = ByteBuffer.allocate(1);
511 raf.read(buf.array(), 0, 1);
512 break;
513 }
514
515 default: {
516 buf = null;
517 break;
518 }
519 }
520
521 return buf;
522 }
523
524 private int getInputStreamMarkLength() {
525 int len = _getInputStreamMarkLength();
526 for (Iterator it = subEntries.iterator(); it.hasNext();) {
527 MagicMimeEntry subEntry = (MagicMimeEntry) it.next();
528 int subLen = subEntry.getInputStreamMarkLength();
529 if (len < subLen)
530 len = subLen;
531 }
532 return len;
533 }
534
535 private int _getInputStreamMarkLength() {
536 switch (getType()) {
537 case MagicMimeEntry.STRING_TYPE: {
538 int len = 0;
539
540 int index = typeStr.indexOf(">");
541 if (index != -1) {
542 len = Integer.parseInt(typeStr.substring(index + 1, typeStr
543 .length() - 1));
544 isBetween = true;
545 } else {
546 if (getContent() != null)
547
548
549 len = getContent().length();
550 }
551 return getCheckBytesFrom() + len + 1;
552 }
553
554 case MagicMimeEntry.SHORT_TYPE:
555 case MagicMimeEntry.LESHORT_TYPE:
556 case MagicMimeEntry.BESHORT_TYPE: {
557 return getCheckBytesFrom() + 2;
558 }
559
560 case MagicMimeEntry.LELONG_TYPE:
561 case MagicMimeEntry.BELONG_TYPE: {
562 return getCheckBytesFrom() + 4;
563 }
564
565 case MagicMimeEntry.BYTE_TYPE: {
566 return getCheckBytesFrom() + 1;
567 }
568
569 default: {
570 return 0;
571 }
572 }
573 }
574
575
576
577
578 private boolean match(ByteBuffer buf) throws IOException {
579 boolean matches = true;
580 ByteOrder byteOrder = ByteOrder.BIG_ENDIAN;
581
582 switch (getType()) {
583 case MagicMimeEntry.STRING_TYPE: {
584 matches = matchString(buf);
585 break;
586 }
587
588 case MagicMimeEntry.SHORT_TYPE: {
589 matches = matchShort(buf, byteOrder);
590 break;
591 }
592
593 case MagicMimeEntry.LESHORT_TYPE:
594 case MagicMimeEntry.BESHORT_TYPE: {
595 if (getType() == MagicMimeEntry.LESHORT_TYPE) {
596 byteOrder = ByteOrder.LITTLE_ENDIAN;
597 }
598 matches = matchShort(buf, byteOrder);
599 break;
600 }
601
602 case MagicMimeEntry.LELONG_TYPE:
603 case MagicMimeEntry.BELONG_TYPE: {
604 if (getType() == MagicMimeEntry.LELONG_TYPE) {
605 byteOrder = ByteOrder.LITTLE_ENDIAN;
606 }
607 matches = matchLong(buf, byteOrder);
608 break;
609 }
610
611 case MagicMimeEntry.BYTE_TYPE: {
612 matches = matchByte(buf);
613 break;
614 }
615
616 default: {
617 matches = false;
618 break;
619 }
620 }
621 return matches;
622 }
623
624 private boolean matchString(ByteBuffer bbuf) throws IOException {
625 if (isBetween) {
626 String buffer = new String(bbuf.array());
627 if (StringUtil.contains(buffer, getContent())) {
628 return true;
629 }
630 return false;
631 }
632
633 if (operation.equals(MagicMimeEntryOperation.EQUALS)) {
634 int read = getContent().length();
635 for (int j = 0; j < read; j++) {
636 if ((bbuf.get(j) & 0xFF) != getContent().charAt(j)) {
637 return false;
638 }
639 }
640 return true;
641 } else if (operation.equals(MagicMimeEntryOperation.NOT_EQUALS)) {
642 int read = getContent().length();
643 for (int j = 0; j < read; j++) {
644 if ((bbuf.get(j) & 0xFF) != getContent().charAt(j)) {
645 return true;
646 }
647 }
648 return false;
649 } else if (operation.equals(MagicMimeEntryOperation.GREATER_THAN)) {
650 String buffer = new String(bbuf.array());
651 return buffer.compareTo(getContent()) > 0;
652 } else if (operation.equals(MagicMimeEntryOperation.LESS_THAN)) {
653 String buffer = new String(bbuf.array());
654 return buffer.compareTo(getContent()) < 0;
655 } else
656 return false;
657 }
658
659 private long getMask(String maskString) {
660 String [] tokens = maskString.split("&");
661 if(tokens.length < 2){
662 return 0xffffffffL;
663 }
664 if(tokens[1].startsWith("0x")) {
665 return Long.parseLong(tokens[1].substring(2).trim(), 16);
666 }else if(tokens[1].startsWith("0")) {
667 return Long.parseLong(tokens[1], 8);
668 }else {
669 return Long.parseLong(tokens[1]);
670 }
671 }
672
673 private boolean matchByte(ByteBuffer bbuf) throws IOException {
674 short found = (short) ((bbuf.get(0) & 0xff) & (short)getMask(typeStr));
675
676 if (operation.equals(MagicMimeEntryOperation.EQUALS)) {
677 return found == contentNumber;
678 } else if (operation.equals(MagicMimeEntryOperation.NOT_EQUALS)) {
679 return found != contentNumber;
680 } else if (operation.equals(MagicMimeEntryOperation.GREATER_THAN)) {
681 return found > contentNumber;
682 } else if (operation.equals(MagicMimeEntryOperation.LESS_THAN)) {
683 return found < contentNumber;
684 } else if (operation.equals(MagicMimeEntryOperation.AND)) {
685 boolean result = (found & contentNumber) == contentNumber;
686 return result;
687 } else if (operation.equals(MagicMimeEntryOperation.ANY)) {
688 return true;
689 } else if (operation.equals(MagicMimeEntryOperation.CLEAR)) {
690 long maskedFound = found & contentNumber;
691 boolean result = (maskedFound ^ contentNumber) == 0;
692 return result;
693 } else if (operation.equals(MagicMimeEntryOperation.NEGATED)) {
694 int negatedFound = ~found;
695 return negatedFound == contentNumber;
696 } else
697 return false;
698 }
699
700 private boolean matchShort(ByteBuffer bbuf, ByteOrder bo)
701 throws IOException {
702 bbuf.order(bo);
703
704 int found = (int)((bbuf.getShort() & 0xffff) & (int)getMask(typeStr));
705
706 if (operation.equals(MagicMimeEntryOperation.EQUALS)) {
707 return found == contentNumber;
708 } else if (operation.equals(MagicMimeEntryOperation.NOT_EQUALS)) {
709 return found != contentNumber;
710 } else if (operation.equals(MagicMimeEntryOperation.GREATER_THAN)) {
711 return found > contentNumber;
712 } else if (operation.equals(MagicMimeEntryOperation.LESS_THAN)) {
713 return found < contentNumber;
714 } else if (operation.equals(MagicMimeEntryOperation.AND)) {
715 boolean result = (found & contentNumber) == contentNumber;
716 return result;
717 } else if (operation.equals(MagicMimeEntryOperation.ANY)) {
718 return true;
719 } else if (operation.equals(MagicMimeEntryOperation.CLEAR)) {
720 long maskedFound = found & contentNumber;
721 boolean result = (maskedFound ^ contentNumber) == 0;
722 return result;
723 } else if (operation.equals(MagicMimeEntryOperation.NEGATED)) {
724 int negatedFound = ~found;
725 return negatedFound == contentNumber;
726 } else
727 return false;
728 }
729
730 private boolean matchLong(ByteBuffer bbuf, ByteOrder bo)
731 throws IOException {
732 bbuf.order(bo);
733
734 long found = (long)((bbuf.getInt() & 0xffffffffL) & getMask(typeStr));
735
736 if (operation.equals(MagicMimeEntryOperation.EQUALS)) {
737 return found == contentNumber;
738 } else if (operation.equals(MagicMimeEntryOperation.NOT_EQUALS)) {
739 return found != contentNumber;
740 } else if (operation.equals(MagicMimeEntryOperation.GREATER_THAN)) {
741 return found > contentNumber;
742 } else if (operation.equals(MagicMimeEntryOperation.LESS_THAN)) {
743 return found < contentNumber;
744 } else if (operation.equals(MagicMimeEntryOperation.AND)) {
745 boolean result = (found & contentNumber) == contentNumber;
746 return result;
747 } else if (operation.equals(MagicMimeEntryOperation.ANY)) {
748 return true;
749 } else if (operation.equals(MagicMimeEntryOperation.CLEAR)) {
750 long maskedFound = found & contentNumber;
751 boolean result = (maskedFound ^ contentNumber) == 0;
752 return result;
753 } else if (operation.equals(MagicMimeEntryOperation.NEGATED)) {
754 long negatedFound = ~found;
755 return negatedFound == contentNumber;
756 } else
757 return false;
758 }
759
760
761
762
763
764
765
766
767
768
769
770
771 private static String stringWithEscapeSubstitutions(String s) {
772 StringBuffer ret = new StringBuffer();
773 int len = s.length();
774 int indx = 0;
775 int c;
776 while (indx < len) {
777 c = s.charAt(indx);
778 if (c == '\n') {
779 break;
780 }
781
782 if (c == '\\') {
783 indx++;
784 if (indx >= len) {
785 ret.append((char) c);
786 break;
787 }
788
789 int cn = s.charAt(indx);
790
791 if (cn == '\\') {
792 ret.append('\\');
793 } else if (cn == ' ') {
794 ret.append(' ');
795 } else if (cn == 't') {
796 ret.append('\t');
797 } else if (cn == 'n') {
798 ret.append('\n');
799 } else if (cn == 'r') {
800 ret.append('\r');
801 } else if (cn == 'x') {
802
803
804 indx += 2;
805 if (indx >= len) {
806 ret.append((char) c);
807 ret.append((char) cn);
808 break;
809 }
810 String hexDigits = s.substring(indx - 1, indx + 1);
811 int hexEncodedValue;
812 try {
813 hexEncodedValue = Integer.parseInt(hexDigits, 16);
814 } catch (NumberFormatException x) {
815 ret.append((char) c);
816 ret.append(hexDigits);
817 break;
818 }
819 ret.append((char) hexEncodedValue);
820 } else if (cn >= '\60' && cn <= '\67') {
821 int escape = cn - '0';
822 indx++;
823 if (indx >= len) {
824 ret.append((char) escape);
825 break;
826 }
827 cn = s.charAt(indx);
828 if (cn >= '\60' && cn <= '\67') {
829 escape = escape << 3;
830 escape = escape | (cn - '0');
831
832 indx++;
833 if (indx >= len) {
834 ret.append((char) escape);
835 break;
836 }
837 cn = s.charAt(indx);
838 if (cn >= '\60' && cn <= '\67') {
839 escape = escape << 3;
840 escape = escape | (cn - '0');
841 } else {
842 indx--;
843 }
844 } else {
845 indx--;
846 }
847 ret.append((char) escape);
848 } else {
849 ret.append((char) cn);
850 }
851 } else {
852 ret.append((char) c);
853 }
854 indx++;
855 }
856 return new String(ret);
857 }
858
859 public boolean containsMimeType(String mimeType) {
860 if (this.mimeType != null && this.mimeType.equals(mimeType))
861 return true;
862
863 for (Iterator it = subEntries.iterator(); it.hasNext();) {
864 MagicMimeEntry subEntry = (MagicMimeEntry) it.next();
865 if (subEntry.containsMimeType(mimeType))
866 return true;
867 }
868 return false;
869 }
870
871 public MagicMimeEntry getParent() {
872 return parent;
873 }
874
875 public List getSubEntries() {
876 return Collections.unmodifiableList(subEntries);
877 }
878 }