rsc | c15ce40 | 2005-08-10 18:25:39 +0000 | [diff] [blame] | 1 | #include <u.h> |
| 2 | #include <libc.h> |
| 3 | #include <bio.h> |
| 4 | #include <ctype.h> |
| 5 | #include <mach.h> |
| 6 | |
| 7 | /* |
| 8 | * file - determine type of file |
| 9 | */ |
/*
 * LENDIAN assembles the four bytes at p into a little-endian 32-bit
 * value.  Each byte is widened to unsigned long before it is shifted,
 * so a top byte >= 0x80 neither shifts into the sign bit of an int
 * nor sign-extends when the result is stored in a wider type.
 */
#define LENDIAN(p)	((unsigned long)(p)[0] | ((unsigned long)(p)[1]<<8) | ((unsigned long)(p)[2]<<16) | ((unsigned long)(p)[3]<<24))
| 11 | |
| 12 | uchar buf[6001]; |
| 13 | short cfreq[140]; |
| 14 | short wfreq[50]; |
| 15 | int nbuf; |
| 16 | Dir* mbuf; |
| 17 | int fd; |
| 18 | char *fname; |
| 19 | char *slash; |
| 20 | |
/*
 * Word classes counted in wfreq[] and character classes counted in
 * cfreq[].  The character classes start at 128 so that they sit above
 * the ASCII codes, which index cfreq[] directly.
 */
enum
{
	Cword,		/* C keyword */
	Fword,		/* presumably a Fortran keyword; no classifier in view reads it */
	Aword,		/* assembler keyword */
	Alword,		/* alef keyword */
	Lword,		/* limbo keyword */
	I1,		/* the word "include" */
	I2,		/* base name of a common header */
	I3,		/* the header suffix "h" */
	Clatin	= 128,
	Cbinary,
	Cnull,
	Ceascii,
	Cutf,
};

/*
 * Keyword dictionary.  wordfreq() looks words up with a binary search
 * using strcmp(), so the entries MUST stay in strcmp() order (upper
 * case sorts before lower case).  "bio" used to sit after "extern",
 * which made it unreachable.
 */
struct
{
	char*	word;
	int	class;
} dict[] =
{
	{ "PATH",	Lword },
	{ "TEXT",	Aword },
	{ "adt",	Alword },
	{ "aggr",	Alword },
	{ "alef",	Alword },
	{ "array",	Lword },
	{ "bio",	I2 },
	{ "block",	Fword },
	{ "chan",	Alword },
	{ "char",	Cword },
	{ "common",	Fword },
	{ "con",	Lword },
	{ "data",	Fword },
	{ "dimension",	Fword },
	{ "double",	Cword },
	{ "extern",	Cword },
	{ "float",	Cword },
	{ "fn",		Lword },
	{ "function",	Fword },
	{ "h",		I3 },
	{ "implement",	Lword },
	{ "import",	Lword },
	{ "include",	I1 },
	{ "int",	Cword },
	{ "integer",	Fword },
	{ "iota",	Lword },
	{ "libc",	I2 },
	{ "long",	Cword },
	{ "module",	Lword },
	{ "real",	Fword },
	{ "ref",	Lword },
	{ "register",	Cword },
	{ "self",	Lword },
	{ "short",	Cword },
	{ "static",	Cword },
	{ "stdio",	I2 },
	{ "struct",	Cword },
	{ "subroutine",	Fword },
	{ "u",		I2 },
	{ "void",	Cword },
};
| 84 | |
/* values of the 'mode' field in the language table */
enum {
	Normal	= 0,	/* script occupying a single range */
	First,		/* first range of a script that spans several ranges */
	Multi,		/* later ranges of such a script */
	Shared,		/* code points used by several scripts */
};

/*
 * Rune ranges and the script each one indicates.  bump_utf_count()
 * binary-searches this table by range, so the rows are kept in
 * ascending order; the all-zero row terminates the table.
 */
struct
{
	int	mode;		/* see enum above */
	int	count;		/* runes seen in [low, high] for the current file */
	int	low;
	int	high;
	char	*name;
} language[] =
{
	{ Normal,	0,	0x0080,	0x0080,	"Extended Latin" },
	{ Normal,	0,	0x0100,	0x01FF,	"Extended Latin" },
	{ Normal,	0,	0x0370,	0x03FF,	"Greek" },
	{ Normal,	0,	0x0400,	0x04FF,	"Cyrillic" },
	{ Normal,	0,	0x0530,	0x058F,	"Armenian" },
	{ Normal,	0,	0x0590,	0x05FF,	"Hebrew" },
	{ Normal,	0,	0x0600,	0x06FF,	"Arabic" },
	{ Normal,	0,	0x0900,	0x097F,	"Devanagari" },
	{ Normal,	0,	0x0980,	0x09FF,	"Bengali" },
	{ Normal,	0,	0x0A00,	0x0A7F,	"Gurmukhi" },
	{ Normal,	0,	0x0A80,	0x0AFF,	"Gujarati" },
	{ Normal,	0,	0x0B00,	0x0B7F,	"Oriya" },
	{ Normal,	0,	0x0B80,	0x0BFF,	"Tamil" },
	{ Normal,	0,	0x0C00,	0x0C7F,	"Telugu" },
	{ Normal,	0,	0x0C80,	0x0CFF,	"Kannada" },
	{ Normal,	0,	0x0D00,	0x0D7F,	"Malayalam" },
	{ Normal,	0,	0x0E00,	0x0E7F,	"Thai" },
	{ Normal,	0,	0x0E80,	0x0EFF,	"Lao" },
	{ Normal,	0,	0x1000,	0x105F,	"Tibetan" },
	{ Normal,	0,	0x10A0,	0x10FF,	"Georgian" },
	{ Normal,	0,	0x3040,	0x30FF,	"Japanese" },
	{ Normal,	0,	0x3100,	0x312F,	"Chinese" },
	{ First,	0,	0x3130,	0x318F,	"Korean" },
	{ Multi,	0,	0x3400,	0x3D2F,	"Korean" },
	{ Shared,	0,	0x4e00,	0x9fff,	"CJK" },
	{ Normal,	0,	0,	0,	0 },	/* terminal entry */
};
| 130 | |
| 131 | |
/* gross classification of the buffer, chosen by filetype() */
enum
{
	Fascii,		/* printable ascii */
	Flatin,		/* latin 1 */
	Futf,		/* UTF character set */
	Fbinary,	/* binary */
	Feascii,	/* ascii with control characters */
	Fnull,		/* NUL bytes in file */
} guess;
| 141 | |
| 142 | void bump_utf_count(Rune); |
| 143 | int cistrncmp(char*, char*, int); |
| 144 | void filetype(int); |
| 145 | int getfontnum(uchar*, uchar**); |
| 146 | int isas(void); |
| 147 | int isc(void); |
| 148 | int isenglish(void); |
| 149 | int ishp(void); |
| 150 | int ishtml(void); |
| 151 | int isrfc822(void); |
| 152 | int ismbox(void); |
| 153 | int islimbo(void); |
| 154 | int ismung(void); |
| 155 | int isp9bit(void); |
| 156 | int isp9font(void); |
| 157 | int isrtf(void); |
| 158 | int ismsdos(void); |
| 159 | int iself(void); |
| 160 | int istring(void); |
| 161 | int iff(void); |
| 162 | int long0(void); |
| 163 | int istar(void); |
| 164 | int p9bitnum(uchar*); |
| 165 | int p9subfont(uchar*); |
| 166 | void print_utf(void); |
| 167 | void type(char*, int); |
| 168 | int utf_count(void); |
| 169 | void wordfreq(void); |
| 170 | |
| 171 | int (*call[])(void) = |
| 172 | { |
| 173 | long0, /* recognizable by first 4 bytes */ |
| 174 | istring, /* recognizable by first string */ |
| 175 | iff, /* interchange file format (strings) */ |
| 176 | isrfc822, /* email file */ |
| 177 | ismbox, /* mail box */ |
| 178 | istar, /* recognizable by tar checksum */ |
| 179 | ishtml, /* html keywords */ |
| 180 | /* iscint, /* compiler/assembler intermediate */ |
| 181 | islimbo, /* limbo source */ |
| 182 | isc, /* c & alef compiler key words */ |
| 183 | isas, /* assembler key words */ |
| 184 | ismung, /* entropy compressed/encrypted */ |
| 185 | isp9font, /* plan 9 font */ |
| 186 | isp9bit, /* plan 9 image (as from /dev/window) */ |
| 187 | isenglish, /* char frequency English */ |
| 188 | isrtf, /* rich text format */ |
| 189 | ismsdos, /* msdos exe (virus file attachement) */ |
| 190 | iself, /* ELF (foreign) executable */ |
| 191 | 0 |
| 192 | }; |
| 193 | |
int	mime;	/* set by -m: print MIME types instead of descriptions */

/* the two fallback MIME types; both already end in a newline */
#define	OCTET	"application/octet-stream\n"
#define	PLAIN	"text/plain\n"
| 198 | |
| 199 | void |
| 200 | main(int argc, char *argv[]) |
| 201 | { |
| 202 | int i, j, maxlen; |
| 203 | char *cp; |
| 204 | Rune r; |
| 205 | |
| 206 | ARGBEGIN{ |
| 207 | case 'm': |
| 208 | mime = 1; |
| 209 | break; |
| 210 | default: |
| 211 | fprint(2, "usage: file [-m] [file...]\n"); |
| 212 | exits("usage"); |
| 213 | }ARGEND; |
| 214 | |
| 215 | maxlen = 0; |
| 216 | if(mime == 0 || argc > 1){ |
| 217 | for(i = 0; i < argc; i++) { |
| 218 | for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp)) |
| 219 | ; |
| 220 | if(j > maxlen) |
| 221 | maxlen = j; |
| 222 | } |
| 223 | } |
| 224 | if (argc <= 0) { |
| 225 | if(!mime) |
| 226 | print ("stdin: "); |
| 227 | filetype(0); |
| 228 | } |
| 229 | else { |
| 230 | for(i = 0; i < argc; i++) |
| 231 | type(argv[i], maxlen); |
| 232 | } |
| 233 | exits(0); |
| 234 | } |
| 235 | |
| 236 | void |
| 237 | type(char *file, int nlen) |
| 238 | { |
| 239 | Rune r; |
| 240 | int i; |
| 241 | char *p; |
| 242 | |
| 243 | if(nlen > 0){ |
| 244 | slash = 0; |
| 245 | for (i = 0, p = file; *p; i++) { |
| 246 | if (*p == '/') /* find rightmost slash */ |
| 247 | slash = p; |
| 248 | p += chartorune(&r, p); /* count runes */ |
| 249 | } |
| 250 | print("%s:%*s",file, nlen-i+1, ""); |
| 251 | } |
| 252 | fname = file; |
| 253 | if ((fd = open(file, OREAD)) < 0) { |
| 254 | print("cannot open\n"); |
| 255 | return; |
| 256 | } |
| 257 | filetype(fd); |
| 258 | close(fd); |
| 259 | } |
| 260 | |
| 261 | void |
| 262 | filetype(int fd) |
| 263 | { |
| 264 | Rune r; |
| 265 | int i, f, n; |
| 266 | char *p, *eob; |
| 267 | |
| 268 | free(mbuf); |
| 269 | mbuf = dirfstat(fd); |
| 270 | if(mbuf == nil){ |
| 271 | print("cannot stat: %r\n"); |
| 272 | return; |
| 273 | } |
| 274 | if(mbuf->mode & DMDIR) { |
| 275 | print(mime ? "text/directory\n" : "directory\n"); |
| 276 | return; |
| 277 | } |
| 278 | if(mbuf->type != 'M' && mbuf->type != '|') { |
| 279 | print(mime ? OCTET : "special file #%c/%s\n", |
| 280 | mbuf->type, mbuf->name); |
| 281 | return; |
| 282 | } |
| 283 | nbuf = read(fd, buf, sizeof(buf)-1); |
| 284 | |
| 285 | if(nbuf < 0) { |
| 286 | print("cannot read\n"); |
| 287 | return; |
| 288 | } |
| 289 | if(nbuf == 0) { |
| 290 | print(mime ? PLAIN : "empty file\n"); |
| 291 | return; |
| 292 | } |
| 293 | buf[nbuf] = 0; |
| 294 | |
| 295 | /* |
| 296 | * build histogram table |
| 297 | */ |
| 298 | memset(cfreq, 0, sizeof(cfreq)); |
| 299 | for (i = 0; language[i].name; i++) |
| 300 | language[i].count = 0; |
| 301 | eob = (char *)buf+nbuf; |
| 302 | for(n = 0, p = (char *)buf; p < eob; n++) { |
| 303 | if (!fullrune(p, eob-p) && eob-p < UTFmax) |
| 304 | break; |
| 305 | p += chartorune(&r, p); |
| 306 | if (r == 0) |
| 307 | f = Cnull; |
| 308 | else if (r <= 0x7f) { |
| 309 | if (!isprint(r) && !isspace(r)) |
| 310 | f = Ceascii; /* ASCII control char */ |
| 311 | else f = r; |
| 312 | } else if (r == 0x080) { |
| 313 | bump_utf_count(r); |
| 314 | f = Cutf; |
| 315 | } else if (r < 0xA0) |
| 316 | f = Cbinary; /* Invalid Runes */ |
| 317 | else if (r <= 0xff) |
| 318 | f = Clatin; /* Latin 1 */ |
| 319 | else { |
| 320 | bump_utf_count(r); |
| 321 | f = Cutf; /* UTF extension */ |
| 322 | } |
| 323 | cfreq[f]++; /* ASCII chars peg directly */ |
| 324 | } |
| 325 | /* |
| 326 | * gross classify |
| 327 | */ |
| 328 | if (cfreq[Cbinary]) |
| 329 | guess = Fbinary; |
| 330 | else if (cfreq[Cutf]) |
| 331 | guess = Futf; |
| 332 | else if (cfreq[Clatin]) |
| 333 | guess = Flatin; |
| 334 | else if (cfreq[Ceascii]) |
| 335 | guess = Feascii; |
| 336 | else if (cfreq[Cnull] == n) { |
| 337 | print(mime ? OCTET : "first block all null bytes\n"); |
| 338 | return; |
| 339 | } |
| 340 | else guess = Fascii; |
| 341 | /* |
| 342 | * lookup dictionary words |
| 343 | */ |
| 344 | memset(wfreq, 0, sizeof(wfreq)); |
| 345 | if(guess == Fascii || guess == Flatin || guess == Futf) |
| 346 | wordfreq(); |
| 347 | /* |
| 348 | * call individual classify routines |
| 349 | */ |
| 350 | for(i=0; call[i]; i++) |
| 351 | if((*call[i])()) |
| 352 | return; |
| 353 | |
| 354 | /* |
| 355 | * if all else fails, |
| 356 | * print out gross classification |
| 357 | */ |
| 358 | if (nbuf < 100 && !mime) |
| 359 | print(mime ? PLAIN : "short "); |
| 360 | if (guess == Fascii) |
| 361 | print(mime ? PLAIN : "Ascii\n"); |
| 362 | else if (guess == Feascii) |
| 363 | print(mime ? PLAIN : "extended ascii\n"); |
| 364 | else if (guess == Flatin) |
| 365 | print(mime ? PLAIN : "latin ascii\n"); |
| 366 | else if (guess == Futf && utf_count() < 4) |
| 367 | print_utf(); |
| 368 | else print(mime ? OCTET : "binary\n"); |
| 369 | } |
| 370 | |
| 371 | void |
| 372 | bump_utf_count(Rune r) |
| 373 | { |
| 374 | int low, high, mid; |
| 375 | |
| 376 | high = sizeof(language)/sizeof(language[0])-1; |
| 377 | for (low = 0; low < high;) { |
| 378 | mid = (low+high)/2; |
| 379 | if (r >=language[mid].low) { |
| 380 | if (r <= language[mid].high) { |
| 381 | language[mid].count++; |
| 382 | break; |
| 383 | } else low = mid+1; |
| 384 | } else high = mid; |
| 385 | } |
| 386 | } |
| 387 | |
| 388 | int |
| 389 | utf_count(void) |
| 390 | { |
| 391 | int i, count; |
| 392 | |
| 393 | count = 0; |
| 394 | for (i = 0; language[i].name; i++) |
| 395 | if (language[i].count > 0) |
| 396 | switch (language[i].mode) { |
| 397 | case Normal: |
| 398 | case First: |
| 399 | count++; |
| 400 | break; |
| 401 | default: |
| 402 | break; |
| 403 | } |
| 404 | return count; |
| 405 | } |
| 406 | |
| 407 | int |
| 408 | chkascii(void) |
| 409 | { |
| 410 | int i; |
| 411 | |
| 412 | for (i = 'a'; i < 'z'; i++) |
| 413 | if (cfreq[i]) |
| 414 | return 1; |
| 415 | for (i = 'A'; i < 'Z'; i++) |
| 416 | if (cfreq[i]) |
| 417 | return 1; |
| 418 | return 0; |
| 419 | } |
| 420 | |
| 421 | int |
| 422 | find_first(char *name) |
| 423 | { |
| 424 | int i; |
| 425 | |
| 426 | for (i = 0; language[i].name != 0; i++) |
| 427 | if (language[i].mode == First |
| 428 | && strcmp(language[i].name, name) == 0) |
| 429 | return i; |
| 430 | return -1; |
| 431 | } |
| 432 | |
| 433 | void |
| 434 | print_utf(void) |
| 435 | { |
| 436 | int i, printed, j; |
| 437 | |
| 438 | if(mime){ |
| 439 | print(PLAIN); |
| 440 | return; |
| 441 | } |
| 442 | if (chkascii()) { |
| 443 | printed = 1; |
| 444 | print("Ascii"); |
| 445 | } else |
| 446 | printed = 0; |
| 447 | for (i = 0; language[i].name; i++) |
| 448 | if (language[i].count) { |
| 449 | switch(language[i].mode) { |
| 450 | case Multi: |
| 451 | j = find_first(language[i].name); |
| 452 | if (j < 0) |
| 453 | break; |
| 454 | if (language[j].count > 0) |
| 455 | break; |
| 456 | /* Fall through */ |
| 457 | case Normal: |
| 458 | case First: |
| 459 | if (printed) |
| 460 | print(" & "); |
| 461 | else printed = 1; |
| 462 | print("%s", language[i].name); |
| 463 | break; |
| 464 | case Shared: |
| 465 | default: |
| 466 | break; |
| 467 | } |
| 468 | } |
| 469 | if(!printed) |
| 470 | print("UTF"); |
| 471 | print(" text\n"); |
| 472 | } |
| 473 | |
| 474 | void |
| 475 | wordfreq(void) |
| 476 | { |
| 477 | int low, high, mid, r; |
| 478 | uchar *p, *p2, c; |
| 479 | |
| 480 | p = buf; |
| 481 | for(;;) { |
| 482 | while (p < buf+nbuf && !isalpha(*p)) |
| 483 | p++; |
| 484 | if (p >= buf+nbuf) |
| 485 | return; |
| 486 | p2 = p; |
| 487 | while(p < buf+nbuf && isalpha(*p)) |
| 488 | p++; |
| 489 | c = *p; |
| 490 | *p = 0; |
| 491 | high = sizeof(dict)/sizeof(dict[0]); |
| 492 | for(low = 0;low < high;) { |
| 493 | mid = (low+high)/2; |
| 494 | r = strcmp(dict[mid].word, (char*)p2); |
| 495 | if(r == 0) { |
| 496 | wfreq[dict[mid].class]++; |
| 497 | break; |
| 498 | } |
| 499 | if(r < 0) |
| 500 | low = mid+1; |
| 501 | else |
| 502 | high = mid; |
| 503 | } |
| 504 | *p++ = c; |
| 505 | } |
| 506 | } |
| 507 | |
| 508 | typedef struct Filemagic Filemagic; |
| 509 | struct Filemagic { |
| 510 | ulong x; |
| 511 | ulong mask; |
| 512 | char *desc; |
| 513 | char *mime; |
| 514 | }; |
| 515 | |
| 516 | Filemagic long0tab[] = { |
| 517 | 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file\n", OCTET, |
| 518 | 0x31636170, 0xFFFFFFFF, "pac3 audio file\n", OCTET, |
| 519 | 0x32636170, 0xFFFF00FF, "pac4 audio file\n", OCTET, |
| 520 | 0xBA010000, 0xFFFFFFFF, "mpeg system stream\n", OCTET, |
| 521 | 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET, |
| 522 | 0x04034B50, 0xFFFFFFFF, "zip archive\n", "application/zip", |
| 523 | 070707, 0xFFFF, "cpio archive\n", OCTET, |
| 524 | 0x2F7, 0xFFFF, "tex dvi\n", "application/dvi", |
| 525 | 0xfffa0000, 0xfffe0000, "mp3 audio", "audio/mpeg", |
| 526 | }; |
| 527 | |
| 528 | int |
| 529 | filemagic(Filemagic *tab, int ntab, ulong x) |
| 530 | { |
| 531 | int i; |
| 532 | |
| 533 | for(i=0; i<ntab; i++) |
| 534 | if((x&tab[i].mask) == tab[i].x){ |
| 535 | print(mime ? tab[i].mime : tab[i].desc); |
| 536 | return 1; |
| 537 | } |
| 538 | return 0; |
| 539 | } |
| 540 | |
| 541 | int |
| 542 | long0(void) |
| 543 | { |
rsc | cbeb0b2 | 2006-04-01 19:24:03 +0000 | [diff] [blame^] | 544 | /* Fhdr *f; */ |
rsc | c15ce40 | 2005-08-10 18:25:39 +0000 | [diff] [blame] | 545 | long x; |
| 546 | |
| 547 | seek(fd, 0, 0); /* reposition to start of file */ |
| 548 | /* |
| 549 | if(crackhdr(fd, &f)) { |
| 550 | print(mime ? OCTET : "%s\n", f.name); |
| 551 | return 1; |
| 552 | } |
| 553 | */ |
| 554 | x = LENDIAN(buf); |
| 555 | if(filemagic(long0tab, nelem(long0tab), x)) |
| 556 | return 1; |
| 557 | return 0; |
| 558 | } |
| 559 | |
/* tar header layout, from tar.c */
enum {
	NAMSIZ	= 100,
	TBLOCK	= 512,
};

/*
 * One tar block, viewed either as raw bytes (dummy) or as a header
 * (dbuf).  Every field is a char array, so the header view imposes
 * no alignment or padding.
 */
union hblock
{
	char	dummy[TBLOCK];
	struct header
	{
		char	name[NAMSIZ];
		char	mode[8];
		char	uid[8];
		char	gid[8];
		char	size[12];
		char	mtime[12];
		char	chksum[8];
		char	linkflag;
		char	linkname[NAMSIZ];

		/* rest are defined by POSIX's ustar format; see p1003.2b */
		char	magic[6];	/* "ustar" */
		char	version[2];
		char	uname[32];
		char	gname[32];
		char	devmajor[8];
		char	devminor[8];
		char	prefix[155];	/* if non-null, path = prefix "/" name */
	} dbuf;
};
| 587 | |
| 588 | int |
| 589 | checksum(union hblock *hp) |
| 590 | { |
| 591 | int i; |
| 592 | char *cp; |
| 593 | struct header *hdr = &hp->dbuf; |
| 594 | |
| 595 | for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++) |
| 596 | *cp = ' '; |
| 597 | i = 0; |
| 598 | for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++) |
| 599 | i += *cp & 0xff; |
| 600 | return i; |
| 601 | } |
| 602 | |
| 603 | int |
| 604 | istar(void) |
| 605 | { |
| 606 | int chksum; |
| 607 | char tblock[TBLOCK]; |
| 608 | union hblock *hp = (union hblock *)tblock; |
| 609 | struct header *hdr = &hp->dbuf; |
| 610 | |
| 611 | seek(fd, 0, 0); /* reposition to start of file */ |
| 612 | if (readn(fd, tblock, sizeof tblock) != sizeof tblock) |
| 613 | return 0; |
| 614 | chksum = strtol(hdr->chksum, 0, 8); |
| 615 | if (hdr->name[0] != '\0' && checksum(hp) == chksum) { |
| 616 | if (strcmp(hdr->magic, "ustar") == 0) |
| 617 | print(mime? "application/x-ustar\n": |
| 618 | "posix tar archive\n"); |
| 619 | else |
| 620 | print(mime? "application/x-tar\n": "tar archive\n"); |
| 621 | return 1; |
| 622 | } |
| 623 | return 0; |
| 624 | } |
| 625 | |
/*
 * Leading byte strings that identify a file outright.  istring()
 * compares the first 'length' bytes of the buffer with 'key' and
 * takes the first row that matches, so a longer key must come before
 * any shorter key that is its prefix.  A null key ends the table.
 */
struct FILE_STRING
{
	char	*key;
	char	*filetype;
	int	length;
	char	*mime;
} file_string[] =
{
	{ "!<arch>\n__.SYMDEF",	"archive random library",	16,	"application/octet-stream" },
	{ "!<arch>\n",		"archive",			8,	"application/octet-stream" },
	{ "070707",		"cpio archive - ascii header",	6,	"application/octet-stream" },
	{ "%!",			"postscript",			2,	"application/postscript" },
	{ "\004%!",		"postscript",			3,	"application/postscript" },
	{ "x T post",		"troff output for post",	8,	"application/troff" },
	{ "x T Latin1",		"troff output for Latin1",	10,	"application/troff" },
	{ "x T utf",		"troff output for UTF",		7,	"application/troff" },
	{ "x T 202",		"troff output for 202",		7,	"application/troff" },
	{ "x T aps",		"troff output for aps",		7,	"application/troff" },
	{ "GIF",		"GIF image",			3,	"image/gif" },
	{ "\0PC Research, Inc\0",	"ghostscript fax file",	18,	"application/ghostscript" },
	{ "%PDF",		"PDF",				4,	"application/pdf" },
	{ "<html>\n",		"HTML file",			7,	"text/html" },
	{ "<HTML>\n",		"HTML file",			7,	"text/html" },
	{ "compressed\n",	"Compressed image or subfont",	11,	"application/octet-stream" },
	{ "\111\111\052\000",	"tiff",				4,	"image/tiff" },
	{ "\115\115\000\052",	"tiff",				4,	"image/tiff" },
	{ "\377\330\377\340",	"jpeg",				4,	"image/jpeg" },
	{ "\377\330\377\341",	"jpeg",				4,	"image/jpeg" },
	{ "\377\330\377\333",	"jpeg",				4,	"image/jpeg" },
	{ "BM",			"bmp",				2,	"image/bmp" },
	{ "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",	"microsoft office document",	8,	"application/octet-stream" },
	{ "<MakerFile ",	"FrameMaker file",		11,	"application/framemaker" },
	{ "\033%-12345X",	"HPJCL file",			9,	"application/hpjcl" },
	{ "ID3",		"mp3 audio with id3",		3,	"audio/mpeg" },
	{ 0, 0, 0, 0 }
};
| 665 | |
| 666 | int |
| 667 | istring(void) |
| 668 | { |
rsc | adee167 | 2005-08-31 03:59:25 +0000 | [diff] [blame] | 669 | int i, j; |
rsc | c15ce40 | 2005-08-10 18:25:39 +0000 | [diff] [blame] | 670 | struct FILE_STRING *p; |
| 671 | |
| 672 | for(p = file_string; p->key; p++) { |
| 673 | if(nbuf >= p->length && !memcmp(buf, p->key, p->length)) { |
| 674 | if(mime) |
| 675 | print("%s\n", p->mime); |
| 676 | else |
| 677 | print("%s\n", p->filetype); |
| 678 | return 1; |
| 679 | } |
| 680 | } |
| 681 | if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */ |
| 682 | for(i = 5; i < nbuf; i++) |
| 683 | if(buf[i] == '\n') |
| 684 | break; |
| 685 | if(mime) |
| 686 | print(OCTET); |
| 687 | else |
| 688 | print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5); |
| 689 | return 1; |
| 690 | } |
rsc | adee167 | 2005-08-31 03:59:25 +0000 | [diff] [blame] | 691 | if(buf[0]=='#' && buf[1]=='!'){ |
| 692 | i=2; |
| 693 | for(j=2; j < nbuf && buf[j] != ' ' && buf[j] != '\n' && buf[j] != '\r'; j++) |
| 694 | if(buf[j] == '/') |
| 695 | i = j+1; |
| 696 | if(mime) |
| 697 | print(PLAIN); |
| 698 | else |
| 699 | print("%.*s executable file script\n", utfnlen((char*)buf+i, j-i), (char*)buf+i); |
| 700 | return 1; |
| 701 | } |
rsc | c15ce40 | 2005-08-10 18:25:39 +0000 | [diff] [blame] | 702 | return 0; |
| 703 | } |
| 704 | |
| 705 | int |
| 706 | iff(void) |
| 707 | { |
| 708 | if (strncmp((char*)buf, "FORM", 4) == 0 && |
| 709 | strncmp((char*)buf+8, "AIFF", 4) == 0) { |
| 710 | print("%s\n", mime? "audio/x-aiff": "aiff audio"); |
| 711 | return 1; |
| 712 | } |
| 713 | return 0; |
| 714 | } |
| 715 | |
| 716 | char* html_string[] = |
| 717 | { |
| 718 | "title", |
| 719 | "body", |
| 720 | "head", |
| 721 | "strong", |
| 722 | "h1", |
| 723 | "h2", |
| 724 | "h3", |
| 725 | "h4", |
| 726 | "h5", |
| 727 | "h6", |
| 728 | "ul", |
| 729 | "li", |
| 730 | "dl", |
| 731 | "br", |
| 732 | "em", |
| 733 | 0, |
| 734 | }; |
| 735 | |
| 736 | int |
| 737 | ishtml(void) |
| 738 | { |
| 739 | uchar *p, *q; |
| 740 | int i, count; |
| 741 | |
| 742 | /* compare strings between '<' and '>' to html table */ |
| 743 | count = 0; |
| 744 | p = buf; |
| 745 | for(;;) { |
| 746 | while (p < buf+nbuf && *p != '<') |
| 747 | p++; |
| 748 | p++; |
| 749 | if (p >= buf+nbuf) |
| 750 | break; |
| 751 | if(*p == '/') |
| 752 | p++; |
| 753 | q = p; |
| 754 | while(p < buf+nbuf && *p != '>') |
| 755 | p++; |
| 756 | if (p >= buf+nbuf) |
| 757 | break; |
| 758 | for(i = 0; html_string[i]; i++) { |
| 759 | if(cistrncmp(html_string[i], (char*)q, p-q) == 0) { |
| 760 | if(count++ > 4) { |
| 761 | print(mime ? "text/html\n" : "HTML file\n"); |
| 762 | return 1; |
| 763 | } |
| 764 | break; |
| 765 | } |
| 766 | } |
| 767 | p++; |
| 768 | } |
| 769 | return 0; |
| 770 | } |
| 771 | |
| 772 | char* rfc822_string[] = |
| 773 | { |
| 774 | "from:", |
| 775 | "date:", |
| 776 | "to:", |
| 777 | "subject:", |
| 778 | "received:", |
| 779 | "reply to:", |
| 780 | "sender:", |
| 781 | 0, |
| 782 | }; |
| 783 | |
| 784 | int |
| 785 | isrfc822(void) |
| 786 | { |
| 787 | |
| 788 | char *p, *q, *r; |
| 789 | int i, count; |
| 790 | |
| 791 | count = 0; |
| 792 | p = (char*)buf; |
| 793 | for(;;) { |
| 794 | q = strchr(p, '\n'); |
| 795 | if(q == nil) |
| 796 | break; |
| 797 | *q = 0; |
| 798 | if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){ |
| 799 | count++; |
| 800 | *q = '\n'; |
| 801 | p = q+1; |
| 802 | continue; |
| 803 | } |
| 804 | *q = '\n'; |
| 805 | if(*p != '\t' && *p != ' '){ |
| 806 | r = strchr(p, ':'); |
| 807 | if(r == 0 || r > q) |
| 808 | break; |
| 809 | for(i = 0; rfc822_string[i]; i++) { |
| 810 | if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){ |
| 811 | count++; |
| 812 | break; |
| 813 | } |
| 814 | } |
| 815 | } |
| 816 | p = q+1; |
| 817 | } |
| 818 | if(count >= 3){ |
| 819 | print(mime ? "message/rfc822\n" : "email file\n"); |
| 820 | return 1; |
| 821 | } |
| 822 | return 0; |
| 823 | } |
| 824 | |
| 825 | int |
| 826 | ismbox(void) |
| 827 | { |
| 828 | char *p, *q; |
| 829 | |
| 830 | p = (char*)buf; |
| 831 | q = strchr(p, '\n'); |
| 832 | if(q == nil) |
| 833 | return 0; |
| 834 | *q = 0; |
| 835 | if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){ |
| 836 | print(mime ? "text/plain\n" : "mail box\n"); |
| 837 | return 1; |
| 838 | } |
| 839 | *q = '\n'; |
| 840 | return 0; |
| 841 | } |
| 842 | |
| 843 | int |
| 844 | isc(void) |
| 845 | { |
| 846 | int n; |
| 847 | |
| 848 | n = wfreq[I1]; |
| 849 | /* |
| 850 | * includes |
| 851 | */ |
| 852 | if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n) |
| 853 | goto yes; |
| 854 | if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n) |
| 855 | goto yes; |
| 856 | /* |
| 857 | * declarations |
| 858 | */ |
| 859 | if(wfreq[Cword] >= 5 && cfreq[';'] >= 5) |
| 860 | goto yes; |
| 861 | /* |
| 862 | * assignments |
| 863 | */ |
| 864 | if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1) |
| 865 | goto yes; |
| 866 | return 0; |
| 867 | |
| 868 | yes: |
| 869 | if(mime){ |
| 870 | print(PLAIN); |
| 871 | return 1; |
| 872 | } |
| 873 | if(wfreq[Alword] > 0) |
| 874 | print("alef program\n"); |
| 875 | else |
| 876 | print("c program\n"); |
| 877 | return 1; |
| 878 | } |
| 879 | |
| 880 | int |
| 881 | islimbo(void) |
| 882 | { |
| 883 | |
| 884 | /* |
| 885 | * includes |
| 886 | */ |
| 887 | if(wfreq[Lword] < 4) |
| 888 | return 0; |
| 889 | print(mime ? PLAIN : "limbo program\n"); |
| 890 | return 1; |
| 891 | } |
| 892 | |
| 893 | int |
| 894 | isas(void) |
| 895 | { |
| 896 | |
| 897 | /* |
| 898 | * includes |
| 899 | */ |
| 900 | if(wfreq[Aword] < 2) |
| 901 | return 0; |
| 902 | print(mime ? PLAIN : "as program\n"); |
| 903 | return 1; |
| 904 | } |
| 905 | |
| 906 | /* |
| 907 | * low entropy means encrypted |
| 908 | */ |
| 909 | int |
| 910 | ismung(void) |
| 911 | { |
| 912 | int i, bucket[8]; |
| 913 | float cs; |
| 914 | |
| 915 | if(nbuf < 64) |
| 916 | return 0; |
| 917 | memset(bucket, 0, sizeof(bucket)); |
| 918 | for(i=0; i<64; i++) |
| 919 | bucket[(buf[i]>>5)&07] += 1; |
| 920 | |
| 921 | cs = 0.; |
| 922 | for(i=0; i<8; i++) |
| 923 | cs += (bucket[i]-8)*(bucket[i]-8); |
| 924 | cs /= 8.; |
| 925 | if(cs <= 24.322) { |
| 926 | if(buf[0]==0x1f && (buf[1]==0x8b || buf[1]==0x9d)) |
| 927 | print(mime ? OCTET : "compressed\n"); |
| 928 | else |
| 929 | print(mime ? OCTET : "encrypted\n"); |
| 930 | return 1; |
| 931 | } |
| 932 | return 0; |
| 933 | } |
| 934 | |
| 935 | /* |
| 936 | * english by punctuation and frequencies |
| 937 | */ |
| 938 | int |
| 939 | isenglish(void) |
| 940 | { |
| 941 | int vow, comm, rare, badpun, punct; |
| 942 | char *p; |
| 943 | |
| 944 | if(guess != Fascii && guess != Feascii) |
| 945 | return 0; |
| 946 | badpun = 0; |
| 947 | punct = 0; |
| 948 | for(p = (char *)buf; p < (char *)buf+nbuf-1; p++) |
| 949 | switch(*p) { |
| 950 | case '.': |
| 951 | case ',': |
| 952 | case ')': |
| 953 | case '%': |
| 954 | case ';': |
| 955 | case ':': |
| 956 | case '?': |
| 957 | punct++; |
| 958 | if(p[1] != ' ' && p[1] != '\n') |
| 959 | badpun++; |
| 960 | } |
| 961 | if(badpun*5 > punct) |
| 962 | return 0; |
| 963 | if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */ |
| 964 | return 0; |
| 965 | if(2*cfreq[';'] > cfreq['e']) |
| 966 | return 0; |
| 967 | |
| 968 | vow = 0; |
| 969 | for(p="AEIOU"; *p; p++) { |
rsc | bce0902 | 2005-08-11 02:34:29 +0000 | [diff] [blame] | 970 | vow += cfreq[(uchar)*p]; |
| 971 | vow += cfreq[tolower((uchar)*p)]; |
rsc | c15ce40 | 2005-08-10 18:25:39 +0000 | [diff] [blame] | 972 | } |
| 973 | comm = 0; |
| 974 | for(p="ETAION"; *p; p++) { |
rsc | bce0902 | 2005-08-11 02:34:29 +0000 | [diff] [blame] | 975 | comm += cfreq[(uchar)*p]; |
| 976 | comm += cfreq[tolower((uchar)*p)]; |
rsc | c15ce40 | 2005-08-10 18:25:39 +0000 | [diff] [blame] | 977 | } |
| 978 | rare = 0; |
| 979 | for(p="VJKQXZ"; *p; p++) { |
rsc | bce0902 | 2005-08-11 02:34:29 +0000 | [diff] [blame] | 980 | rare += cfreq[(uchar)*p]; |
| 981 | rare += cfreq[tolower((uchar)*p)]; |
rsc | c15ce40 | 2005-08-10 18:25:39 +0000 | [diff] [blame] | 982 | } |
| 983 | if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) { |
| 984 | print(mime ? PLAIN : "English text\n"); |
| 985 | return 1; |
| 986 | } |
| 987 | return 0; |
| 988 | } |
| 989 | |
| 990 | /* |
| 991 | * pick up a number with |
| 992 | * syntax _*[0-9]+_ |
| 993 | */ |
| 994 | #define P9BITLEN 12 |
| 995 | int |
| 996 | p9bitnum(uchar *bp) |
| 997 | { |
| 998 | int n, c, len; |
| 999 | |
| 1000 | len = P9BITLEN; |
| 1001 | while(*bp == ' ') { |
| 1002 | bp++; |
| 1003 | len--; |
| 1004 | if(len <= 0) |
| 1005 | return -1; |
| 1006 | } |
| 1007 | n = 0; |
| 1008 | while(len > 1) { |
| 1009 | c = *bp++; |
| 1010 | if(!isdigit(c)) |
| 1011 | return -1; |
| 1012 | n = n*10 + c-'0'; |
| 1013 | len--; |
| 1014 | } |
| 1015 | if(*bp != ' ') |
| 1016 | return -1; |
| 1017 | return n; |
| 1018 | } |
| 1019 | |
/*
 * Decode the first 12-byte field of a plan 9 image header and return
 * the pixel depth in bits, or -1 if the field is not acceptable.
 *
 * A field starting with a digit is the old format: the number is the
 * log2 of the depth and *newp is set to 0.  Anything else is the new
 * channel-descriptor format, a run of letter+width pairs such as
 * "r8g8b8"; the widths are added up, *newp is set to 1, and only
 * totals of 8, 16, 24 and 32 are accepted.
 */
int
depthof(char *s, int *newp)
{
	char *es;
	int d;
	unsigned long ld;

	*newp = 0;
	es = s+12;
	while(s<es && *s==' ')
		s++;
	if(s == es)
		return -1;
	if('0'<=*s && *s<='9'){
		/*
		 * Refuse shift counts that would overflow an int (assuming
		 * int is at least 32 bits): the result could otherwise be
		 * 0 or negative, and callers divide by it.
		 */
		ld = strtoul(s, 0, 10);
		if(ld > 30)
			return -1;
		return 1<<(int)ld;
	}

	*newp = 1;
	d = 0;
	while(s<es && *s!=' '){
		s++;			/* skip letter */
		d += strtoul(s, &s, 10);
	}

	switch(d){
	case 32:
	case 24:
	case 16:
	case 8:
		return d;
	}
	return -1;
}
| 1051 | |
| 1052 | int |
| 1053 | isp9bit(void) |
| 1054 | { |
| 1055 | int dep, lox, loy, hix, hiy, px, new; |
| 1056 | ulong t; |
| 1057 | long len; |
| 1058 | char *newlabel; |
| 1059 | |
| 1060 | newlabel = "old "; |
| 1061 | |
| 1062 | dep = depthof((char*)buf + 0*P9BITLEN, &new); |
| 1063 | if(new) |
| 1064 | newlabel = ""; |
| 1065 | lox = p9bitnum(buf + 1*P9BITLEN); |
| 1066 | loy = p9bitnum(buf + 2*P9BITLEN); |
| 1067 | hix = p9bitnum(buf + 3*P9BITLEN); |
| 1068 | hiy = p9bitnum(buf + 4*P9BITLEN); |
| 1069 | if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0) |
| 1070 | return 0; |
| 1071 | |
| 1072 | if(dep < 8){ |
| 1073 | px = 8/dep; /* pixels per byte */ |
| 1074 | /* set l to number of bytes of data per scan line */ |
| 1075 | if(lox >= 0) |
| 1076 | len = (hix+px-1)/px - lox/px; |
| 1077 | else{ /* make positive before divide */ |
| 1078 | t = (-lox)+px-1; |
| 1079 | t = (t/px)*px; |
| 1080 | len = (t+hix+px-1)/px; |
| 1081 | } |
| 1082 | }else |
| 1083 | len = (hix-lox)*dep/8; |
| 1084 | len *= (hiy-loy); /* col length */ |
| 1085 | len += 5*P9BITLEN; /* size of initial ascii */ |
| 1086 | |
| 1087 | /* |
| 1088 | * for image file, length is non-zero and must match calculation above |
| 1089 | * for /dev/window and /dev/screen the length is always zero |
| 1090 | * for subfont, the subfont header should follow immediately. |
| 1091 | */ |
| 1092 | if (len != 0 && mbuf->length == 0) { |
| 1093 | print("%splan 9 image\n", newlabel); |
| 1094 | return 1; |
| 1095 | } |
| 1096 | if (mbuf->length == len) { |
| 1097 | print("%splan 9 image\n", newlabel); |
| 1098 | return 1; |
| 1099 | } |
| 1100 | /* Ghostscript sometimes produces a little extra on the end */ |
| 1101 | if (mbuf->length < len+P9BITLEN) { |
| 1102 | print("%splan 9 image\n", newlabel); |
| 1103 | return 1; |
| 1104 | } |
| 1105 | if (p9subfont(buf+len)) { |
| 1106 | print("%ssubfont file\n", newlabel); |
| 1107 | return 1; |
| 1108 | } |
| 1109 | return 0; |
| 1110 | } |
| 1111 | |
| 1112 | int |
| 1113 | p9subfont(uchar *p) |
| 1114 | { |
| 1115 | int n, h, a; |
| 1116 | |
| 1117 | /* if image too big, assume it's a subfont */ |
| 1118 | if (p+3*P9BITLEN > buf+sizeof(buf)) |
| 1119 | return 1; |
| 1120 | |
| 1121 | n = p9bitnum(p + 0*P9BITLEN); /* char count */ |
| 1122 | if (n < 0) |
| 1123 | return 0; |
| 1124 | h = p9bitnum(p + 1*P9BITLEN); /* height */ |
| 1125 | if (h < 0) |
| 1126 | return 0; |
| 1127 | a = p9bitnum(p + 2*P9BITLEN); /* ascent */ |
| 1128 | if (a < 0) |
| 1129 | return 0; |
| 1130 | return 1; |
| 1131 | } |
| 1132 | |
| 1133 | #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') |
| 1134 | |
| 1135 | int |
| 1136 | isp9font(void) |
| 1137 | { |
| 1138 | uchar *cp, *p; |
| 1139 | int i, n; |
| 1140 | char pathname[1024]; |
| 1141 | |
| 1142 | cp = buf; |
| 1143 | if (!getfontnum(cp, &cp)) /* height */ |
| 1144 | return 0; |
| 1145 | if (!getfontnum(cp, &cp)) /* ascent */ |
| 1146 | return 0; |
| 1147 | for (i = 0; 1; i++) { |
| 1148 | if (!getfontnum(cp, &cp)) /* min */ |
| 1149 | break; |
| 1150 | if (!getfontnum(cp, &cp)) /* max */ |
| 1151 | return 0; |
| 1152 | while (WHITESPACE(*cp)) |
| 1153 | cp++; |
| 1154 | for (p = cp; *cp && !WHITESPACE(*cp); cp++) |
| 1155 | ; |
| 1156 | /* construct a path name, if needed */ |
| 1157 | n = 0; |
| 1158 | if (*p != '/' && slash) { |
| 1159 | n = slash-fname+1; |
| 1160 | if (n < sizeof(pathname)) |
| 1161 | memcpy(pathname, fname, n); |
| 1162 | else n = 0; |
| 1163 | } |
| 1164 | if (n+cp-p < sizeof(pathname)) { |
| 1165 | memcpy(pathname+n, p, cp-p); |
| 1166 | n += cp-p; |
| 1167 | pathname[n] = 0; |
| 1168 | if (access(pathname, AEXIST) < 0) |
| 1169 | return 0; |
| 1170 | } |
| 1171 | } |
| 1172 | if (i) { |
| 1173 | print(mime ? "text/plain\n" : "font file\n"); |
| 1174 | return 1; |
| 1175 | } |
| 1176 | return 0; |
| 1177 | } |
| 1178 | |
| 1179 | int |
| 1180 | getfontnum(uchar *cp, uchar **rp) |
| 1181 | { |
| 1182 | while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */ |
| 1183 | cp++; |
| 1184 | if (*cp < '0' || *cp > '9') |
| 1185 | return 0; |
| 1186 | strtoul((char *)cp, (char **)rp, 0); |
| 1187 | if (!WHITESPACE(**rp)) |
| 1188 | return 0; |
| 1189 | return 1; |
| 1190 | } |
| 1191 | |
| 1192 | int |
| 1193 | isrtf(void) |
| 1194 | { |
| 1195 | if(strstr((char *)buf, "\\rtf1")){ |
| 1196 | print(mime ? "application/rtf\n" : "rich text format\n"); |
| 1197 | return 1; |
| 1198 | } |
| 1199 | return 0; |
| 1200 | } |
| 1201 | |
| 1202 | int |
| 1203 | ismsdos(void) |
| 1204 | { |
| 1205 | if (buf[0] == 0x4d && buf[1] == 0x5a){ |
| 1206 | print(mime ? "application/x-msdownload\n" : "MSDOS executable\n"); |
| 1207 | return 1; |
| 1208 | } |
| 1209 | return 0; |
| 1210 | } |
| 1211 | |
| 1212 | int |
| 1213 | iself(void) |
| 1214 | { |
rsc | bce0902 | 2005-08-11 02:34:29 +0000 | [diff] [blame] | 1215 | static char *cpu[] = { /* NB: incomplete and arbitary list */ |
| 1216 | nil, |
| 1217 | /*1*/ "WE32100", |
| 1218 | /*2*/ "SPARC", |
| 1219 | /*3*/ "i386", |
| 1220 | /*4*/ "M68000", |
| 1221 | /*5*/ "M88000", |
| 1222 | /*6*/ "i486", |
| 1223 | /*7*/ "i860", |
| 1224 | /*8*/ "R3000", |
| 1225 | /*9*/ "S370", |
| 1226 | /*10*/ "R4000", |
| 1227 | nil, nil, nil, nil, |
| 1228 | /*15*/ "HP-PA", |
| 1229 | nil, |
| 1230 | nil, |
| 1231 | /*18*/ "sparc v8+", |
| 1232 | /*19*/ "i960", |
| 1233 | /*20*/ "PPC-32", |
| 1234 | /*21*/ "PPC-64", |
| 1235 | nil, nil, nil, nil, |
| 1236 | nil, nil, nil, nil, nil, |
| 1237 | nil, nil, nil, nil, nil, |
| 1238 | nil, nil, nil, nil, |
| 1239 | /*40*/ "ARM", |
| 1240 | /*41*/ "Alpha", |
| 1241 | nil, |
| 1242 | /*43*/ "sparc v9", |
| 1243 | nil, nil, |
| 1244 | nil, nil, nil, nil, |
rsc | b2a905b | 2005-08-31 22:22:39 +0000 | [diff] [blame] | 1245 | /*50*/ "IA-64", |
rsc | bce0902 | 2005-08-11 02:34:29 +0000 | [diff] [blame] | 1246 | nil, nil, nil, nil, nil, |
| 1247 | nil, nil, nil, nil, nil, |
| 1248 | nil, |
| 1249 | /*62*/ "AMD64", |
| 1250 | nil, nil, nil, |
| 1251 | nil, nil, nil, nil, nil, |
| 1252 | nil, nil, nil, nil, |
| 1253 | /*75*/ "VAX", |
rsc | c15ce40 | 2005-08-10 18:25:39 +0000 | [diff] [blame] | 1254 | }; |
| 1255 | |
| 1256 | |
rsc | adee167 | 2005-08-31 03:59:25 +0000 | [diff] [blame] | 1257 | if (memcmp(buf, "\177ELF", 4) == 0){ |
rsc | bce0902 | 2005-08-11 02:34:29 +0000 | [diff] [blame] | 1258 | /* gcc misparses \x7FELF as \x7FE L F */ |
rsc | c15ce40 | 2005-08-10 18:25:39 +0000 | [diff] [blame] | 1259 | if (!mime){ |
| 1260 | int n = (buf[19] << 8) | buf[18]; |
| 1261 | char *p = "unknown"; |
| 1262 | |
| 1263 | if (n > 0 && n < nelem(cpu) && cpu[n]) |
| 1264 | p = cpu[n]; |
| 1265 | else { |
| 1266 | /* try the other byte order */ |
| 1267 | n = (buf[18] << 8) | buf[19]; |
| 1268 | if (n > 0 && n < nelem(cpu) && cpu[n]) |
| 1269 | p = cpu[n]; |
| 1270 | } |
| 1271 | print("%s ELF executable\n", p); |
| 1272 | } |
| 1273 | else |
| 1274 | print("application/x-elf-executable"); |
| 1275 | return 1; |
| 1276 | } |
| 1277 | |
| 1278 | return 0; |
| 1279 | } |