| #include <u.h> |
| #include <libc.h> |
| #include <bio.h> |
| #include <ctype.h> |
| #include <mach.h> |
| |
| /* |
| * file - determine type of file |
| */ |
| #define LENDIAN(p) ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24)) |
| |
| uchar buf[6001]; |
| short cfreq[140]; |
| short wfreq[50]; |
| int nbuf; |
| Dir* mbuf; |
| int fd; |
| char *fname; |
| char *slash; |
| |
| enum |
| { |
| Cword, |
| Fword, |
| Aword, |
| Alword, |
| Lword, |
| I1, |
| I2, |
| I3, |
| Clatin = 128, |
| Cbinary, |
| Cnull, |
| Ceascii, |
| Cutf, |
| }; |
| struct |
| { |
| char* word; |
| int class; |
| } dict[] = |
| { |
| "PATH", Lword, |
| "TEXT", Aword, |
| "adt", Alword, |
| "aggr", Alword, |
| "alef", Alword, |
| "array", Lword, |
| "block", Fword, |
| "chan", Alword, |
| "char", Cword, |
| "common", Fword, |
| "con", Lword, |
| "data", Fword, |
| "dimension", Fword, |
| "double", Cword, |
| "extern", Cword, |
| "bio", I2, |
| "float", Cword, |
| "fn", Lword, |
| "function", Fword, |
| "h", I3, |
| "implement", Lword, |
| "import", Lword, |
| "include", I1, |
| "int", Cword, |
| "integer", Fword, |
| "iota", Lword, |
| "libc", I2, |
| "long", Cword, |
| "module", Lword, |
| "real", Fword, |
| "ref", Lword, |
| "register", Cword, |
| "self", Lword, |
| "short", Cword, |
| "static", Cword, |
| "stdio", I2, |
| "struct", Cword, |
| "subroutine", Fword, |
| "u", I2, |
| "void", Cword, |
| }; |
| |
| /* codes for 'mode' field in language structure */ |
| enum { |
| Normal = 0, |
| First, /* first entry for language spanning several ranges */ |
| Multi, /* later entries " " " ... */ |
| Shared, /* codes used in several languages */ |
| }; |
| |
| struct |
| { |
| int mode; /* see enum above */ |
| int count; |
| int low; |
| int high; |
| char *name; |
| |
| } language[] = |
| { |
| Normal, 0, 0x0080, 0x0080, "Extended Latin", |
| Normal, 0, 0x0100, 0x01FF, "Extended Latin", |
| Normal, 0, 0x0370, 0x03FF, "Greek", |
| Normal, 0, 0x0400, 0x04FF, "Cyrillic", |
| Normal, 0, 0x0530, 0x058F, "Armenian", |
| Normal, 0, 0x0590, 0x05FF, "Hebrew", |
| Normal, 0, 0x0600, 0x06FF, "Arabic", |
| Normal, 0, 0x0900, 0x097F, "Devanagari", |
| Normal, 0, 0x0980, 0x09FF, "Bengali", |
| Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi", |
| Normal, 0, 0x0A80, 0x0AFF, "Gujarati", |
| Normal, 0, 0x0B00, 0x0B7F, "Oriya", |
| Normal, 0, 0x0B80, 0x0BFF, "Tamil", |
| Normal, 0, 0x0C00, 0x0C7F, "Telugu", |
| Normal, 0, 0x0C80, 0x0CFF, "Kannada", |
| Normal, 0, 0x0D00, 0x0D7F, "Malayalam", |
| Normal, 0, 0x0E00, 0x0E7F, "Thai", |
| Normal, 0, 0x0E80, 0x0EFF, "Lao", |
| Normal, 0, 0x1000, 0x105F, "Tibetan", |
| Normal, 0, 0x10A0, 0x10FF, "Georgian", |
| Normal, 0, 0x3040, 0x30FF, "Japanese", |
| Normal, 0, 0x3100, 0x312F, "Chinese", |
| First, 0, 0x3130, 0x318F, "Korean", |
| Multi, 0, 0x3400, 0x3D2F, "Korean", |
| Shared, 0, 0x4e00, 0x9fff, "CJK", |
| Normal, 0, 0, 0, 0, /* terminal entry */ |
| }; |
| |
| |
| enum |
| { |
| Fascii, /* printable ascii */ |
| Flatin, /* latin 1*/ |
| Futf, /* UTf character set */ |
| Fbinary, /* binary */ |
| Feascii, /* ASCII with control chars */ |
| Fnull, /* NULL in file */ |
| } guess; |
| |
| void bump_utf_count(Rune); |
| int cistrncmp(char*, char*, int); |
| void filetype(int); |
| int getfontnum(uchar*, uchar**); |
| int isas(void); |
| int isc(void); |
| int isenglish(void); |
| int ishp(void); |
| int ishtml(void); |
| int isrfc822(void); |
| int ismbox(void); |
| int islimbo(void); |
| int ismung(void); |
| int isp9bit(void); |
| int isp9font(void); |
| int isrtf(void); |
| int ismsdos(void); |
| int iself(void); |
| int istring(void); |
| int iff(void); |
| int long0(void); |
| int istar(void); |
| int p9bitnum(uchar*); |
| int p9subfont(uchar*); |
| void print_utf(void); |
| void type(char*, int); |
| int utf_count(void); |
| void wordfreq(void); |
| |
| int (*call[])(void) = |
| { |
| long0, /* recognizable by first 4 bytes */ |
| istring, /* recognizable by first string */ |
| iff, /* interchange file format (strings) */ |
| isrfc822, /* email file */ |
| ismbox, /* mail box */ |
| istar, /* recognizable by tar checksum */ |
| ishtml, /* html keywords */ |
| /* iscint, /* compiler/assembler intermediate */ |
| islimbo, /* limbo source */ |
| isc, /* c & alef compiler key words */ |
| isas, /* assembler key words */ |
| ismung, /* entropy compressed/encrypted */ |
| isp9font, /* plan 9 font */ |
| isp9bit, /* plan 9 image (as from /dev/window) */ |
| isenglish, /* char frequency English */ |
| isrtf, /* rich text format */ |
| ismsdos, /* msdos exe (virus file attachement) */ |
| iself, /* ELF (foreign) executable */ |
| 0 |
| }; |
| |
| int mime; |
| |
| #define OCTET "application/octet-stream\n" |
| #define PLAIN "text/plain\n" |
| |
| void |
| main(int argc, char *argv[]) |
| { |
| int i, j, maxlen; |
| char *cp; |
| Rune r; |
| |
| ARGBEGIN{ |
| case 'm': |
| mime = 1; |
| break; |
| default: |
| fprint(2, "usage: file [-m] [file...]\n"); |
| exits("usage"); |
| }ARGEND; |
| |
| maxlen = 0; |
| if(mime == 0 || argc > 1){ |
| for(i = 0; i < argc; i++) { |
| for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp)) |
| ; |
| if(j > maxlen) |
| maxlen = j; |
| } |
| } |
| if (argc <= 0) { |
| if(!mime) |
| print ("stdin: "); |
| filetype(0); |
| } |
| else { |
| for(i = 0; i < argc; i++) |
| type(argv[i], maxlen); |
| } |
| exits(0); |
| } |
| |
| void |
| type(char *file, int nlen) |
| { |
| Rune r; |
| int i; |
| char *p; |
| |
| if(nlen > 0){ |
| slash = 0; |
| for (i = 0, p = file; *p; i++) { |
| if (*p == '/') /* find rightmost slash */ |
| slash = p; |
| p += chartorune(&r, p); /* count runes */ |
| } |
| print("%s:%*s",file, nlen-i+1, ""); |
| } |
| fname = file; |
| if ((fd = open(file, OREAD)) < 0) { |
| print("cannot open\n"); |
| return; |
| } |
| filetype(fd); |
| close(fd); |
| } |
| |
| void |
| filetype(int fd) |
| { |
| Rune r; |
| int i, f, n; |
| char *p, *eob; |
| |
| free(mbuf); |
| mbuf = dirfstat(fd); |
| if(mbuf == nil){ |
| print("cannot stat: %r\n"); |
| return; |
| } |
| if(mbuf->mode & DMDIR) { |
| print(mime ? "text/directory\n" : "directory\n"); |
| return; |
| } |
| if(mbuf->type != 'M' && mbuf->type != '|') { |
| print(mime ? OCTET : "special file #%c/%s\n", |
| mbuf->type, mbuf->name); |
| return; |
| } |
| nbuf = read(fd, buf, sizeof(buf)-1); |
| |
| if(nbuf < 0) { |
| print("cannot read\n"); |
| return; |
| } |
| if(nbuf == 0) { |
| print(mime ? PLAIN : "empty file\n"); |
| return; |
| } |
| buf[nbuf] = 0; |
| |
| /* |
| * build histogram table |
| */ |
| memset(cfreq, 0, sizeof(cfreq)); |
| for (i = 0; language[i].name; i++) |
| language[i].count = 0; |
| eob = (char *)buf+nbuf; |
| for(n = 0, p = (char *)buf; p < eob; n++) { |
| if (!fullrune(p, eob-p) && eob-p < UTFmax) |
| break; |
| p += chartorune(&r, p); |
| if (r == 0) |
| f = Cnull; |
| else if (r <= 0x7f) { |
| if (!isprint(r) && !isspace(r)) |
| f = Ceascii; /* ASCII control char */ |
| else f = r; |
| } else if (r == 0x080) { |
| bump_utf_count(r); |
| f = Cutf; |
| } else if (r < 0xA0) |
| f = Cbinary; /* Invalid Runes */ |
| else if (r <= 0xff) |
| f = Clatin; /* Latin 1 */ |
| else { |
| bump_utf_count(r); |
| f = Cutf; /* UTF extension */ |
| } |
| cfreq[f]++; /* ASCII chars peg directly */ |
| } |
| /* |
| * gross classify |
| */ |
| if (cfreq[Cbinary]) |
| guess = Fbinary; |
| else if (cfreq[Cutf]) |
| guess = Futf; |
| else if (cfreq[Clatin]) |
| guess = Flatin; |
| else if (cfreq[Ceascii]) |
| guess = Feascii; |
| else if (cfreq[Cnull] == n) { |
| print(mime ? OCTET : "first block all null bytes\n"); |
| return; |
| } |
| else guess = Fascii; |
| /* |
| * lookup dictionary words |
| */ |
| memset(wfreq, 0, sizeof(wfreq)); |
| if(guess == Fascii || guess == Flatin || guess == Futf) |
| wordfreq(); |
| /* |
| * call individual classify routines |
| */ |
| for(i=0; call[i]; i++) |
| if((*call[i])()) |
| return; |
| |
| /* |
| * if all else fails, |
| * print out gross classification |
| */ |
| if (nbuf < 100 && !mime) |
| print(mime ? PLAIN : "short "); |
| if (guess == Fascii) |
| print(mime ? PLAIN : "Ascii\n"); |
| else if (guess == Feascii) |
| print(mime ? PLAIN : "extended ascii\n"); |
| else if (guess == Flatin) |
| print(mime ? PLAIN : "latin ascii\n"); |
| else if (guess == Futf && utf_count() < 4) |
| print_utf(); |
| else print(mime ? OCTET : "binary\n"); |
| } |
| |
| void |
| bump_utf_count(Rune r) |
| { |
| int low, high, mid; |
| |
| high = sizeof(language)/sizeof(language[0])-1; |
| for (low = 0; low < high;) { |
| mid = (low+high)/2; |
| if (r >=language[mid].low) { |
| if (r <= language[mid].high) { |
| language[mid].count++; |
| break; |
| } else low = mid+1; |
| } else high = mid; |
| } |
| } |
| |
| int |
| utf_count(void) |
| { |
| int i, count; |
| |
| count = 0; |
| for (i = 0; language[i].name; i++) |
| if (language[i].count > 0) |
| switch (language[i].mode) { |
| case Normal: |
| case First: |
| count++; |
| break; |
| default: |
| break; |
| } |
| return count; |
| } |
| |
| int |
| chkascii(void) |
| { |
| int i; |
| |
| for (i = 'a'; i < 'z'; i++) |
| if (cfreq[i]) |
| return 1; |
| for (i = 'A'; i < 'Z'; i++) |
| if (cfreq[i]) |
| return 1; |
| return 0; |
| } |
| |
| int |
| find_first(char *name) |
| { |
| int i; |
| |
| for (i = 0; language[i].name != 0; i++) |
| if (language[i].mode == First |
| && strcmp(language[i].name, name) == 0) |
| return i; |
| return -1; |
| } |
| |
| void |
| print_utf(void) |
| { |
| int i, printed, j; |
| |
| if(mime){ |
| print(PLAIN); |
| return; |
| } |
| if (chkascii()) { |
| printed = 1; |
| print("Ascii"); |
| } else |
| printed = 0; |
| for (i = 0; language[i].name; i++) |
| if (language[i].count) { |
| switch(language[i].mode) { |
| case Multi: |
| j = find_first(language[i].name); |
| if (j < 0) |
| break; |
| if (language[j].count > 0) |
| break; |
| /* Fall through */ |
| case Normal: |
| case First: |
| if (printed) |
| print(" & "); |
| else printed = 1; |
| print("%s", language[i].name); |
| break; |
| case Shared: |
| default: |
| break; |
| } |
| } |
| if(!printed) |
| print("UTF"); |
| print(" text\n"); |
| } |
| |
| void |
| wordfreq(void) |
| { |
| int low, high, mid, r; |
| uchar *p, *p2, c; |
| |
| p = buf; |
| for(;;) { |
| while (p < buf+nbuf && !isalpha(*p)) |
| p++; |
| if (p >= buf+nbuf) |
| return; |
| p2 = p; |
| while(p < buf+nbuf && isalpha(*p)) |
| p++; |
| c = *p; |
| *p = 0; |
| high = sizeof(dict)/sizeof(dict[0]); |
| for(low = 0;low < high;) { |
| mid = (low+high)/2; |
| r = strcmp(dict[mid].word, (char*)p2); |
| if(r == 0) { |
| wfreq[dict[mid].class]++; |
| break; |
| } |
| if(r < 0) |
| low = mid+1; |
| else |
| high = mid; |
| } |
| *p++ = c; |
| } |
| } |
| |
| typedef struct Filemagic Filemagic; |
| struct Filemagic { |
| ulong x; |
| ulong mask; |
| char *desc; |
| char *mime; |
| }; |
| |
| Filemagic long0tab[] = { |
| 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file\n", OCTET, |
| 0x31636170, 0xFFFFFFFF, "pac3 audio file\n", OCTET, |
| 0x32636170, 0xFFFF00FF, "pac4 audio file\n", OCTET, |
| 0xBA010000, 0xFFFFFFFF, "mpeg system stream\n", OCTET, |
| 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET, |
| 0x04034B50, 0xFFFFFFFF, "zip archive\n", "application/zip", |
| 070707, 0xFFFF, "cpio archive\n", OCTET, |
| 0x2F7, 0xFFFF, "tex dvi\n", "application/dvi", |
| 0xfffa0000, 0xfffe0000, "mp3 audio", "audio/mpeg", |
| }; |
| |
| int |
| filemagic(Filemagic *tab, int ntab, ulong x) |
| { |
| int i; |
| |
| for(i=0; i<ntab; i++) |
| if((x&tab[i].mask) == tab[i].x){ |
| print(mime ? tab[i].mime : tab[i].desc); |
| return 1; |
| } |
| return 0; |
| } |
| |
| int |
| long0(void) |
| { |
| /* Fhdr *f; */ |
| long x; |
| |
| seek(fd, 0, 0); /* reposition to start of file */ |
| /* |
| if(crackhdr(fd, &f)) { |
| print(mime ? OCTET : "%s\n", f.name); |
| return 1; |
| } |
| */ |
| x = LENDIAN(buf); |
| if(filemagic(long0tab, nelem(long0tab), x)) |
| return 1; |
| return 0; |
| } |
| |
| /* from tar.c */ |
| enum { NAMSIZ = 100, TBLOCK = 512 }; |
| |
| union hblock |
| { |
| char dummy[TBLOCK]; |
| struct header |
| { |
| char name[NAMSIZ]; |
| char mode[8]; |
| char uid[8]; |
| char gid[8]; |
| char size[12]; |
| char mtime[12]; |
| char chksum[8]; |
| char linkflag; |
| char linkname[NAMSIZ]; |
| /* rest are defined by POSIX's ustar format; see p1003.2b */ |
| char magic[6]; /* "ustar" */ |
| char version[2]; |
| char uname[32]; |
| char gname[32]; |
| char devmajor[8]; |
| char devminor[8]; |
| char prefix[155]; /* if non-null, path = prefix "/" name */ |
| } dbuf; |
| }; |
| |
| int |
| checksum(union hblock *hp) |
| { |
| int i; |
| char *cp; |
| struct header *hdr = &hp->dbuf; |
| |
| for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++) |
| *cp = ' '; |
| i = 0; |
| for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++) |
| i += *cp & 0xff; |
| return i; |
| } |
| |
| int |
| istar(void) |
| { |
| int chksum; |
| char tblock[TBLOCK]; |
| union hblock *hp = (union hblock *)tblock; |
| struct header *hdr = &hp->dbuf; |
| |
| seek(fd, 0, 0); /* reposition to start of file */ |
| if (readn(fd, tblock, sizeof tblock) != sizeof tblock) |
| return 0; |
| chksum = strtol(hdr->chksum, 0, 8); |
| if (hdr->name[0] != '\0' && checksum(hp) == chksum) { |
| if (strcmp(hdr->magic, "ustar") == 0) |
| print(mime? "application/x-ustar\n": |
| "posix tar archive\n"); |
| else |
| print(mime? "application/x-tar\n": "tar archive\n"); |
| return 1; |
| } |
| return 0; |
| } |
| |
| /* |
| * initial words to classify file |
| */ |
| struct FILE_STRING |
| { |
| char *key; |
| char *filetype; |
| int length; |
| char *mime; |
| } file_string[] = |
| { |
| "!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream", |
| "!<arch>\n", "archive", 8, "application/octet-stream", |
| "070707", "cpio archive - ascii header", 6, "application/octet-stream", |
| "%!", "postscript", 2, "application/postscript", |
| "\004%!", "postscript", 3, "application/postscript", |
| "x T post", "troff output for post", 8, "application/troff", |
| "x T Latin1", "troff output for Latin1", 10, "application/troff", |
| "x T utf", "troff output for UTF", 7, "application/troff", |
| "x T 202", "troff output for 202", 7, "application/troff", |
| "x T aps", "troff output for aps", 7, "application/troff", |
| "GIF", "GIF image", 3, "image/gif", |
| "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript", |
| "%PDF", "PDF", 4, "application/pdf", |
| "<html>\n", "HTML file", 7, "text/html", |
| "<HTML>\n", "HTML file", 7, "text/html", |
| "compressed\n", "Compressed image or subfont", 11, "application/octet-stream", |
| "\111\111\052\000", "tiff", 4, "image/tiff", |
| "\115\115\000\052", "tiff", 4, "image/tiff", |
| "\377\330\377\340", "jpeg", 4, "image/jpeg", |
| "\377\330\377\341", "jpeg", 4, "image/jpeg", |
| "\377\330\377\333", "jpeg", 4, "image/jpeg", |
| "\106\117\126\142", "x3f", 4, "image/x3f", |
| "BM", "bmp", 2, "image/bmp", |
| "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/octet-stream", |
| "<MakerFile ", "FrameMaker file", 11, "application/framemaker", |
| "\033%-12345X", "HPJCL file", 9, "application/hpjcl", |
| "ID3", "mp3 audio with id3", 3, "audio/mpeg", |
| 0,0,0,0 |
| }; |
| |
| int |
| istring(void) |
| { |
| int i, j; |
| struct FILE_STRING *p; |
| |
| for(p = file_string; p->key; p++) { |
| if(nbuf >= p->length && !memcmp(buf, p->key, p->length)) { |
| if(mime) |
| print("%s\n", p->mime); |
| else |
| print("%s\n", p->filetype); |
| return 1; |
| } |
| } |
| if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */ |
| for(i = 5; i < nbuf; i++) |
| if(buf[i] == '\n') |
| break; |
| if(mime) |
| print(OCTET); |
| else |
| print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5); |
| return 1; |
| } |
| if(buf[0]=='#' && buf[1]=='!'){ |
| i=2; |
| for(j=2; j < nbuf && buf[j] != ' ' && buf[j] != '\n' && buf[j] != '\r'; j++) |
| if(buf[j] == '/') |
| i = j+1; |
| if(mime) |
| print(PLAIN); |
| else |
| print("%.*s executable file script\n", utfnlen((char*)buf+i, j-i), (char*)buf+i); |
| return 1; |
| } |
| return 0; |
| } |
| |
| int |
| iff(void) |
| { |
| if (strncmp((char*)buf, "FORM", 4) == 0 && |
| strncmp((char*)buf+8, "AIFF", 4) == 0) { |
| print("%s\n", mime? "audio/x-aiff": "aiff audio"); |
| return 1; |
| } |
| return 0; |
| } |
| |
| char* html_string[] = |
| { |
| "title", |
| "body", |
| "head", |
| "strong", |
| "h1", |
| "h2", |
| "h3", |
| "h4", |
| "h5", |
| "h6", |
| "ul", |
| "li", |
| "dl", |
| "br", |
| "em", |
| 0, |
| }; |
| |
| int |
| ishtml(void) |
| { |
| uchar *p, *q; |
| int i, count; |
| |
| /* compare strings between '<' and '>' to html table */ |
| count = 0; |
| p = buf; |
| for(;;) { |
| while (p < buf+nbuf && *p != '<') |
| p++; |
| p++; |
| if (p >= buf+nbuf) |
| break; |
| if(*p == '/') |
| p++; |
| q = p; |
| while(p < buf+nbuf && *p != '>') |
| p++; |
| if (p >= buf+nbuf) |
| break; |
| for(i = 0; html_string[i]; i++) { |
| if(cistrncmp(html_string[i], (char*)q, p-q) == 0) { |
| if(count++ > 4) { |
| print(mime ? "text/html\n" : "HTML file\n"); |
| return 1; |
| } |
| break; |
| } |
| } |
| p++; |
| } |
| return 0; |
| } |
| |
| char* rfc822_string[] = |
| { |
| "from:", |
| "date:", |
| "to:", |
| "subject:", |
| "received:", |
| "reply to:", |
| "sender:", |
| 0, |
| }; |
| |
| int |
| isrfc822(void) |
| { |
| |
| char *p, *q, *r; |
| int i, count; |
| |
| count = 0; |
| p = (char*)buf; |
| for(;;) { |
| q = strchr(p, '\n'); |
| if(q == nil) |
| break; |
| *q = 0; |
| if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){ |
| count++; |
| *q = '\n'; |
| p = q+1; |
| continue; |
| } |
| *q = '\n'; |
| if(*p != '\t' && *p != ' '){ |
| r = strchr(p, ':'); |
| if(r == 0 || r > q) |
| break; |
| for(i = 0; rfc822_string[i]; i++) { |
| if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){ |
| count++; |
| break; |
| } |
| } |
| } |
| p = q+1; |
| } |
| if(count >= 3){ |
| print(mime ? "message/rfc822\n" : "email file\n"); |
| return 1; |
| } |
| return 0; |
| } |
| |
| int |
| ismbox(void) |
| { |
| char *p, *q; |
| |
| p = (char*)buf; |
| q = strchr(p, '\n'); |
| if(q == nil) |
| return 0; |
| *q = 0; |
| if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){ |
| print(mime ? "text/plain\n" : "mail box\n"); |
| return 1; |
| } |
| *q = '\n'; |
| return 0; |
| } |
| |
| int |
| isc(void) |
| { |
| int n; |
| |
| n = wfreq[I1]; |
| /* |
| * includes |
| */ |
| if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n) |
| goto yes; |
| if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n) |
| goto yes; |
| /* |
| * declarations |
| */ |
| if(wfreq[Cword] >= 5 && cfreq[';'] >= 5) |
| goto yes; |
| /* |
| * assignments |
| */ |
| if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1) |
| goto yes; |
| return 0; |
| |
| yes: |
| if(mime){ |
| print(PLAIN); |
| return 1; |
| } |
| if(wfreq[Alword] > 0) |
| print("alef program\n"); |
| else |
| print("c program\n"); |
| return 1; |
| } |
| |
| int |
| islimbo(void) |
| { |
| |
| /* |
| * includes |
| */ |
| if(wfreq[Lword] < 4) |
| return 0; |
| print(mime ? PLAIN : "limbo program\n"); |
| return 1; |
| } |
| |
| int |
| isas(void) |
| { |
| |
| /* |
| * includes |
| */ |
| if(wfreq[Aword] < 2) |
| return 0; |
| print(mime ? PLAIN : "as program\n"); |
| return 1; |
| } |
| |
| /* |
| * low entropy means encrypted |
| */ |
| int |
| ismung(void) |
| { |
| int i, bucket[8]; |
| float cs; |
| |
| if(nbuf < 64) |
| return 0; |
| memset(bucket, 0, sizeof(bucket)); |
| for(i=0; i<64; i++) |
| bucket[(buf[i]>>5)&07] += 1; |
| |
| cs = 0.; |
| for(i=0; i<8; i++) |
| cs += (bucket[i]-8)*(bucket[i]-8); |
| cs /= 8.; |
| if(cs <= 24.322) { |
| if(buf[0]==0x1f && (buf[1]==0x8b || buf[1]==0x9d)) |
| print(mime ? OCTET : "compressed\n"); |
| else |
| print(mime ? OCTET : "encrypted\n"); |
| return 1; |
| } |
| return 0; |
| } |
| |
| /* |
| * english by punctuation and frequencies |
| */ |
| int |
| isenglish(void) |
| { |
| int vow, comm, rare, badpun, punct; |
| char *p; |
| |
| if(guess != Fascii && guess != Feascii) |
| return 0; |
| badpun = 0; |
| punct = 0; |
| for(p = (char *)buf; p < (char *)buf+nbuf-1; p++) |
| switch(*p) { |
| case '.': |
| case ',': |
| case ')': |
| case '%': |
| case ';': |
| case ':': |
| case '?': |
| punct++; |
| if(p[1] != ' ' && p[1] != '\n') |
| badpun++; |
| } |
| if(badpun*5 > punct) |
| return 0; |
| if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */ |
| return 0; |
| if(2*cfreq[';'] > cfreq['e']) |
| return 0; |
| |
| vow = 0; |
| for(p="AEIOU"; *p; p++) { |
| vow += cfreq[(uchar)*p]; |
| vow += cfreq[tolower((uchar)*p)]; |
| } |
| comm = 0; |
| for(p="ETAION"; *p; p++) { |
| comm += cfreq[(uchar)*p]; |
| comm += cfreq[tolower((uchar)*p)]; |
| } |
| rare = 0; |
| for(p="VJKQXZ"; *p; p++) { |
| rare += cfreq[(uchar)*p]; |
| rare += cfreq[tolower((uchar)*p)]; |
| } |
| if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) { |
| print(mime ? PLAIN : "English text\n"); |
| return 1; |
| } |
| return 0; |
| } |
| |
| /* |
| * pick up a number with |
| * syntax _*[0-9]+_ |
| */ |
| #define P9BITLEN 12 |
| int |
| p9bitnum(uchar *bp) |
| { |
| int n, c, len; |
| |
| len = P9BITLEN; |
| while(*bp == ' ') { |
| bp++; |
| len--; |
| if(len <= 0) |
| return -1; |
| } |
| n = 0; |
| while(len > 1) { |
| c = *bp++; |
| if(!isdigit(c)) |
| return -1; |
| n = n*10 + c-'0'; |
| len--; |
| } |
| if(*bp != ' ') |
| return -1; |
| return n; |
| } |
| |
| int |
| depthof(char *s, int *newp) |
| { |
| char *es; |
| int d; |
| |
| *newp = 0; |
| es = s+12; |
| while(s<es && *s==' ') |
| s++; |
| if(s == es) |
| return -1; |
| if('0'<=*s && *s<='9') |
| return 1<<atoi(s); |
| |
| *newp = 1; |
| d = 0; |
| while(s<es && *s!=' '){ |
| s++; /* skip letter */ |
| d += strtoul(s, &s, 10); |
| } |
| |
| switch(d){ |
| case 32: |
| case 24: |
| case 16: |
| case 8: |
| return d; |
| } |
| return -1; |
| } |
| |
| int |
| isp9bit(void) |
| { |
| int dep, lox, loy, hix, hiy, px, new; |
| ulong t; |
| long len; |
| char *newlabel; |
| |
| newlabel = "old "; |
| |
| dep = depthof((char*)buf + 0*P9BITLEN, &new); |
| if(new) |
| newlabel = ""; |
| lox = p9bitnum(buf + 1*P9BITLEN); |
| loy = p9bitnum(buf + 2*P9BITLEN); |
| hix = p9bitnum(buf + 3*P9BITLEN); |
| hiy = p9bitnum(buf + 4*P9BITLEN); |
| if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0) |
| return 0; |
| |
| if(dep < 8){ |
| px = 8/dep; /* pixels per byte */ |
| /* set l to number of bytes of data per scan line */ |
| if(lox >= 0) |
| len = (hix+px-1)/px - lox/px; |
| else{ /* make positive before divide */ |
| t = (-lox)+px-1; |
| t = (t/px)*px; |
| len = (t+hix+px-1)/px; |
| } |
| }else |
| len = (hix-lox)*dep/8; |
| len *= (hiy-loy); /* col length */ |
| len += 5*P9BITLEN; /* size of initial ascii */ |
| |
| /* |
| * for image file, length is non-zero and must match calculation above |
| * for /dev/window and /dev/screen the length is always zero |
| * for subfont, the subfont header should follow immediately. |
| */ |
| if (len != 0 && mbuf->length == 0) { |
| print("%splan 9 image\n", newlabel); |
| return 1; |
| } |
| if (mbuf->length == len) { |
| print("%splan 9 image\n", newlabel); |
| return 1; |
| } |
| /* Ghostscript sometimes produces a little extra on the end */ |
| if (mbuf->length < len+P9BITLEN) { |
| print("%splan 9 image\n", newlabel); |
| return 1; |
| } |
| if (p9subfont(buf+len)) { |
| print("%ssubfont file\n", newlabel); |
| return 1; |
| } |
| return 0; |
| } |
| |
| int |
| p9subfont(uchar *p) |
| { |
| int n, h, a; |
| |
| /* if image too big, assume it's a subfont */ |
| if (p+3*P9BITLEN > buf+sizeof(buf)) |
| return 1; |
| |
| n = p9bitnum(p + 0*P9BITLEN); /* char count */ |
| if (n < 0) |
| return 0; |
| h = p9bitnum(p + 1*P9BITLEN); /* height */ |
| if (h < 0) |
| return 0; |
| a = p9bitnum(p + 2*P9BITLEN); /* ascent */ |
| if (a < 0) |
| return 0; |
| return 1; |
| } |
| |
| #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') |
| |
| int |
| isp9font(void) |
| { |
| uchar *cp, *p; |
| int i, n; |
| char pathname[1024]; |
| |
| cp = buf; |
| if (!getfontnum(cp, &cp)) /* height */ |
| return 0; |
| if (!getfontnum(cp, &cp)) /* ascent */ |
| return 0; |
| for (i = 0; 1; i++) { |
| if (!getfontnum(cp, &cp)) /* min */ |
| break; |
| if (!getfontnum(cp, &cp)) /* max */ |
| return 0; |
| while (WHITESPACE(*cp)) |
| cp++; |
| for (p = cp; *cp && !WHITESPACE(*cp); cp++) |
| ; |
| /* construct a path name, if needed */ |
| n = 0; |
| if (*p != '/' && slash) { |
| n = slash-fname+1; |
| if (n < sizeof(pathname)) |
| memcpy(pathname, fname, n); |
| else n = 0; |
| } |
| if (n+cp-p < sizeof(pathname)) { |
| memcpy(pathname+n, p, cp-p); |
| n += cp-p; |
| pathname[n] = 0; |
| if (access(pathname, AEXIST) < 0) |
| return 0; |
| } |
| } |
| if (i) { |
| print(mime ? "text/plain\n" : "font file\n"); |
| return 1; |
| } |
| return 0; |
| } |
| |
| int |
| getfontnum(uchar *cp, uchar **rp) |
| { |
| while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */ |
| cp++; |
| if (*cp < '0' || *cp > '9') |
| return 0; |
| strtoul((char *)cp, (char **)rp, 0); |
| if (!WHITESPACE(**rp)) |
| return 0; |
| return 1; |
| } |
| |
| int |
| isrtf(void) |
| { |
| if(strstr((char *)buf, "\\rtf1")){ |
| print(mime ? "application/rtf\n" : "rich text format\n"); |
| return 1; |
| } |
| return 0; |
| } |
| |
| int |
| ismsdos(void) |
| { |
| if (buf[0] == 0x4d && buf[1] == 0x5a){ |
| print(mime ? "application/x-msdownload\n" : "MSDOS executable\n"); |
| return 1; |
| } |
| return 0; |
| } |
| |
| int |
| iself(void) |
| { |
| static char *cpu[] = { /* NB: incomplete and arbitary list */ |
| nil, |
| /*1*/ "WE32100", |
| /*2*/ "SPARC", |
| /*3*/ "i386", |
| /*4*/ "M68000", |
| /*5*/ "M88000", |
| /*6*/ "i486", |
| /*7*/ "i860", |
| /*8*/ "R3000", |
| /*9*/ "S370", |
| /*10*/ "R4000", |
| nil, nil, nil, nil, |
| /*15*/ "HP-PA", |
| nil, |
| nil, |
| /*18*/ "sparc v8+", |
| /*19*/ "i960", |
| /*20*/ "PPC-32", |
| /*21*/ "PPC-64", |
| nil, nil, nil, nil, |
| nil, nil, nil, nil, nil, |
| nil, nil, nil, nil, nil, |
| nil, nil, nil, nil, |
| /*40*/ "ARM", |
| /*41*/ "Alpha", |
| nil, |
| /*43*/ "sparc v9", |
| nil, nil, |
| nil, nil, nil, nil, |
| /*50*/ "IA-64", |
| nil, nil, nil, nil, nil, |
| nil, nil, nil, nil, nil, |
| nil, |
| /*62*/ "AMD64", |
| nil, nil, nil, |
| nil, nil, nil, nil, nil, |
| nil, nil, nil, nil, |
| /*75*/ "VAX", |
| }; |
| |
| |
| if (memcmp(buf, "\177ELF", 4) == 0){ |
| /* gcc misparses \x7FELF as \x7FE L F */ |
| if (!mime){ |
| int n = (buf[19] << 8) | buf[18]; |
| char *p = "unknown"; |
| |
| if (n > 0 && n < nelem(cpu) && cpu[n]) |
| p = cpu[n]; |
| else { |
| /* try the other byte order */ |
| n = (buf[18] << 8) | buf[19]; |
| if (n > 0 && n < nelem(cpu) && cpu[n]) |
| p = cpu[n]; |
| } |
| print("%s ELF executable\n", p); |
| } |
| else |
| print("application/x-elf-executable"); |
| return 1; |
| } |
| |
| return 0; |
| } |