|  | #include <u.h> | 
|  | #include <libc.h> | 
|  | #include <bio.h> | 
|  | #include <ctype.h> | 
|  | #include <mach.h> | 
|  |  | 
|  | /* | 
|  | * file - determine type of file | 
|  | */ | 
|  | #define	LENDIAN(p)	((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24)) | 
|  |  | 
|  | uchar	buf[6001]; | 
|  | short	cfreq[140]; | 
|  | short	wfreq[50]; | 
|  | int	nbuf; | 
|  | Dir*	mbuf; | 
|  | int	fd; | 
|  | char 	*fname; | 
|  | char	*slash; | 
|  |  | 
/*
 * Word classes (indices into wfreq) and character classes
 * (indices into cfreq above the ASCII range).
 */
enum
{
	Cword,
	Fword,
	Aword,
	Alword,
	Lword,
	I1,
	I2,
	I3,
	Clatin	= 128,
	Cbinary,
	Cnull,
	Ceascii,
	Cutf,
};

/*
 * Keyword dictionary.  wordfreq() looks words up with a binary search
 * using strcmp, so the entries MUST stay sorted in strcmp (byte) order:
 * upper case sorts before lower case.
 */
struct
{
	char*	word;
	int	class;
} dict[] =
{
	{ "PATH",	Lword },
	{ "TEXT",	Aword },
	{ "adt",	Alword },
	{ "aggr",	Alword },
	{ "alef",	Alword },
	{ "array",	Lword },
	{ "bio",	I2 },
	{ "block",	Fword },
	{ "chan",	Alword },
	{ "char",	Cword },
	{ "common",	Fword },
	{ "con",	Lword },
	{ "data",	Fword },
	{ "dimension",	Fword },
	{ "double",	Cword },
	{ "extern",	Cword },
	{ "float",	Cword },
	{ "fn",		Lword },
	{ "function",	Fword },
	{ "h",		I3 },
	{ "implement",	Lword },
	{ "import",	Lword },
	{ "include",	I1 },
	{ "int",	Cword },
	{ "integer",	Fword },
	{ "iota",	Lword },
	{ "libc",	I2 },
	{ "long",	Cword },
	{ "module",	Lword },
	{ "real",	Fword },
	{ "ref",	Lword },
	{ "register",	Cword },
	{ "self",	Lword },
	{ "short",	Cword },
	{ "static",	Cword },
	{ "stdio",	I2 },
	{ "struct",	Cword },
	{ "subroutine",	Fword },
	{ "u",		I2 },
	{ "void",	Cword },
};
|  |  | 
/* codes for 'mode' field in language structure */
enum	{
	Normal	= 0,
	First,		/* first entry for language spanning several ranges */
	Multi,		/* later entries for the same language */
	Shared,		/* codes used in several languages */
};

/*
 * Rune ranges and the script each one is reported as.
 * bump_utf_count() binary-searches this table, so rows are kept in
 * ascending, non-overlapping order; the row with a zero name ends it.
 */
struct
{
	int	mode;		/* see enum above */
	int 	count;		/* runes seen in [low, high] for the current file */
	int	low;
	int	high;
	char	*name;
} language[] =
{
	{ Normal,	0,	0x0080,	0x0080,	"Extended Latin" },
	{ Normal,	0,	0x0100,	0x01FF,	"Extended Latin" },
	{ Normal,	0,	0x0370,	0x03FF,	"Greek" },
	{ Normal,	0,	0x0400,	0x04FF,	"Cyrillic" },
	{ Normal,	0,	0x0530,	0x058F,	"Armenian" },
	{ Normal,	0,	0x0590,	0x05FF,	"Hebrew" },
	{ Normal,	0,	0x0600,	0x06FF,	"Arabic" },
	{ Normal,	0,	0x0900,	0x097F,	"Devanagari" },
	{ Normal,	0,	0x0980,	0x09FF,	"Bengali" },
	{ Normal,	0,	0x0A00,	0x0A7F,	"Gurmukhi" },
	{ Normal,	0,	0x0A80,	0x0AFF,	"Gujarati" },
	{ Normal,	0,	0x0B00,	0x0B7F,	"Oriya" },
	{ Normal,	0,	0x0B80,	0x0BFF,	"Tamil" },
	{ Normal,	0,	0x0C00,	0x0C7F,	"Telugu" },
	{ Normal,	0,	0x0C80,	0x0CFF,	"Kannada" },
	{ Normal,	0,	0x0D00,	0x0D7F,	"Malayalam" },
	{ Normal,	0,	0x0E00,	0x0E7F,	"Thai" },
	{ Normal,	0,	0x0E80,	0x0EFF,	"Lao" },
	{ Normal,	0,	0x1000,	0x105F,	"Tibetan" },
	{ Normal,	0,	0x10A0,	0x10FF,	"Georgian" },
	{ Normal,	0,	0x3040,	0x30FF,	"Japanese" },
	{ Normal,	0,	0x3100,	0x312F,	"Chinese" },
	{ First,	0,	0x3130,	0x318F,	"Korean" },
	{ Multi,	0,	0x3400,	0x3D2F,	"Korean" },
	{ Shared,	0,	0x4e00,	0x9fff,	"CJK" },
	{ Normal,	0,	0,	0,	0 },		/* terminal entry */
};
|  |  | 
|  |  | 
|  | enum | 
|  | { | 
|  | Fascii,		/* printable ascii */ | 
|  | Flatin,		/* latin 1*/ | 
|  | Futf,		/* UTf character set */ | 
|  | Fbinary,	/* binary */ | 
|  | Feascii,	/* ASCII with control chars */ | 
|  | Fnull,		/* NULL in file */ | 
|  | } guess; | 
|  |  | 
|  | void	bump_utf_count(Rune); | 
|  | int	cistrncmp(char*, char*, int); | 
|  | void	filetype(int); | 
|  | int	getfontnum(uchar*, uchar**); | 
|  | int	isas(void); | 
|  | int	isc(void); | 
|  | int	isenglish(void); | 
|  | int	ishp(void); | 
|  | int	ishtml(void); | 
|  | int	isrfc822(void); | 
|  | int	ismbox(void); | 
|  | int	islimbo(void); | 
|  | int	ismung(void); | 
|  | int	isp9bit(void); | 
|  | int	isp9font(void); | 
|  | int	isrtf(void); | 
|  | int	ismsdos(void); | 
|  | int	iself(void); | 
|  | int	istring(void); | 
|  | int	iff(void); | 
|  | int	long0(void); | 
|  | int	istar(void); | 
|  | int	p9bitnum(uchar*); | 
|  | int	p9subfont(uchar*); | 
|  | void	print_utf(void); | 
|  | void	type(char*, int); | 
|  | int	utf_count(void); | 
|  | void	wordfreq(void); | 
|  |  | 
|  | int	(*call[])(void) = | 
|  | { | 
|  | long0,		/* recognizable by first 4 bytes */ | 
|  | istring,	/* recognizable by first string */ | 
|  | iff,		/* interchange file format (strings) */ | 
|  | isrfc822,	/* email file */ | 
|  | ismbox,		/* mail box */ | 
|  | istar,		/* recognizable by tar checksum */ | 
|  | ishtml,		/* html keywords */ | 
|  | /*	iscint,		/* compiler/assembler intermediate */ | 
|  | islimbo,	/* limbo source */ | 
|  | isc,		/* c & alef compiler key words */ | 
|  | isas,		/* assembler key words */ | 
|  | ismung,		/* entropy compressed/encrypted */ | 
|  | isp9font,	/* plan 9 font */ | 
|  | isp9bit,	/* plan 9 image (as from /dev/window) */ | 
|  | isenglish,	/* char frequency English */ | 
|  | isrtf,		/* rich text format */ | 
|  | ismsdos,	/* msdos exe (virus file attachement) */ | 
|  | iself,		/* ELF (foreign) executable */ | 
|  | 0 | 
|  | }; | 
|  |  | 
|  | int mime; | 
|  |  | 
|  | #define OCTET	"application/octet-stream\n" | 
|  | #define PLAIN	"text/plain\n" | 
|  |  | 
|  | void | 
|  | main(int argc, char *argv[]) | 
|  | { | 
|  | int i, j, maxlen; | 
|  | char *cp; | 
|  | Rune r; | 
|  |  | 
|  | ARGBEGIN{ | 
|  | case 'm': | 
|  | mime = 1; | 
|  | break; | 
|  | default: | 
|  | fprint(2, "usage: file [-m] [file...]\n"); | 
|  | exits("usage"); | 
|  | }ARGEND; | 
|  |  | 
|  | maxlen = 0; | 
|  | if(mime == 0 || argc > 1){ | 
|  | for(i = 0; i < argc; i++) { | 
|  | for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp)) | 
|  | ; | 
|  | if(j > maxlen) | 
|  | maxlen = j; | 
|  | } | 
|  | } | 
|  | if (argc <= 0) { | 
|  | if(!mime) | 
|  | print ("stdin: "); | 
|  | filetype(0); | 
|  | } | 
|  | else { | 
|  | for(i = 0; i < argc; i++) | 
|  | type(argv[i], maxlen); | 
|  | } | 
|  | exits(0); | 
|  | } | 
|  |  | 
|  | void | 
|  | type(char *file, int nlen) | 
|  | { | 
|  | Rune r; | 
|  | int i; | 
|  | char *p; | 
|  |  | 
|  | if(nlen > 0){ | 
|  | slash = 0; | 
|  | for (i = 0, p = file; *p; i++) { | 
|  | if (*p == '/')			/* find rightmost slash */ | 
|  | slash = p; | 
|  | p += chartorune(&r, p);		/* count runes */ | 
|  | } | 
|  | print("%s:%*s",file, nlen-i+1, ""); | 
|  | } | 
|  | fname = file; | 
|  | if ((fd = open(file, OREAD)) < 0) { | 
|  | print("cannot open\n"); | 
|  | return; | 
|  | } | 
|  | filetype(fd); | 
|  | close(fd); | 
|  | } | 
|  |  | 
|  | void | 
|  | filetype(int fd) | 
|  | { | 
|  | Rune r; | 
|  | int i, f, n; | 
|  | char *p, *eob; | 
|  |  | 
|  | free(mbuf); | 
|  | mbuf = dirfstat(fd); | 
|  | if(mbuf == nil){ | 
|  | print("cannot stat: %r\n"); | 
|  | return; | 
|  | } | 
|  | if(mbuf->mode & DMDIR) { | 
|  | print(mime ? "text/directory\n" : "directory\n"); | 
|  | return; | 
|  | } | 
|  | if(mbuf->type != 'M' && mbuf->type != '|') { | 
|  | print(mime ? OCTET : "special file #%c/%s\n", | 
|  | mbuf->type, mbuf->name); | 
|  | return; | 
|  | } | 
|  | nbuf = read(fd, buf, sizeof(buf)-1); | 
|  |  | 
|  | if(nbuf < 0) { | 
|  | print("cannot read\n"); | 
|  | return; | 
|  | } | 
|  | if(nbuf == 0) { | 
|  | print(mime ? PLAIN : "empty file\n"); | 
|  | return; | 
|  | } | 
|  | buf[nbuf] = 0; | 
|  |  | 
|  | /* | 
|  | * build histogram table | 
|  | */ | 
|  | memset(cfreq, 0, sizeof(cfreq)); | 
|  | for (i = 0; language[i].name; i++) | 
|  | language[i].count = 0; | 
|  | eob = (char *)buf+nbuf; | 
|  | for(n = 0, p = (char *)buf; p < eob; n++) { | 
|  | if (!fullrune(p, eob-p) && eob-p < UTFmax) | 
|  | break; | 
|  | p += chartorune(&r, p); | 
|  | if (r == 0) | 
|  | f = Cnull; | 
|  | else if (r <= 0x7f) { | 
|  | if (!isprint(r) && !isspace(r)) | 
|  | f = Ceascii;	/* ASCII control char */ | 
|  | else f = r; | 
|  | } else if (r == 0x080) { | 
|  | bump_utf_count(r); | 
|  | f = Cutf; | 
|  | } else if (r < 0xA0) | 
|  | f = Cbinary;	/* Invalid Runes */ | 
|  | else if (r <= 0xff) | 
|  | f = Clatin;	/* Latin 1 */ | 
|  | else { | 
|  | bump_utf_count(r); | 
|  | f = Cutf;		/* UTF extension */ | 
|  | } | 
|  | cfreq[f]++;			/* ASCII chars peg directly */ | 
|  | } | 
|  | /* | 
|  | * gross classify | 
|  | */ | 
|  | if (cfreq[Cbinary]) | 
|  | guess = Fbinary; | 
|  | else if (cfreq[Cutf]) | 
|  | guess = Futf; | 
|  | else if (cfreq[Clatin]) | 
|  | guess = Flatin; | 
|  | else if (cfreq[Ceascii]) | 
|  | guess = Feascii; | 
|  | else if (cfreq[Cnull] == n) { | 
|  | print(mime ? OCTET : "first block all null bytes\n"); | 
|  | return; | 
|  | } | 
|  | else guess = Fascii; | 
|  | /* | 
|  | * lookup dictionary words | 
|  | */ | 
|  | memset(wfreq, 0, sizeof(wfreq)); | 
|  | if(guess == Fascii || guess == Flatin || guess == Futf) | 
|  | wordfreq(); | 
|  | /* | 
|  | * call individual classify routines | 
|  | */ | 
|  | for(i=0; call[i]; i++) | 
|  | if((*call[i])()) | 
|  | return; | 
|  |  | 
|  | /* | 
|  | * if all else fails, | 
|  | * print out gross classification | 
|  | */ | 
|  | if (nbuf < 100 && !mime) | 
|  | print(mime ? PLAIN : "short "); | 
|  | if (guess == Fascii) | 
|  | print(mime ? PLAIN : "Ascii\n"); | 
|  | else if (guess == Feascii) | 
|  | print(mime ? PLAIN : "extended ascii\n"); | 
|  | else if (guess == Flatin) | 
|  | print(mime ? PLAIN : "latin ascii\n"); | 
|  | else if (guess == Futf && utf_count() < 4) | 
|  | print_utf(); | 
|  | else print(mime ? OCTET : "binary\n"); | 
|  | } | 
|  |  | 
|  | void | 
|  | bump_utf_count(Rune r) | 
|  | { | 
|  | int low, high, mid; | 
|  |  | 
|  | high = sizeof(language)/sizeof(language[0])-1; | 
|  | for (low = 0; low < high;) { | 
|  | mid = (low+high)/2; | 
|  | if (r >=language[mid].low) { | 
|  | if (r <= language[mid].high) { | 
|  | language[mid].count++; | 
|  | break; | 
|  | } else low = mid+1; | 
|  | } else high = mid; | 
|  | } | 
|  | } | 
|  |  | 
|  | int | 
|  | utf_count(void) | 
|  | { | 
|  | int i, count; | 
|  |  | 
|  | count = 0; | 
|  | for (i = 0; language[i].name; i++) | 
|  | if (language[i].count > 0) | 
|  | switch (language[i].mode) { | 
|  | case Normal: | 
|  | case First: | 
|  | count++; | 
|  | break; | 
|  | default: | 
|  | break; | 
|  | } | 
|  | return count; | 
|  | } | 
|  |  | 
|  | int | 
|  | chkascii(void) | 
|  | { | 
|  | int i; | 
|  |  | 
|  | for (i = 'a'; i < 'z'; i++) | 
|  | if (cfreq[i]) | 
|  | return 1; | 
|  | for (i = 'A'; i < 'Z'; i++) | 
|  | if (cfreq[i]) | 
|  | return 1; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int | 
|  | find_first(char *name) | 
|  | { | 
|  | int i; | 
|  |  | 
|  | for (i = 0; language[i].name != 0; i++) | 
|  | if (language[i].mode == First | 
|  | && strcmp(language[i].name, name) == 0) | 
|  | return i; | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | void | 
|  | print_utf(void) | 
|  | { | 
|  | int i, printed, j; | 
|  |  | 
|  | if(mime){ | 
|  | print(PLAIN); | 
|  | return; | 
|  | } | 
|  | if (chkascii()) { | 
|  | printed = 1; | 
|  | print("Ascii"); | 
|  | } else | 
|  | printed = 0; | 
|  | for (i = 0; language[i].name; i++) | 
|  | if (language[i].count) { | 
|  | switch(language[i].mode) { | 
|  | case Multi: | 
|  | j = find_first(language[i].name); | 
|  | if (j < 0) | 
|  | break; | 
|  | if (language[j].count > 0) | 
|  | break; | 
|  | /* Fall through */ | 
|  | case Normal: | 
|  | case First: | 
|  | if (printed) | 
|  | print(" & "); | 
|  | else printed = 1; | 
|  | print("%s", language[i].name); | 
|  | break; | 
|  | case Shared: | 
|  | default: | 
|  | break; | 
|  | } | 
|  | } | 
|  | if(!printed) | 
|  | print("UTF"); | 
|  | print(" text\n"); | 
|  | } | 
|  |  | 
|  | void | 
|  | wordfreq(void) | 
|  | { | 
|  | int low, high, mid, r; | 
|  | uchar *p, *p2, c; | 
|  |  | 
|  | p = buf; | 
|  | for(;;) { | 
|  | while (p < buf+nbuf && !isalpha(*p)) | 
|  | p++; | 
|  | if (p >= buf+nbuf) | 
|  | return; | 
|  | p2 = p; | 
|  | while(p < buf+nbuf && isalpha(*p)) | 
|  | p++; | 
|  | c = *p; | 
|  | *p = 0; | 
|  | high = sizeof(dict)/sizeof(dict[0]); | 
|  | for(low = 0;low < high;) { | 
|  | mid = (low+high)/2; | 
|  | r = strcmp(dict[mid].word, (char*)p2); | 
|  | if(r == 0) { | 
|  | wfreq[dict[mid].class]++; | 
|  | break; | 
|  | } | 
|  | if(r < 0) | 
|  | low = mid+1; | 
|  | else | 
|  | high = mid; | 
|  | } | 
|  | *p++ = c; | 
|  | } | 
|  | } | 
|  |  | 
|  | typedef struct Filemagic Filemagic; | 
|  | struct Filemagic { | 
|  | ulong x; | 
|  | ulong mask; | 
|  | char *desc; | 
|  | char *mime; | 
|  | }; | 
|  |  | 
|  | Filemagic long0tab[] = { | 
|  | 0xF16DF16D,	0xFFFFFFFF,	"pac1 audio file\n",	OCTET, | 
|  | 0x31636170,	0xFFFFFFFF,	"pac3 audio file\n",	OCTET, | 
|  | 0x32636170,	0xFFFF00FF,	"pac4 audio file\n",	OCTET, | 
|  | 0xBA010000,	0xFFFFFFFF,	"mpeg system stream\n",	OCTET, | 
|  | 0x30800CC0,	0xFFFFFFFF,	"inferno .dis executable\n", OCTET, | 
|  | 0x04034B50,	0xFFFFFFFF,	"zip archive\n", "application/zip", | 
|  | 070707,		0xFFFF,		"cpio archive\n", OCTET, | 
|  | 0x2F7,		0xFFFF,		"tex dvi\n", "application/dvi", | 
|  | 0xfffa0000,	0xfffe0000,	"mp3 audio",	"audio/mpeg", | 
|  | }; | 
|  |  | 
|  | int | 
|  | filemagic(Filemagic *tab, int ntab, ulong x) | 
|  | { | 
|  | int i; | 
|  |  | 
|  | for(i=0; i<ntab; i++) | 
|  | if((x&tab[i].mask) == tab[i].x){ | 
|  | print(mime ? tab[i].mime : tab[i].desc); | 
|  | return 1; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int | 
|  | long0(void) | 
|  | { | 
|  | /*	Fhdr *f; */ | 
|  | long x; | 
|  |  | 
|  | seek(fd, 0, 0);		/* reposition to start of file */ | 
|  | /* | 
|  | if(crackhdr(fd, &f)) { | 
|  | print(mime ? OCTET : "%s\n", f.name); | 
|  | return 1; | 
|  | } | 
|  | */ | 
|  | x = LENDIAN(buf); | 
|  | if(filemagic(long0tab, nelem(long0tab), x)) | 
|  | return 1; | 
|  | return 0; | 
|  | } | 
|  |  | 
/* from tar.c */
enum { NAMSIZ = 100, TBLOCK = 512 };

/* one tar header block, viewed as raw bytes or as its fields */
union	hblock
{
	char	dummy[TBLOCK];
	struct	header
	{
		char	name[NAMSIZ];
		char	mode[8];
		char	uid[8];
		char	gid[8];
		char	size[12];
		char	mtime[12];
		char	chksum[8];
		char	linkflag;
		char	linkname[NAMSIZ];
		/* rest are defined by POSIX's ustar format; see p1003.2b */
		char	magic[6];	/* "ustar" */
		char	version[2];
		char	uname[32];
		char	gname[32];
		char	devmajor[8];
		char	devminor[8];
		char	prefix[155];  /* if non-null, path = prefix "/" name */
	} dbuf;
};

/*
 * Return the sum of all TBLOCK bytes of the header block, each taken
 * as an unsigned value, with the chksum field counted as blanks.
 * The chksum field of *hp is overwritten with blanks as a side effect.
 */
int
checksum(union hblock *hp)
{
	int n, sum;

	memset(hp->dbuf.chksum, ' ', sizeof hp->dbuf.chksum);
	sum = 0;
	for(n = 0; n < TBLOCK; n++)
		sum += hp->dummy[n] & 0xff;
	return sum;
}
|  |  | 
|  | int | 
|  | istar(void) | 
|  | { | 
|  | int chksum; | 
|  | char tblock[TBLOCK]; | 
|  | union hblock *hp = (union hblock *)tblock; | 
|  | struct header *hdr = &hp->dbuf; | 
|  |  | 
|  | seek(fd, 0, 0);		/* reposition to start of file */ | 
|  | if (readn(fd, tblock, sizeof tblock) != sizeof tblock) | 
|  | return 0; | 
|  | chksum = strtol(hdr->chksum, 0, 8); | 
|  | if (hdr->name[0] != '\0' && checksum(hp) == chksum) { | 
|  | if (strcmp(hdr->magic, "ustar") == 0) | 
|  | print(mime? "application/x-ustar\n": | 
|  | "posix tar archive\n"); | 
|  | else | 
|  | print(mime? "application/x-tar\n": "tar archive\n"); | 
|  | return 1; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
/*
 * initial words to classify file:
 * a file whose first `length' bytes equal key is reported as filetype
 * (or as mime in MIME mode).  Earlier rows win, so a longer key must
 * precede any shorter key that is a prefix of it.  A zero key ends
 * the table.
 */
struct	FILE_STRING
{
	char 	*key;
	char	*filetype;
	int	length;
	char	*mime;
} file_string[] =
{
	{ "!<arch>\n__.SYMDEF",	"archive random library",	16,	"application/octet-stream" },
	{ "!<arch>\n",		"archive",			8,	"application/octet-stream" },
	{ "070707",		"cpio archive - ascii header",	6,	"application/octet-stream" },
	{ "%!",			"postscript",			2,	"application/postscript" },
	{ "\004%!",		"postscript",			3,	"application/postscript" },
	{ "x T post",		"troff output for post",	8,	"application/troff" },
	{ "x T Latin1",		"troff output for Latin1",	10,	"application/troff" },
	{ "x T utf",		"troff output for UTF",		7,	"application/troff" },
	{ "x T 202",		"troff output for 202",		7,	"application/troff" },
	{ "x T aps",		"troff output for aps",		7,	"application/troff" },
	{ "GIF",		"GIF image", 			3,	"image/gif" },
	{ "\0PC Research, Inc\0",	"ghostscript fax file",		18,	"application/ghostscript" },
	{ "%PDF",		"PDF",				4,	"application/pdf" },
	{ "<html>\n",		"HTML file",			7,	"text/html" },
	{ "<HTML>\n",		"HTML file",			7,	"text/html" },
	{ "compressed\n",	"Compressed image or subfont",	11,	"application/octet-stream" },
	{ "\111\111\052\000",	"tiff",				4,	"image/tiff" },
	{ "\115\115\000\052",	"tiff",				4,	"image/tiff" },
	{ "\377\330\377\340",	"jpeg",				4,	"image/jpeg" },
	{ "\377\330\377\341",	"jpeg",				4,	"image/jpeg" },
	{ "\377\330\377\333",	"jpeg",				4,	"image/jpeg" },
	{ "\106\117\126\142",	"x3f",				4,	"image/x3f" },
	{ "BM",			"bmp",				2,	"image/bmp" },
	{ "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",	"microsoft office document",	8,	"application/octet-stream" },
	{ "<MakerFile ",	"FrameMaker file",		11,	"application/framemaker" },
	{ "\033%-12345X",	"HPJCL file",			9,	"application/hpjcl" },
	{ "ID3",		"mp3 audio with id3",		3,	"audio/mpeg" },
	{ 0, 0, 0, 0 }
};
|  |  | 
|  | int | 
|  | istring(void) | 
|  | { | 
|  | int i, j; | 
|  | struct FILE_STRING *p; | 
|  |  | 
|  | for(p = file_string; p->key; p++) { | 
|  | if(nbuf >= p->length && !memcmp(buf, p->key, p->length)) { | 
|  | if(mime) | 
|  | print("%s\n", p->mime); | 
|  | else | 
|  | print("%s\n", p->filetype); | 
|  | return 1; | 
|  | } | 
|  | } | 
|  | if(strncmp((char*)buf, "TYPE=", 5) == 0) {	/* td */ | 
|  | for(i = 5; i < nbuf; i++) | 
|  | if(buf[i] == '\n') | 
|  | break; | 
|  | if(mime) | 
|  | print(OCTET); | 
|  | else | 
|  | print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5); | 
|  | return 1; | 
|  | } | 
|  | if(buf[0]=='#' && buf[1]=='!'){ | 
|  | i=2; | 
|  | for(j=2; j < nbuf && buf[j] != ' ' && buf[j] != '\n' && buf[j] != '\r'; j++) | 
|  | if(buf[j] == '/') | 
|  | i = j+1; | 
|  | if(mime) | 
|  | print(PLAIN); | 
|  | else | 
|  | print("%.*s executable file script\n", utfnlen((char*)buf+i, j-i), (char*)buf+i); | 
|  | return 1; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int | 
|  | iff(void) | 
|  | { | 
|  | if (strncmp((char*)buf, "FORM", 4) == 0 && | 
|  | strncmp((char*)buf+8, "AIFF", 4) == 0) { | 
|  | print("%s\n", mime? "audio/x-aiff": "aiff audio"); | 
|  | return 1; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | char*	html_string[] = | 
|  | { | 
|  | "title", | 
|  | "body", | 
|  | "head", | 
|  | "strong", | 
|  | "h1", | 
|  | "h2", | 
|  | "h3", | 
|  | "h4", | 
|  | "h5", | 
|  | "h6", | 
|  | "ul", | 
|  | "li", | 
|  | "dl", | 
|  | "br", | 
|  | "em", | 
|  | 0, | 
|  | }; | 
|  |  | 
|  | int | 
|  | ishtml(void) | 
|  | { | 
|  | uchar *p, *q; | 
|  | int i, count; | 
|  |  | 
|  | /* compare strings between '<' and '>' to html table */ | 
|  | count = 0; | 
|  | p = buf; | 
|  | for(;;) { | 
|  | while (p < buf+nbuf && *p != '<') | 
|  | p++; | 
|  | p++; | 
|  | if (p >= buf+nbuf) | 
|  | break; | 
|  | if(*p == '/') | 
|  | p++; | 
|  | q = p; | 
|  | while(p < buf+nbuf && *p != '>') | 
|  | p++; | 
|  | if (p >= buf+nbuf) | 
|  | break; | 
|  | for(i = 0; html_string[i]; i++) { | 
|  | if(cistrncmp(html_string[i], (char*)q, p-q) == 0) { | 
|  | if(count++ > 4) { | 
|  | print(mime ? "text/html\n" : "HTML file\n"); | 
|  | return 1; | 
|  | } | 
|  | break; | 
|  | } | 
|  | } | 
|  | p++; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | char*	rfc822_string[] = | 
|  | { | 
|  | "from:", | 
|  | "date:", | 
|  | "to:", | 
|  | "subject:", | 
|  | "received:", | 
|  | "reply to:", | 
|  | "sender:", | 
|  | 0, | 
|  | }; | 
|  |  | 
|  | int | 
|  | isrfc822(void) | 
|  | { | 
|  |  | 
|  | char *p, *q, *r; | 
|  | int i, count; | 
|  |  | 
|  | count = 0; | 
|  | p = (char*)buf; | 
|  | for(;;) { | 
|  | q = strchr(p, '\n'); | 
|  | if(q == nil) | 
|  | break; | 
|  | *q = 0; | 
|  | if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){ | 
|  | count++; | 
|  | *q = '\n'; | 
|  | p = q+1; | 
|  | continue; | 
|  | } | 
|  | *q = '\n'; | 
|  | if(*p != '\t' && *p != ' '){ | 
|  | r = strchr(p, ':'); | 
|  | if(r == 0 || r > q) | 
|  | break; | 
|  | for(i = 0; rfc822_string[i]; i++) { | 
|  | if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){ | 
|  | count++; | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  | p = q+1; | 
|  | } | 
|  | if(count >= 3){ | 
|  | print(mime ? "message/rfc822\n" : "email file\n"); | 
|  | return 1; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int | 
|  | ismbox(void) | 
|  | { | 
|  | char *p, *q; | 
|  |  | 
|  | p = (char*)buf; | 
|  | q = strchr(p, '\n'); | 
|  | if(q == nil) | 
|  | return 0; | 
|  | *q = 0; | 
|  | if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){ | 
|  | print(mime ? "text/plain\n" : "mail box\n"); | 
|  | return 1; | 
|  | } | 
|  | *q = '\n'; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int | 
|  | isc(void) | 
|  | { | 
|  | int n; | 
|  |  | 
|  | n = wfreq[I1]; | 
|  | /* | 
|  | * includes | 
|  | */ | 
|  | if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n) | 
|  | goto yes; | 
|  | if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n) | 
|  | goto yes; | 
|  | /* | 
|  | * declarations | 
|  | */ | 
|  | if(wfreq[Cword] >= 5 && cfreq[';'] >= 5) | 
|  | goto yes; | 
|  | /* | 
|  | * assignments | 
|  | */ | 
|  | if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1) | 
|  | goto yes; | 
|  | return 0; | 
|  |  | 
|  | yes: | 
|  | if(mime){ | 
|  | print(PLAIN); | 
|  | return 1; | 
|  | } | 
|  | if(wfreq[Alword] > 0) | 
|  | print("alef program\n"); | 
|  | else | 
|  | print("c program\n"); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | int | 
|  | islimbo(void) | 
|  | { | 
|  |  | 
|  | /* | 
|  | * includes | 
|  | */ | 
|  | if(wfreq[Lword] < 4) | 
|  | return 0; | 
|  | print(mime ? PLAIN : "limbo program\n"); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | int | 
|  | isas(void) | 
|  | { | 
|  |  | 
|  | /* | 
|  | * includes | 
|  | */ | 
|  | if(wfreq[Aword] < 2) | 
|  | return 0; | 
|  | print(mime ? PLAIN : "as program\n"); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * low entropy means encrypted | 
|  | */ | 
|  | int | 
|  | ismung(void) | 
|  | { | 
|  | int i, bucket[8]; | 
|  | float cs; | 
|  |  | 
|  | if(nbuf < 64) | 
|  | return 0; | 
|  | memset(bucket, 0, sizeof(bucket)); | 
|  | for(i=0; i<64; i++) | 
|  | bucket[(buf[i]>>5)&07] += 1; | 
|  |  | 
|  | cs = 0.; | 
|  | for(i=0; i<8; i++) | 
|  | cs += (bucket[i]-8)*(bucket[i]-8); | 
|  | cs /= 8.; | 
|  | if(cs <= 24.322) { | 
|  | if(buf[0]==0x1f && (buf[1]==0x8b || buf[1]==0x9d)) | 
|  | print(mime ? OCTET : "compressed\n"); | 
|  | else | 
|  | print(mime ? OCTET : "encrypted\n"); | 
|  | return 1; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * english by punctuation and frequencies | 
|  | */ | 
|  | int | 
|  | isenglish(void) | 
|  | { | 
|  | int vow, comm, rare, badpun, punct; | 
|  | char *p; | 
|  |  | 
|  | if(guess != Fascii && guess != Feascii) | 
|  | return 0; | 
|  | badpun = 0; | 
|  | punct = 0; | 
|  | for(p = (char *)buf; p < (char *)buf+nbuf-1; p++) | 
|  | switch(*p) { | 
|  | case '.': | 
|  | case ',': | 
|  | case ')': | 
|  | case '%': | 
|  | case ';': | 
|  | case ':': | 
|  | case '?': | 
|  | punct++; | 
|  | if(p[1] != ' ' && p[1] != '\n') | 
|  | badpun++; | 
|  | } | 
|  | if(badpun*5 > punct) | 
|  | return 0; | 
|  | if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e'])	/* shell file test */ | 
|  | return 0; | 
|  | if(2*cfreq[';'] > cfreq['e']) | 
|  | return 0; | 
|  |  | 
|  | vow = 0; | 
|  | for(p="AEIOU"; *p; p++) { | 
|  | vow += cfreq[(uchar)*p]; | 
|  | vow += cfreq[tolower((uchar)*p)]; | 
|  | } | 
|  | comm = 0; | 
|  | for(p="ETAION"; *p; p++) { | 
|  | comm += cfreq[(uchar)*p]; | 
|  | comm += cfreq[tolower((uchar)*p)]; | 
|  | } | 
|  | rare = 0; | 
|  | for(p="VJKQXZ"; *p; p++) { | 
|  | rare += cfreq[(uchar)*p]; | 
|  | rare += cfreq[tolower((uchar)*p)]; | 
|  | } | 
|  | if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) { | 
|  | print(mime ? PLAIN : "English text\n"); | 
|  | return 1; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * pick up a number with | 
|  | * syntax _*[0-9]+_ | 
|  | */ | 
|  | #define	P9BITLEN	12 | 
|  | int | 
|  | p9bitnum(uchar *bp) | 
|  | { | 
|  | int n, c, len; | 
|  |  | 
|  | len = P9BITLEN; | 
|  | while(*bp == ' ') { | 
|  | bp++; | 
|  | len--; | 
|  | if(len <= 0) | 
|  | return -1; | 
|  | } | 
|  | n = 0; | 
|  | while(len > 1) { | 
|  | c = *bp++; | 
|  | if(!isdigit(c)) | 
|  | return -1; | 
|  | n = n*10 + c-'0'; | 
|  | len--; | 
|  | } | 
|  | if(*bp != ' ') | 
|  | return -1; | 
|  | return n; | 
|  | } | 
|  |  | 
/*
 * Work out the pixel depth from the first 12-byte field of an image
 * header.  s points at the field; *newp is set to 0 for the old
 * format and 1 for the new one.
 *
 * Old format: the field holds a number n and the depth is 1<<n.
 * n is accepted only from 0 to 5 (depth 1 to 32), which keeps the
 * shift well defined and the depth non-zero.
 * New format: the field holds a channel descriptor, letters each
 * followed by a bit count (as in "r8g8b8"); the depth is the sum of
 * the counts and must be 8, 16, 24 or 32.
 *
 * Returns the depth in bits, or -1 if the field is blank or invalid.
 */
int
depthof(char *s, int *newp)
{
	char *es;
	int d;
	long ldepth;

	*newp = 0;
	es = s+12;
	while(s<es && *s==' ')
		s++;
	if(s == es)
		return -1;
	if('0'<=*s && *s<='9'){
		ldepth = strtol(s, 0, 10);
		if(ldepth < 0 || ldepth > 5)
			return -1;
		return 1<<ldepth;
	}

	*newp = 1;
	d = 0;
	/* also stop at a NUL so the letter skip cannot step over the end of the string */
	while(s<es && *s!=' ' && *s!='\0'){
		s++;	/* skip letter */
		d += strtoul(s, &s, 10);
	}

	switch(d){
	case 32:
	case 24:
	case 16:
	case 8:
		return d;
	}
	return -1;
}
|  |  | 
|  | int | 
|  | isp9bit(void) | 
|  | { | 
|  | int dep, lox, loy, hix, hiy, px, new; | 
|  | ulong t; | 
|  | long len; | 
|  | char *newlabel; | 
|  |  | 
|  | newlabel = "old "; | 
|  |  | 
|  | dep = depthof((char*)buf + 0*P9BITLEN, &new); | 
|  | if(new) | 
|  | newlabel = ""; | 
|  | lox = p9bitnum(buf + 1*P9BITLEN); | 
|  | loy = p9bitnum(buf + 2*P9BITLEN); | 
|  | hix = p9bitnum(buf + 3*P9BITLEN); | 
|  | hiy = p9bitnum(buf + 4*P9BITLEN); | 
|  | if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0) | 
|  | return 0; | 
|  |  | 
|  | if(dep < 8){ | 
|  | px = 8/dep;	/* pixels per byte */ | 
|  | /* set l to number of bytes of data per scan line */ | 
|  | if(lox >= 0) | 
|  | len = (hix+px-1)/px - lox/px; | 
|  | else{	/* make positive before divide */ | 
|  | t = (-lox)+px-1; | 
|  | t = (t/px)*px; | 
|  | len = (t+hix+px-1)/px; | 
|  | } | 
|  | }else | 
|  | len = (hix-lox)*dep/8; | 
|  | len *= (hiy-loy);		/* col length */ | 
|  | len += 5*P9BITLEN;		/* size of initial ascii */ | 
|  |  | 
|  | /* | 
|  | * for image file, length is non-zero and must match calculation above | 
|  | * for /dev/window and /dev/screen the length is always zero | 
|  | * for subfont, the subfont header should follow immediately. | 
|  | */ | 
|  | if (len != 0 && mbuf->length == 0) { | 
|  | print("%splan 9 image\n", newlabel); | 
|  | return 1; | 
|  | } | 
|  | if (mbuf->length == len) { | 
|  | print("%splan 9 image\n", newlabel); | 
|  | return 1; | 
|  | } | 
|  | /* Ghostscript sometimes produces a little extra on the end */ | 
|  | if (mbuf->length < len+P9BITLEN) { | 
|  | print("%splan 9 image\n", newlabel); | 
|  | return 1; | 
|  | } | 
|  | if (p9subfont(buf+len)) { | 
|  | print("%ssubfont file\n", newlabel); | 
|  | return 1; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int | 
|  | p9subfont(uchar *p) | 
|  | { | 
|  | int n, h, a; | 
|  |  | 
|  | /* if image too big, assume it's a subfont */ | 
|  | if (p+3*P9BITLEN > buf+sizeof(buf)) | 
|  | return 1; | 
|  |  | 
|  | n = p9bitnum(p + 0*P9BITLEN);	/* char count */ | 
|  | if (n < 0) | 
|  | return 0; | 
|  | h = p9bitnum(p + 1*P9BITLEN);	/* height */ | 
|  | if (h < 0) | 
|  | return 0; | 
|  | a = p9bitnum(p + 2*P9BITLEN);	/* ascent */ | 
|  | if (a < 0) | 
|  | return 0; | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | #define	WHITESPACE(c)		((c) == ' ' || (c) == '\t' || (c) == '\n') | 
|  |  | 
|  | int | 
|  | isp9font(void) | 
|  | { | 
|  | uchar *cp, *p; | 
|  | int i, n; | 
|  | char pathname[1024]; | 
|  |  | 
|  | cp = buf; | 
|  | if (!getfontnum(cp, &cp))	/* height */ | 
|  | return 0; | 
|  | if (!getfontnum(cp, &cp))	/* ascent */ | 
|  | return 0; | 
|  | for (i = 0; 1; i++) { | 
|  | if (!getfontnum(cp, &cp))	/* min */ | 
|  | break; | 
|  | if (!getfontnum(cp, &cp))	/* max */ | 
|  | return 0; | 
|  | while (WHITESPACE(*cp)) | 
|  | cp++; | 
|  | for (p = cp; *cp && !WHITESPACE(*cp); cp++) | 
|  | ; | 
|  | /* construct a path name, if needed */ | 
|  | n = 0; | 
|  | if (*p != '/' && slash) { | 
|  | n = slash-fname+1; | 
|  | if (n < sizeof(pathname)) | 
|  | memcpy(pathname, fname, n); | 
|  | else n = 0; | 
|  | } | 
|  | if (n+cp-p < sizeof(pathname)) { | 
|  | memcpy(pathname+n, p, cp-p); | 
|  | n += cp-p; | 
|  | pathname[n] = 0; | 
|  | if (access(pathname, AEXIST) < 0) | 
|  | return 0; | 
|  | } | 
|  | } | 
|  | if (i) { | 
|  | print(mime ? "text/plain\n" : "font file\n"); | 
|  | return 1; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int | 
|  | getfontnum(uchar *cp, uchar **rp) | 
|  | { | 
|  | while (WHITESPACE(*cp))		/* extract ulong delimited by whitespace */ | 
|  | cp++; | 
|  | if (*cp < '0' || *cp > '9') | 
|  | return 0; | 
|  | strtoul((char *)cp, (char **)rp, 0); | 
|  | if (!WHITESPACE(**rp)) | 
|  | return 0; | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | int | 
|  | isrtf(void) | 
|  | { | 
|  | if(strstr((char *)buf, "\\rtf1")){ | 
|  | print(mime ? "application/rtf\n" : "rich text format\n"); | 
|  | return 1; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int | 
|  | ismsdos(void) | 
|  | { | 
|  | if (buf[0] == 0x4d && buf[1] == 0x5a){ | 
|  | print(mime ? "application/x-msdownload\n" : "MSDOS executable\n"); | 
|  | return 1; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int | 
|  | iself(void) | 
|  | { | 
|  | static char *cpu[] = {		/* NB: incomplete and arbitary list */ | 
|  | nil, | 
|  | /*1*/	"WE32100", | 
|  | /*2*/	"SPARC", | 
|  | /*3*/	"i386", | 
|  | /*4*/	"M68000", | 
|  | /*5*/	"M88000", | 
|  | /*6*/	"i486", | 
|  | /*7*/	"i860", | 
|  | /*8*/	"R3000", | 
|  | /*9*/	"S370", | 
|  | /*10*/	"R4000", | 
|  | nil, nil, nil, nil, | 
|  | /*15*/	"HP-PA", | 
|  | nil, | 
|  | nil, | 
|  | /*18*/	"sparc v8+", | 
|  | /*19*/	"i960", | 
|  | /*20*/	"PPC-32", | 
|  | /*21*/	"PPC-64", | 
|  | nil, nil, nil, nil, | 
|  | nil, nil, nil, nil, nil, | 
|  | nil, nil, nil, nil, nil, | 
|  | nil, nil, nil, nil, | 
|  | /*40*/	"ARM", | 
|  | /*41*/	"Alpha", | 
|  | nil, | 
|  | /*43*/	"sparc v9", | 
|  | nil, nil, | 
|  | nil, nil, nil, nil, | 
|  | /*50*/	"IA-64", | 
|  | nil, nil, nil, nil, nil, | 
|  | nil, nil, nil, nil, nil, | 
|  | nil, | 
|  | /*62*/	"AMD64", | 
|  | nil, nil, nil, | 
|  | nil, nil, nil, nil, nil, | 
|  | nil, nil, nil, nil, | 
|  | /*75*/	"VAX", | 
|  | }; | 
|  |  | 
|  |  | 
|  | if (memcmp(buf, "\177ELF", 4) == 0){ | 
|  | /* gcc misparses \x7FELF as \x7FE L F */ | 
|  | if (!mime){ | 
|  | int n = (buf[19] << 8) | buf[18]; | 
|  | char *p = "unknown"; | 
|  |  | 
|  | if (n > 0 && n < nelem(cpu) && cpu[n]) | 
|  | p = cpu[n]; | 
|  | else { | 
|  | /* try the other byte order */ | 
|  | n = (buf[18] << 8) | buf[19]; | 
|  | if (n > 0 && n < nelem(cpu) && cpu[n]) | 
|  | p = cpu[n]; | 
|  | } | 
|  | print("%s ELF executable\n", p); | 
|  | } | 
|  | else | 
|  | print("application/x-elf-executable"); | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } |