/* src/cmd/file.c - plan9 - Git at Google */

 #include <u.h>
 #include <libc.h>
 #include <bio.h>
 #include <ctype.h>
 #include <mach.h>

 /*
  * file - determine type of file
  */
 /*
  * LENDIAN(p) - assemble the four bytes p[0..3] into one value, least
  * significant byte first.  Each byte is widened to unsigned long before
  * it is shifted, so a top byte >= 0x80 is never shifted into the sign
  * bit of an int.  p is evaluated four times; pass a side-effect-free
  * expression.
  */
 #define	LENDIAN(p)	((unsigned long)(p)[0] | ((unsigned long)(p)[1]<<8) | ((unsigned long)(p)[2]<<16) | ((unsigned long)(p)[3]<<24))

 /*
  * State shared by filetype() and the individual classifiers.
  */
 uchar	buf[6001];	/* start of the file; filetype() reads at most sizeof(buf)-1 bytes and NUL-terminates */
 short	cfreq[140];	/* histogram indexed by ASCII character or by one of the C* class codes */
 short	wfreq[50];	/* count of dictionary words seen, indexed by word class */
 int	nbuf;		/* number of bytes read into buf */
 Dir*	mbuf;		/* dirfstat() result for the file being examined */
 int	fd;		/* descriptor opened by type(); classifiers seek and re-read through it */
 char 	*fname;		/* name of the file being examined, set by type() */
 char	*slash;		/* rightmost '/' in the name, set by type(); 0 if none */

 /*
  * Word classes (indices into wfreq[]) followed by character classes
  * (indices into cfreq[], starting above the ASCII range because ASCII
  * characters index cfreq[] directly).
  */
 enum
 {
 	Cword,		/* C keyword */
 	Fword,		/* presumably a Fortran keyword; no classifier in this file reads it */
 	Aword,		/* assembler keyword, counted by isas() */
 	Alword,		/* alef keyword, makes isc() say "alef program" */
 	Lword,		/* limbo keyword, counted by islimbo() */
 	I1,		/* the word "include" */
 	I2,		/* a header base name: bio, libc, stdio, u */
 	I3,		/* the word "h" */
 	Clatin	= 128,	/* rune in 0xA0-0xFF */
 	Cbinary,	/* rune in 0x81-0x9F */
 	Cnull,		/* NUL */
 	Ceascii,	/* ASCII control character */
 	Cutf,		/* rune 0x80 or above 0xFF */
 };

 /*
  * Dictionary of words that hint at a source language.  wordfreq()
  * looks words up with a binary search using strcmp(), so the entries
  * MUST stay in strcmp() (byte) order: upper case before lower case.
  * A single misplaced entry hides itself and some of its neighbours
  * from the search.
  */
 struct
 {
 	char*	word;
 	int	class;
 } dict[] =
 {
 	"PATH",		Lword,
 	"TEXT",		Aword,
 	"adt",		Alword,
 	"aggr",		Alword,
 	"alef",		Alword,
 	"array",	Lword,
 	"bio",		I2,
 	"block",	Fword,
 	"chan",		Alword,
 	"char",		Cword,
 	"common",	Fword,
 	"con",		Lword,
 	"data",		Fword,
 	"dimension",	Fword,
 	"double",	Cword,
 	"extern",	Cword,
 	"float",	Cword,
 	"fn",		Lword,
 	"function",	Fword,
 	"h",		I3,
 	"implement",	Lword,
 	"import",	Lword,
 	"include",	I1,
 	"int",		Cword,
 	"integer",	Fword,
 	"iota",		Lword,
 	"libc",		I2,
 	"long",		Cword,
 	"module",	Lword,
 	"real",		Fword,
 	"ref",		Lword,
 	"register",	Cword,
 	"self",		Lword,
 	"short",	Cword,
 	"static",	Cword,
 	"stdio",	I2,
 	"struct",	Cword,
 	"subroutine",	Fword,
 	"u",		I2,
 	"void",		Cword,
 };

 /* codes for 'mode' field in language structure */
 enum	{
 		Normal	= 0,
 		First,		/* first entry for language spanning several ranges */
 		Multi,		/* later entries "   "       "  ... */
 		Shared,		/* codes used in several languages */
 	};

 /*
  * Rune ranges used to name the scripts found in UTF text.
  * bump_utf_count() binary-searches the table on low/high, so entries
  * must stay in ascending, non-overlapping order.  The terminal entry
  * (name == 0) is left out of that search and stops the linear scans.
  */
 struct
 {
 	int	mode;		/* see enum above */
 	int 	count;		/* runes seen in this range; reset by filetype() for every file */
 	int	low;		/* first rune of the range */
 	int	high;		/* last rune of the range, inclusive */
 	char	*name;		/* name printed by print_utf() */

 } language[] =
 {
 	Normal, 0,	0x0080, 0x0080,	"Extended Latin",
 	Normal,	0,	0x0100,	0x01FF,	"Extended Latin",
 	Normal,	0,	0x0370,	0x03FF,	"Greek",
 	Normal,	0,	0x0400,	0x04FF,	"Cyrillic",
 	Normal,	0,	0x0530,	0x058F,	"Armenian",
 	Normal,	0,	0x0590,	0x05FF,	"Hebrew",
 	Normal,	0,	0x0600,	0x06FF,	"Arabic",
 	Normal,	0,	0x0900,	0x097F,	"Devanagari",
 	Normal,	0,	0x0980,	0x09FF,	"Bengali",
 	Normal,	0,	0x0A00,	0x0A7F,	"Gurmukhi",
 	Normal,	0,	0x0A80,	0x0AFF,	"Gujarati",
 	Normal,	0,	0x0B00,	0x0B7F,	"Oriya",
 	Normal,	0,	0x0B80,	0x0BFF,	"Tamil",
 	Normal,	0,	0x0C00,	0x0C7F,	"Telugu",
 	Normal,	0,	0x0C80,	0x0CFF,	"Kannada",
 	Normal,	0,	0x0D00,	0x0D7F,	"Malayalam",
 	Normal,	0,	0x0E00,	0x0E7F,	"Thai",
 	Normal,	0,	0x0E80,	0x0EFF,	"Lao",
 	Normal,	0,	0x1000,	0x105F,	"Tibetan",
 	Normal,	0,	0x10A0,	0x10FF,	"Georgian",
 	Normal,	0,	0x3040,	0x30FF,	"Japanese",
 	Normal,	0,	0x3100,	0x312F,	"Chinese",
 	First,	0,	0x3130,	0x318F,	"Korean",
 	Multi,	0,	0x3400,	0x3D2F,	"Korean",
 	Shared,	0,	0x4e00,	0x9fff,	"CJK",
 	Normal,	0,	0,	0,	0,		/* terminal entry */
 };


 /*
  * Gross classification chosen by filetype() from the character
  * histogram, before the individual classifiers run.
  */
 enum
 {
 	Fascii,		/* printable ascii */
 	Flatin,		/* latin 1 */
 	Futf,		/* UTF character set */
 	Fbinary,	/* binary */
 	Feascii,	/* ASCII with control chars */
 	Fnull,		/* NUL in file; not assigned anywhere in the code visible in this file */
 } guess;

 /*
  * Forward declarations.  The is*() routines, long0, istring, iff and
  * istar are the classifiers listed in call[]: each returns nonzero
  * after printing a verdict, zero to let the next one try.
  */
 void	bump_utf_count(Rune);
 int	cistrncmp(char*, char*, int);
 void	filetype(int);
 int	getfontnum(uchar*, uchar**);
 int	isas(void);
 int	isc(void);
 int	isenglish(void);
 int	ishp(void);
 int	ishtml(void);
 int	isrfc822(void);
 int	ismbox(void);
 int	islimbo(void);
 int	ismung(void);
 int	isp9bit(void);
 int	isp9font(void);
 int	isrtf(void);
 int	ismsdos(void);
 int	iself(void);
 int	istring(void);
 int	iff(void);
 int	long0(void);
 int	istar(void);
 int	p9bitnum(uchar*);
 int	p9subfont(uchar*);
 void	print_utf(void);
 void	type(char*, int);
 int	utf_count(void);
 void	wordfreq(void);

 /*
  * Classifiers, tried in this order by filetype().  The first one that
  * returns nonzero has printed the verdict and ends the search; the
  * zero entry terminates the list.
  */
 int	(*call[])(void) =
 {
 	long0,		/* recognizable by first 4 bytes */
 	istring,	/* recognizable by first string */
 	iff,		/* interchange file format (strings) */
 	isrfc822,	/* email file */
 	ismbox,		/* mail box */
 	istar,		/* recognizable by tar checksum */
 	ishtml,		/* html keywords */
 /*	iscint,		compiler/assembler intermediate (disabled) */
 	islimbo,	/* limbo source */
 	isc,		/* c & alef compiler key words */
 	isas,		/* assembler key words */
 	ismung,		/* entropy compressed/encrypted */
 	isp9font,	/* plan 9 font */
 	isp9bit,	/* plan 9 image (as from /dev/window) */
 	isenglish,	/* char frequency English */
 	isrtf,		/* rich text format */
 	ismsdos,	/* msdos exe (virus file attachment) */
 	iself,		/* ELF (foreign) executable */
 	0
 };

 /* set by the -m option: print MIME types instead of descriptions */
 int mime;

 /* MIME answers used when nothing more specific is known; each carries its own newline */
 #define OCTET	"application/octet-stream\n"
 #define PLAIN	"text/plain\n"

 /*
  * main - parse the -m option, then classify every named file, or
  * standard input when no file is named.
  */
 void
 main(int argc, char *argv[])
 {
 	int a, nrune, widest;
 	char *s;
 	Rune rune;

 	ARGBEGIN{
 	case 'm':
 		mime = 1;
 		break;
 	default:
 		fprint(2, "usage: file [-m] [file...]\n");
 		exits("usage");
 	}ARGEND;

 	/*
 	 * widest name, counted in runes, so type() can line up the
 	 * verdicts; left at zero for a single file under -m, which makes
 	 * type() print no name at all.
 	 */
 	widest = 0;
 	if(!mime || argc > 1)
 		for(a = 0; a < argc; a++){
 			nrune = 0;
 			for(s = argv[a]; *s != 0; s += chartorune(&rune, s))
 				nrune++;
 			if(nrune > widest)
 				widest = nrune;
 		}

 	if(argc > 0){
 		for(a = 0; a < argc; a++)
 			type(argv[a], widest);
 		exits(0);
 	}
 	if(!mime)
 		print ("stdin: ");
 	filetype(0);
 	exits(0);
 }

 /*
  * type - print the padded name of one file, open it and classify it.
  *
  * file is the path to examine.  nlen is the width in runes of the
  * longest name being reported; when it is zero no name is printed.
  *
  * The rightmost slash is recorded in the global 'slash' on every call,
  * not only when a name is printed: isp9font() needs it to resolve
  * subfont names relative to the font file's directory.
  */
 void
 type(char *file, int nlen)
 {
 	Rune r;
 	int i;
 	char *p;

 	slash = 0;
 	for (i = 0, p = file; *p; i++) {
 		if (*p == '/')			/* find rightmost slash */
 			slash = p;
 		p += chartorune(&r, p);		/* count runes */
 	}
 	if(nlen > 0)
 		print("%s:%*s",file, nlen-i+1, "");
 	fname = file;
 	if ((fd = open(file, OREAD)) < 0) {
 		print("cannot open\n");
 		return;
 	}
 	filetype(fd);
 	close(fd);
 }

 /*
  * filetype - classify the file open on fd and print the verdict.
  *
  * Reads the start of the file into buf, builds the character histogram
  * cfreq[] and the per-script counts in language[], picks a gross
  * classification (guess), counts dictionary words, and then runs the
  * classifiers in call[] until one of them claims the file.  If none
  * does, the gross classification itself is printed.
  *
  * Note that the parameter fd hides the global fd inside this function;
  * the classifiers that re-read the file use the global.
  */
 void
 filetype(int fd)
 {
 	Rune r;
 	int i, f, n;
 	char *p, *eob;

 	/* drop the previous file's stat buffer before fetching a new one */
 	free(mbuf);
 	mbuf = dirfstat(fd);
 	if(mbuf == nil){
 		print("cannot stat: %r\n");
 		return;
 	}
 	if(mbuf->mode & DMDIR) {
 		print(mime ? "text/directory\n" : "directory\n");
 		return;
 	}
 	/* only device types 'M' and '|' are read; anything else is reported as special */
 	if(mbuf->type != 'M' && mbuf->type != '|') {
 		print(mime ? OCTET : "special file #%c/%s\n",
 			mbuf->type, mbuf->name);
 		return;
 	}
 	/* leave room for the terminating NUL stored below */
 	nbuf = read(fd, buf, sizeof(buf)-1);

 	if(nbuf < 0) {
 		print("cannot read\n");
 		return;
 	}
 	if(nbuf == 0) {
 		print(mime ? PLAIN : "empty file\n");
 		return;
 	}
 	buf[nbuf] = 0;

 	/*
 	 * build histogram table
 	 */
 	memset(cfreq, 0, sizeof(cfreq));
 	for (i = 0; language[i].name; i++)
 		language[i].count = 0;
 	eob = (char *)buf+nbuf;
 	/* n counts the runes decoded, for the all-NUL test below */
 	for(n = 0, p = (char *)buf; p < eob; n++) {
 		/* stop at a rune cut off by the end of the buffer */
 		if (!fullrune(p, eob-p) && eob-p < UTFmax)
 			break;
 		p += chartorune(&r, p);
 		if (r == 0)
 			f = Cnull;
 		else if (r <= 0x7f) {
 			if (!isprint(r) && !isspace(r))
 				f = Ceascii;	/* ASCII control char */
 			else f = r;
 		} else if (r == 0x080) {
 			bump_utf_count(r);
 			f = Cutf;
 		} else if (r < 0xA0)
 				f = Cbinary;	/* Invalid Runes */
 		else if (r <= 0xff)
 				f = Clatin;	/* Latin 1 */
 		else {
 			bump_utf_count(r);
 			f = Cutf;		/* UTF extension */
 		}
 		cfreq[f]++;			/* ASCII chars peg directly */
 	}
 	/*
 	 * gross classify
 	 */
 	if (cfreq[Cbinary])
 		guess = Fbinary;
 	else if (cfreq[Cutf])
 		guess = Futf;
 	else if (cfreq[Clatin])
 		guess = Flatin;
 	else if (cfreq[Ceascii])
 		guess = Feascii;
 	else if (cfreq[Cnull] == n) {
 		print(mime ? OCTET : "first block all null bytes\n");
 		return;
 	}
 	else guess = Fascii;
 	/*
 	 * lookup dictionary words
 	 */
 	memset(wfreq, 0, sizeof(wfreq));
 	if(guess == Fascii || guess == Flatin || guess == Futf)
 		wordfreq();
 	/*
 	 * call individual classify routines
 	 */
 	for(i=0; call[i]; i++)
 		if((*call[i])())
 			return;

 	/*
 	 * if all else fails,
 	 * print out gross classification
 	 */
 	/* mime is known to be zero inside this if, so only "short " can be printed */
 	if (nbuf < 100 && !mime)
 		print(mime ? PLAIN : "short ");
 	if (guess == Fascii)
 		print(mime ? PLAIN : "Ascii\n");
 	else if (guess == Feascii)
 		print(mime ? PLAIN : "extended ascii\n");
 	else if (guess == Flatin)
 		print(mime ? PLAIN : "latin ascii\n");
 	else if (guess == Futf && utf_count() < 4)
 		print_utf();
 	else print(mime ? OCTET : "binary\n");
 }

 /*
  * bump_utf_count - credit rune r to the language[] range containing
  * it.  The table is binary-searched without its terminal entry; a
  * rune that falls in no range is ignored.
  */
 void
 bump_utf_count(Rune r)
 {
 	int lo, hi, m;

 	lo = 0;
 	hi = sizeof(language)/sizeof(language[0])-1;
 	while(lo < hi){
 		m = (lo+hi)/2;
 		if(r < language[m].low){
 			hi = m;
 			continue;
 		}
 		if(r > language[m].high){
 			lo = m+1;
 			continue;
 		}
 		language[m].count++;
 		return;
 	}
 }

 /*
  * utf_count - number of distinct languages seen in the buffer.  Only
  * Normal and First entries are counted, so a language spread over
  * several ranges counts once and Shared ranges not at all.
  */
 int
 utf_count(void)
 {
 	int k, seen;

 	seen = 0;
 	for(k = 0; language[k].name != 0; k++){
 		if(language[k].count <= 0)
 			continue;
 		if(language[k].mode == Normal || language[k].mode == First)
 			seen++;
 	}
 	return seen;
 }

 /*
  * chkascii - report whether the histogram holds any ASCII letter.
  * Returns 1 if at least one of a-z or A-Z was seen, else 0.  The
  * ranges are inclusive: 'z' and 'Z' count as letters too.
  */
 int
 chkascii(void)
 {
 	int i;

 	for (i = 'a'; i <= 'z'; i++)
 		if (cfreq[i])
 			return 1;
 	for (i = 'A'; i <= 'Z'; i++)
 		if (cfreq[i])
 			return 1;
 	return 0;
 }

 /*
  * find_first - index of the language[] entry that has mode First and
  * the given name, or -1 when there is none.
  */
 int
 find_first(char *name)
 {
 	int k;

 	k = 0;
 	while(language[k].name != 0){
 		if(language[k].mode == First && strcmp(language[k].name, name) == 0)
 			return k;
 		k++;
 	}
 	return -1;
 }

 void
 print_utf(void)
 {
 	int i, printed, j;

 	if(mime){
 		print(PLAIN);
 		return;
 	}
 	if (chkascii()) {
 		printed = 1;
 		print("Ascii");
 	} else
 		printed = 0;
 	for (i = 0; language[i].name; i++)
 		if (language[i].count) {
 			switch(language[i].mode) {
 			case Multi:
 				j = find_first(language[i].name);
 				if (j < 0)
 					break;
 				if (language[j].count > 0)
 					break;
 				/* Fall through */
 			case Normal:
 			case First:
 				if (printed)
 					print(" & ");
 				else printed = 1;
 				print("%s", language[i].name);
 				break;
 			case Shared:
 			default:
 				break;
 			}
 		}
 	if(!printed)
 		print("UTF");
 	print(" text\n");
 }

 /*
  * wordfreq - count the dictionary words in buf by class into wfreq[].
  * A word is a maximal run of isalpha() bytes.  Each word is terminated
  * in place for the lookup and the overwritten byte is put back
  * afterwards; for a word that ends the buffer that byte is the NUL
  * filetype() stored at buf[nbuf].
  */
 void
 wordfreq(void)
 {
 	int lo, hi, m, cmp;
 	uchar *s, *w, *end, saved;

 	end = buf+nbuf;
 	s = buf;
 	for(;;){
 		/* skip to the start of the next word */
 		while(s < end && !isalpha(*s))
 			s++;
 		if(s >= end)
 			return;
 		w = s;
 		while(s < end && isalpha(*s))
 			s++;
 		saved = *s;
 		*s = 0;

 		/* binary search of dict[] */
 		lo = 0;
 		hi = sizeof(dict)/sizeof(dict[0]);
 		while(lo < hi){
 			m = (lo+hi)/2;
 			cmp = strcmp(dict[m].word, (char*)w);
 			if(cmp < 0)
 				lo = m+1;
 			else if(cmp > 0)
 				hi = m;
 			else{
 				wfreq[dict[m].class]++;
 				break;
 			}
 		}
 		*s++ = saved;
 	}
 }

 /*
  * Filemagic - one magic-number rule.  The first four bytes of the file,
  * assembled by LENDIAN, are ANDed with mask and compared with x.  desc
  * is printed normally and mime under -m.  filemagic() prints either
  * string as is, so every string must end in its own newline.
  */
 typedef struct Filemagic Filemagic;
 struct Filemagic {
 	ulong x;
 	ulong mask;
 	char *desc;
 	char *mime;
 };

 Filemagic long0tab[] = {
 	0xF16DF16D,	0xFFFFFFFF,	"pac1 audio file\n",	OCTET,
 	0x31636170,	0xFFFFFFFF,	"pac3 audio file\n",	OCTET,
 	/* NOTE(review): x has bits set that mask clears, so this entry can never match - confirm the intended magic */
 	0x32636170,	0xFFFF00FF,	"pac4 audio file\n",	OCTET,
 	0xBA010000,	0xFFFFFFFF,	"mpeg system stream\n",	OCTET,
 	0x30800CC0,	0xFFFFFFFF,	"inferno .dis executable\n", OCTET,
 	0x04034B50,	0xFFFFFFFF,	"zip archive\n", "application/zip\n",
 	070707,		0xFFFF,		"cpio archive\n", OCTET,
 	0x2F7,		0xFFFF,		"tex dvi\n", "application/dvi\n",
 	0xfffa0000,	0xfffe0000,	"mp3 audio\n",	"audio/mpeg\n",
 };

 /*
  * filemagic - look x up in the ntab rules at tab.  The first rule whose
  * masked value equals x wins: its description (or MIME type under -m)
  * is printed and 1 is returned.  Returns 0 when no rule matches.
  */
 int
 filemagic(Filemagic *tab, int ntab, ulong x)
 {
 	Filemagic *m, *end;

 	end = tab+ntab;
 	for(m = tab; m < end; m++){
 		if((x & m->mask) != m->x)
 			continue;
 		print(mime ? m->mime : m->desc);
 		return 1;
 	}
 	return 0;
 }

 /*
  * long0 - classify by the first four bytes of the file, using
  * long0tab.  Returns 1 after printing a verdict, else 0.
  * (Recognition of executables through crackhdr() is disabled.)
  */
 int
 long0(void)
 {
 	long magic;

 	seek(fd, 0, 0);		/* reposition to start of file */
 	magic = LENDIAN(buf);
 	return filemagic(long0tab, nelem(long0tab), magic) ? 1 : 0;
 }

 /* from tar.c */
 enum { NAMSIZ = 100, TBLOCK = 512 };

 /*
  * One tar header block.  dummy gives byte access to the whole block
  * (used by checksum()); dbuf overlays the header fields on it.
  */
 union	hblock
 {
 	char	dummy[TBLOCK];
 	struct	header
 	{
 		char	name[NAMSIZ];
 		char	mode[8];
 		char	uid[8];
 		char	gid[8];
 		char	size[12];
 		char	mtime[12];
 		char	chksum[8];	/* parsed as octal text by istar() */
 		char	linkflag;
 		char	linkname[NAMSIZ];
 		/* rest are defined by POSIX's ustar format; see p1003.2b */
 		char	magic[6];	/* "ustar" */
 		char	version[2];
 		char	uname[32];
 		char	gname[32];
 		char	devmajor[8];
 		char	devminor[8];
 		char	prefix[155];  /* if non-null, path = prefix "/" name */
 	} dbuf;
 };

 /*
  * checksum - sum of all TBLOCK bytes of the header block, each taken
  * as an unsigned value, with the chksum field counted as blanks.
  * The chksum field of *hp is overwritten with blanks as a side effect,
  * so callers must read the stored checksum first.
  */
 int
 checksum(union hblock *hp)
 {
 	int k, sum;

 	memset(hp->dbuf.chksum, ' ', sizeof hp->dbuf.chksum);
 	sum = 0;
 	for(k = 0; k < TBLOCK; k++)
 		sum += hp->dummy[k] & 0xff;
 	return sum;
 }

 /*
  * istar - recognize a tar archive: re-read the first block from the
  * file and accept it when the entry name is not empty and the stored
  * octal checksum equals the computed one.  A "ustar" magic makes it a
  * posix tar archive.  Returns 1 after printing a verdict, else 0.
  */
 int
 istar(void)
 {
 	int want, posix;
 	char blk[TBLOCK];
 	union hblock *hp;
 	struct header *h;

 	hp = (union hblock *)blk;
 	h = &hp->dbuf;

 	seek(fd, 0, 0);		/* reposition to start of file */
 	if(readn(fd, blk, sizeof blk) != sizeof blk)
 		return 0;
 	/* must be read before checksum() blanks the field */
 	want = strtol(h->chksum, 0, 8);
 	if(h->name[0] == '\0' || checksum(hp) != want)
 		return 0;

 	posix = strcmp(h->magic, "ustar") == 0;
 	if(mime)
 		print(posix ? "application/x-ustar\n" : "application/x-tar\n");
 	else
 		print(posix ? "posix tar archive\n" : "tar archive\n");
 	return 1;
 }

 /*
  * initial words to classify file
  *
  * istring() compares the first 'length' bytes of the file with 'key'
  * using memcmp, so keys may contain NUL bytes and 'length' need not be
  * the full length of the key.  The table is scanned in order and the
  * first match wins, which is why the longer "!<arch>\n__.SYMDEF" key
  * comes before "!<arch>\n".  The all-zero entry ends the table.
  */
 struct	FILE_STRING
 {
 	char 	*key;		/* bytes expected at the start of the file */
 	char	*filetype;	/* description printed normally */
 	int	length;		/* number of bytes of key to compare */
 	char	*mime;		/* MIME type printed under -m */
 } file_string[] =
 {
 	"!<arch>\n__.SYMDEF",	"archive random library",	16,	"application/octet-stream",
 	"!<arch>\n",		"archive",			8,	"application/octet-stream",
 	"070707",		"cpio archive - ascii header",	6,	"application/octet-stream",
 	"%!",			"postscript",			2,	"application/postscript",
 	"\004%!",		"postscript",			3,	"application/postscript",
 	"x T post",		"troff output for post",	8,	"application/troff",
 	"x T Latin1",		"troff output for Latin1",	10,	"application/troff",
 	"x T utf",		"troff output for UTF",		7,	"application/troff",
 	"x T 202",		"troff output for 202",		7,	"application/troff",
 	"x T aps",		"troff output for aps",		7,	"application/troff",
 	"GIF",			"GIF image", 			3,	"image/gif",
 	"\0PC Research, Inc\0",	"ghostscript fax file",		18,	"application/ghostscript",
 	"%PDF",			"PDF",				4,	"application/pdf",
 	"<html>\n",		"HTML file",			7,	"text/html",
 	"<HTML>\n",		"HTML file",			7,	"text/html",
 	"compressed\n",		"Compressed image or subfont",	11,	"application/octet-stream",
 	"\111\111\052\000",	"tiff",				4,	"image/tiff",
 	"\115\115\000\052",	"tiff",				4,	"image/tiff",
 	"\377\330\377\340",	"jpeg",				4,	"image/jpeg",
 	"\377\330\377\341",	"jpeg",				4,	"image/jpeg",
 	"\377\330\377\333",	"jpeg",				4,	"image/jpeg",
 	"\106\117\126\142",	"x3f",				4,	"image/x3f",
 	"BM",			"bmp",				2,	"image/bmp",
 	"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",	"microsoft office document",	8,	"application/octet-stream",
 	"<MakerFile ",		"FrameMaker file",		11,	"application/framemaker",
 	"\033%-12345X",	"HPJCL file",		9,	"application/hpjcl",
 	"ID3",			"mp3 audio with id3",	3,	"audio/mpeg",
 	0,0,0,0
 };

 /*
  * istring - classify by the bytes at the very start of the file:
  * first the fixed keys in file_string[], then a "TYPE=" picture header,
  * then a "#!" script line.  Returns 1 after printing a verdict, else 0.
  */
 int
 istring(void)
 {
 	int i, j;
 	struct FILE_STRING *p;

 	for(p = file_string; p->key; p++) {
 		if(nbuf >= p->length && !memcmp(buf, p->key, p->length)) {
 			if(mime)
 				print("%s\n", p->mime);
 			else
 				print("%s\n", p->filetype);
 			return 1;
 		}
 	}
 	if(strncmp((char*)buf, "TYPE=", 5) == 0) {	/* td */
 		/* the picture type is the rest of the first line */
 		for(i = 5; i < nbuf; i++)
 			if(buf[i] == '\n')
 				break;
 		if(mime)
 			print(OCTET);
 		else
 			print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
 		return 1;
 	}
 	if(buf[0]=='#' && buf[1]=='!'){
 		/*
 		 * j stops at the end of the interpreter path (blank or end
 		 * of line); i is left just past its last '/', so only the
 		 * final path element is printed.
 		 */
 		i=2;
 		for(j=2; j < nbuf && buf[j] != ' ' && buf[j] != '\n' && buf[j] != '\r'; j++)
 			if(buf[j] == '/')
 				i = j+1;
 		if(mime)
 			print(PLAIN);
 		else
 			print("%.*s executable file script\n", utfnlen((char*)buf+i, j-i), (char*)buf+i);
 		return 1;
 	}
 	return 0;
 }

 int
 iff(void)
 {
 	if (strncmp((char*)buf, "FORM", 4) == 0 &&
 	    strncmp((char*)buf+8, "AIFF", 4) == 0) {
 		print("%s\n", mime? "audio/x-aiff": "aiff audio");
 		return 1;
 	}
 	return 0;
 }

 /*
  * Tag names that ishtml() looks for between '<' (or "</") and '>'.
  * The comparison ignores case; the zero entry ends the list.
  */
 char*	html_string[] =
 {
 	"title",
 	"body",
 	"head",
 	"strong",
 	"h1",
 	"h2",
 	"h3",
 	"h4",
 	"h5",
 	"h6",
 	"ul",
 	"li",
 	"dl",
 	"br",
 	"em",
 	0,
 };

 /*
  * ishtml - recognize HTML by counting tags.  The text between each '<'
  * (or "</") and the next '>' is compared, ignoring case, with the names
  * in html_string[]; once more than five of them have matched the file
  * is reported as HTML.  Returns 1 after printing a verdict, else 0.
  *
  * Only the p-q characters found between the brackets are handed to
  * cistrncmp(), so an empty pair "<>" would compare zero characters and
  * match the first table entry.  Empty pairs are skipped, otherwise text
  * that merely uses "<>" as an operator could be counted as HTML.
  */
 int
 ishtml(void)
 {
 	uchar *p, *q;
 	int i, count;

 		/* compare strings between '<' and '>' to html table */
 	count = 0;
 	p = buf;
 	for(;;) {
 		while (p < buf+nbuf && *p != '<')
 			p++;
 		p++;
 		if (p >= buf+nbuf)
 			break;
 		if(*p == '/')
 			p++;
 		q = p;
 		while(p < buf+nbuf && *p != '>')
 			p++;
 		if (p >= buf+nbuf)
 			break;
 		if(p > q) {		/* ignore "<>" and "</>" */
 			for(i = 0; html_string[i]; i++) {
 				if(cistrncmp(html_string[i], (char*)q, p-q) == 0) {
 					if(count++ > 4) {
 						print(mime ? "text/html\n" : "HTML file\n");
 						return 1;
 					}
 					break;
 				}
 			}
 		}
 		p++;
 	}
 	return 0;
 }

 /*
  * Mail header field names, colon included, that isrfc822() looks for
  * at the start of a line.  The comparison ignores case; the zero entry
  * ends the list.  Field names cannot contain a space, so the reply
  * address header is spelled "reply-to:".
  */
 char*	rfc822_string[] =
 {
 	"from:",
 	"date:",
 	"to:",
 	"subject:",
 	"received:",
 	"reply-to:",
 	"sender:",
 	0,
 };

 /*
  * isrfc822 - recognize a mail message by its header lines.  Every
  * complete line of buf is examined until one that is not a
  * continuation line (leading blank or tab) has no ':' before its
  * newline.  Lines beginning with a name from rfc822_string[] are
  * counted, as is a first line of the form "From ... remote from ...".
  * Three or more hits make it an email file.
  * Returns 1 after printing a verdict, else 0.
  */
 int
 isrfc822(void)
 {

 	char *p, *q, *r;
 	int i, count;

 	count = 0;
 	p = (char*)buf;
 	for(;;) {
 		q = strchr(p, '\n');
 		if(q == nil)
 			break;
 		/* terminate the line so strstr() cannot look past it */
 		*q = 0;
 		if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
 			count++;
 			*q = '\n';
 			p = q+1;
 			continue;
 		}
 		*q = '\n';
 		if(*p != '\t' && *p != ' '){
 			/* a header line needs a ':' on this very line */
 			r = strchr(p, ':');
 			if(r == 0 || r > q)
 				break;
 			for(i = 0; rfc822_string[i]; i++) {
 				if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
 					count++;
 					break;
 				}
 			}
 		}
 		p = q+1;
 	}
 	if(count >= 3){
 		print(mime ? "message/rfc822\n" : "email file\n");
 		return 1;
 	}
 	return 0;
 }

 /*
  * ismbox - recognize a mail box: the first line starts with "From "
  * and does not contain " remote from " (that form is left to
  * isrfc822).  Returns 1 after printing a verdict, else 0.
  *
  * The first line is terminated in place for the search; the newline is
  * put back on every path so buf is left as it was read.
  */
 int
 ismbox(void)
 {
 	char *p, *q;
 	int hit;

 	p = (char*)buf;
 	q = strchr(p, '\n');
 	if(q == nil)
 		return 0;
 	*q = 0;
 	hit = strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil;
 	*q = '\n';
 	if(!hit)
 		return 0;
 	print(mime ? "text/plain\n" : "mail box\n");
 	return 1;
 }

 /*
  * isc - recognize C or alef source from the word and character counts.
  * Any one of four signs is enough: several #include-like lines, an
  * include together with alef keywords, many C keywords with
  * semicolons, or many semicolons and '=' with at least one C keyword.
  * Any alef keyword turns the verdict into "alef program".
  * Returns 1 after printing a verdict, else 0.
  */
 int
 isc(void)
 {
 	int inc, found;

 	inc = wfreq[I1];
 	found = 0;
 	if(inc >= 2 && wfreq[I2] >= inc && wfreq[I3] >= inc && cfreq['.'] >= inc)
 		found = 1;	/* includes of known headers */
 	else if(inc >= 1 && wfreq[Alword] >= inc && wfreq[I3] >= inc && cfreq['.'] >= inc)
 		found = 1;	/* includes plus alef keywords */
 	else if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
 		found = 1;	/* declarations */
 	else if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
 		found = 1;	/* assignments */
 	if(!found)
 		return 0;

 	if(mime)
 		print(PLAIN);
 	else if(wfreq[Alword] > 0)
 		print("alef program\n");
 	else
 		print("c program\n");
 	return 1;
 }

 /*
  * islimbo - recognize limbo source: at least four limbo dictionary
  * words were seen.  Returns 1 after printing a verdict, else 0.
  */
 int
 islimbo(void)
 {
 	if(wfreq[Lword] >= 4){
 		print(mime ? PLAIN : "limbo program\n");
 		return 1;
 	}
 	return 0;
 }

 /*
  * isas - recognize assembler source: at least two assembler dictionary
  * words were seen.  Returns 1 after printing a verdict, else 0.
  */
 int
 isas(void)
 {
 	if(wfreq[Aword] >= 2){
 		print(mime ? PLAIN : "as program\n");
 		return 1;
 	}
 	return 0;
 }

 /*
  * ismung - recognize compressed or encrypted data.  The top three bits
  * of each of the first 64 bytes are tallied into 8 buckets; an even
  * spread would put 8 bytes in each.  When the mean squared deviation
  * from 8 is small, i.e. the bytes are spread almost evenly, the file is
  * taken to be compressed (if it starts with 0x1f 0x8b or 0x1f 0x9d) or
  * encrypted.  Returns 1 after printing a verdict, else 0.
  */
 int
 ismung(void)
 {
 	int k, dev, hist[8];
 	float chi;

 	if(nbuf < 64)
 		return 0;
 	for(k = 0; k < 8; k++)
 		hist[k] = 0;
 	for(k = 0; k < 64; k++)
 		hist[(buf[k]>>5)&07]++;

 	chi = 0.;
 	for(k = 0; k < 8; k++){
 		dev = hist[k]-8;
 		chi += dev*dev;
 	}
 	chi /= 8.;
 	if(chi > 24.322)
 		return 0;

 	if(mime)
 		print(OCTET);
 	else if(buf[0]==0x1f && (buf[1]==0x8b || buf[1]==0x9d))
 		print("compressed\n");
 	else
 		print("encrypted\n");
 	return 1;
 }

 /*
  * english by punctuation and frequencies
  *
  * Only tried on files guessed to be ascii or ascii with control
  * characters.  The file is rejected when more than a fifth of its
  * punctuation marks are not followed by a blank or newline, when
  * '<', '>' and '/' together outnumber 'e', or when ';' is more than
  * half as frequent as 'e'.  It is accepted when vowels make up at least
  * a fifth of the non-blank bytes and the letters of "ETAION" are at
  * least ten times as frequent as those of "VJKQXZ".
  * Returns 1 after printing a verdict, else 0.
  */
 int
 isenglish(void)
 {
 	int vow, comm, rare, badpun, punct;
 	char *p;

 	if(guess != Fascii && guess != Feascii)
 		return 0;
 	badpun = 0;
 	punct = 0;
 	/* stop one byte early: each mark is checked against the byte after it */
 	for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
 		switch(*p) {
 		case '.':
 		case ',':
 		case ')':
 		case '%':
 		case ';':
 		case ':':
 		case '?':
 			punct++;
 			if(p[1] != ' ' && p[1] != '\n')
 				badpun++;
 		}
 	if(badpun*5 > punct)
 		return 0;
 	if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e'])	/* shell file test */
 		return 0;
 	if(2*cfreq[';'] > cfreq['e'])
 		return 0;

 	/* each letter group is counted in both cases */
 	vow = 0;
 	for(p="AEIOU"; *p; p++) {
 		vow += cfreq[(uchar)*p];
 		vow += cfreq[tolower((uchar)*p)];
 	}
 	comm = 0;
 	for(p="ETAION"; *p; p++) {
 		comm += cfreq[(uchar)*p];
 		comm += cfreq[tolower((uchar)*p)];
 	}
 	rare = 0;
 	for(p="VJKQXZ"; *p; p++) {
 		rare += cfreq[(uchar)*p];
 		rare += cfreq[tolower((uchar)*p)];
 	}
 	if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
 		print(mime ? PLAIN : "English text\n");
 		return 1;
 	}
 	return 0;
 }

 /*
  * p9bitnum - read one number from an image header field.  A field is
  * P9BITLEN bytes: blanks, then digits filling every remaining byte but
  * the last, then a single blank.  Returns the value, or -1 when the
  * field does not have that form.
  */
 #define	P9BITLEN	12
 int
 p9bitnum(uchar *bp)
 {
 	int val, left;

 	/* skip the padding; a field made of nothing but blanks is an error */
 	for(left = P9BITLEN; *bp == ' '; bp++)
 		if(--left <= 0)
 			return -1;

 	/* everything up to the final byte must be a digit */
 	val = 0;
 	for(; left > 1; left--){
 		if(!isdigit(*bp))
 			return -1;
 		val = val*10 + *bp-'0';
 		bp++;
 	}
 	return *bp == ' ' ? val : -1;
 }

 /*
  * depthof - decode the first 12-byte field of an image header into a
  * pixel depth in bits.
  *
  * A field that starts (after blanks) with a digit is the old format:
  * the number n is a log depth and the result is 1<<n, with *newp set
  * to 0.  Anything else is the new format, a channel descriptor made of
  * letter/width pairs such as r8g8b8: the widths are added up, *newp is
  * set to 1, and only totals of 8, 16, 24 or 32 are accepted.
  *
  * Returns the depth, or -1 for a blank field, an unsupported total, or
  * an old-format number too large to be used as a shift count (shifting
  * an int by 31 or more would be undefined or negative).
  */
 int
 depthof(char *s, int *newp)
 {
 	char *es;
 	int d;
 	unsigned long ld;

 	*newp = 0;
 	es = s+12;
 	while(s<es && *s==' ')
 		s++;
 	if(s == es)
 		return -1;
 	if('0'<=*s && *s<='9'){
 		ld = strtoul(s, 0, 10);
 		if(ld > 30)
 			return -1;
 		return 1<<(int)ld;
 	}

 	*newp = 1;
 	d = 0;
 	while(s<es && *s!=' '){
 		s++;	/* skip letter */
 		d += strtoul(s, &s, 10);
 	}

 	switch(d){
 	case 32:
 	case 24:
 	case 16:
 	case 8:
 		return d;
 	}
 	return -1;
 }

 int
 isp9bit(void)
 {
 	int dep, lox, loy, hix, hiy, px, new;
 	ulong t;
 	long len;
 	char *newlabel;

 	newlabel = "old ";

 	dep = depthof((char*)buf + 0*P9BITLEN, &new);
 	if(new)
 		newlabel = "";
 	lox = p9bitnum(buf + 1*P9BITLEN);
 	loy = p9bitnum(buf + 2*P9BITLEN);
 	hix = p9bitnum(buf + 3*P9BITLEN);
 	hiy = p9bitnum(buf + 4*P9BITLEN);
 	if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0)
 		return 0;

 	if(dep < 8){
 		px = 8/dep;	/* pixels per byte */
 		/* set l to number of bytes of data per scan line */
 		if(lox >= 0)
 			len = (hix+px-1)/px - lox/px;
 		else{	/* make positive before divide */
 			t = (-lox)+px-1;
 			t = (t/px)*px;
 			len = (t+hix+px-1)/px;
 		}
 	}else
 		len = (hix-lox)*dep/8;
 	len *= (hiy-loy);		/* col length */
 	len += 5*P9BITLEN;		/* size of initial ascii */

 	/*
 	 * for image file, length is non-zero and must match calculation above
 	 * for /dev/window and /dev/screen the length is always zero
 	 * for subfont, the subfont header should follow immediately.
 	 */
 	if (len != 0 && mbuf->length == 0) {
 		print("%splan 9 image\n", newlabel);
 		return 1;
 	}
 	if (mbuf->length == len) {
 		print("%splan 9 image\n", newlabel);
 		return 1;
 	}
 	/* Ghostscript sometimes produces a little extra on the end */
 	if (mbuf->length < len+P9BITLEN) {
 		print("%splan 9 image\n", newlabel);
 		return 1;
 	}
 	if (p9subfont(buf+len)) {
 		print("%ssubfont file\n", newlabel);
 		return 1;
 	}
 	return 0;
 }

 /*
  * p9subfont - check for a subfont header at p: three P9BITLEN-byte
  * numbers (character count, height, ascent).  Returns 1 when all three
  * parse, 0 otherwise.  When the header would lie beyond buf the image
  * is too big to check and a subfont is assumed.
  */
 int
 p9subfont(uchar *p)
 {
 	int k;

 	if(p+3*P9BITLEN > buf+sizeof(buf))
 		return 1;

 	/* char count, height, ascent, in that order */
 	for(k = 0; k < 3; k++)
 		if(p9bitnum(p + k*P9BITLEN) < 0)
 			return 0;
 	return 1;
 }

 /* blank, tab or newline; c is evaluated up to three times */
 #define	WHITESPACE(c)		((c) == ' ' || (c) == '\t' || (c) == '\n')

 /*
  * isp9font - recognize a plan 9 font file: two numbers (height and
  * ascent) followed by one or more entries of the form
  *	min max subfont-name
  * where every named subfont must exist.  A name that does not start
  * with '/' is looked up in the directory of the font file itself, when
  * the font's own name contains a slash.
  * Returns 1 after printing a verdict, else 0.
  */
 int
 isp9font(void)
 {
 	uchar *cp, *p;
 	int i, n;
 	char pathname[1024];

 	cp = buf;
 	if (!getfontnum(cp, &cp))	/* height */
 		return 0;
 	if (!getfontnum(cp, &cp))	/* ascent */
 		return 0;
 	/* i counts the complete entries read */
 	for (i = 0; 1; i++) {
 		if (!getfontnum(cp, &cp))	/* min */
 			break;
 		if (!getfontnum(cp, &cp))	/* max */
 			return 0;
 		while (WHITESPACE(*cp))
 			cp++;
 		/* p..cp is the subfont name */
 		for (p = cp; *cp && !WHITESPACE(*cp); cp++)
 				;
 			/* construct a path name, if needed */
 		n = 0;
 		if (*p != '/' && slash) {
 			/* copy the directory part of fname, slash included */
 			n = slash-fname+1;
 			if (n < sizeof(pathname))
 				memcpy(pathname, fname, n);
 			else n = 0;
 		}
 		/* a name too long for pathname is not checked at all */
 		if (n+cp-p < sizeof(pathname)) {
 			memcpy(pathname+n, p, cp-p);
 			n += cp-p;
 			pathname[n] = 0;
 			if (access(pathname, AEXIST) < 0)
 				return 0;
 		}
 	}
 	if (i) {
 		print(mime ? "text/plain\n" : "font file\n");
 		return 1;
 	}
 	return 0;
 }

 /*
  * getfontnum - skip white space at cp and check that a number
  * delimited by white space follows.  On success *rp is left just past
  * the number and 1 is returned.  Returns 0 when the next token does not
  * start with a digit (*rp is then untouched) or when the number is not
  * followed by white space.
  */
 int
 getfontnum(uchar *cp, uchar **rp)
 {
 	uchar *s;

 	for(s = cp; WHITESPACE(*s); s++)
 		;
 	if(!('0' <= *s && *s <= '9'))
 		return 0;
 	/* only the end of the number matters; its value is not used */
 	strtoul((char *)s, (char **)rp, 0);
 	return WHITESPACE(**rp) ? 1 : 0;
 }

 /*
  * isrtf - recognize rich text format: "\rtf1" occurs somewhere in buf
  * before its first NUL.  Returns 1 after printing a verdict, else 0.
  */
 int
 isrtf(void)
 {
 	if(strstr((char *)buf, "\\rtf1") == 0)
 		return 0;
 	print(mime ? "application/rtf\n" : "rich text format\n");
 	return 1;
 }

 /*
  * ismsdos - recognize an MSDOS executable by the bytes 0x4d 0x5a
  * ("MZ") at the start of the file.
  * Returns 1 after printing a verdict, else 0.
  */
 int
 ismsdos(void)
 {
 	if(buf[0] != 0x4d || buf[1] != 0x5a)
 		return 0;
 	print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
 	return 1;
 }

 int
 iself(void)
 {
 	static char *cpu[] = {		/* NB: incomplete and arbitary list */
 		nil,
 	/*1*/	"WE32100",
 	/*2*/	"SPARC",
 	/*3*/	"i386",
 	/*4*/	"M68000",
 	/*5*/	"M88000",
 	/*6*/	"i486",
 	/*7*/	"i860",
 	/*8*/	"R3000",
 	/*9*/	"S370",
 	/*10*/	"R4000",
 		nil, nil, nil, nil,
 	/*15*/	"HP-PA",
 		nil,
 		nil,
 	/*18*/	"sparc v8+",
 	/*19*/	"i960",
 	/*20*/	"PPC-32",
 	/*21*/	"PPC-64",
 		nil, nil, nil, nil,
 		nil, nil, nil, nil, nil,
 		nil, nil, nil, nil, nil,
 		nil, nil, nil, nil,
 	/*40*/	"ARM",
 	/*41*/	"Alpha",
 		nil,
 	/*43*/	"sparc v9",
 		nil, nil,
 		nil, nil, nil, nil,
 	/*50*/	"IA-64",
 		nil, nil, nil, nil, nil,
 		nil, nil, nil, nil, nil,
 		nil,
 	/*62*/	"AMD64",
 		nil, nil, nil,
 		nil, nil, nil, nil, nil,
 		nil, nil, nil, nil,
 	/*75*/	"VAX",
 	};


 	if (memcmp(buf, "\177ELF", 4) == 0){
 		/* gcc misparses \x7FELF as \x7FE L F */
 		if (!mime){
 			int n = (buf[19] << 8) | buf[18];
 			char *p = "unknown";

 			if (n > 0 && n < nelem(cpu) && cpu[n])
 				p = cpu[n];
 			else {
 				/* try the other byte order */
 				n = (buf[18] << 8) | buf[19];
 				if (n > 0 && n < nelem(cpu) && cpu[n])
 					p = cpu[n];
 			}
 			print("%s ELF executable\n", p);
 		}
 		else
 			print("application/x-elf-executable");
 		return 1;
 	}

 	return 0;
 }