src/cmd/upas/scanmail/common.c - plan9 - Git at Google

 #include <u.h>
 #include <libc.h>
 #include <bio.h>
 #include <regexp.h>
 #include "spam.h"

 /* Sizes used by the message reader and the header scanner in this file. */
 enum {
 	Quanta	= 8192,		/* bytes requested per Bread while readmsg hunts for the end of the header */
 	Minbody = 6000,		/* readmsg tries to have at least this many body bytes buffered */
 	HdrMax	= 15		/* size of the scratch buffer endofhdr uses to sniff a header keyword */
 };

 typedef struct keyword Keyword;
 typedef struct word Word;

 /* A literal string paired with its length; tables of these are scanned by isword. */
 struct word{
 	char	*string;	/* text to compare against; 0 ends a table */
 	int	n;		/* number of characters of string that isword compares */
 };

 /* Maps an action name from the pattern file to its action code (see findkey). */
 struct	keyword{
 	char	*string;	/* action name; 0 ends the table */
 	int	value;		/* value findkey returns for this name */
 };

 Word	htmlcmds[] =
 {
 	"html",		4,
 	"!doctype html", 13,
 	0,

 };

 Word	hrefs[] =
 {
 	"a href=",	7,
 	"a title=",	8,
 	"a target=",	9,
 	"base href=",	10,
 	"img src=",	8,
 	"img border=",	11,
 	"form action=", 12,
 	"!--",		3,
 	0,

 };

 /*
  *	RFC822 header keywords to look for for fractured header.
  *	all lengths must be less than HdrMax defined above.
  */
 Word	hdrwords[] =
 {
 	"cc:",			3,
 	"bcc:", 		4,
 	"to:",			3,
 	0,			0,

 };

 Keyword	keywords[] =
 {
 	"header",	HoldHeader,
 	"line",		SaveLine,
 	"hold",		Hold,
 	"dump",		Dump,
 	"loff",		Lineoff,
 	0,		Nactions
 };

 /*
  * Per-action pattern lists, indexed by action code.  Each entry starts
  * with a label string and two zero fields; parsepats later stores the
  * parsed patterns through the .strings and .regexps members.
  */
 Patterns patterns[] = {
 [Dump]		{ "DUMP:", 0, 0 },
 [HoldHeader]	{ "HEADER:", 0, 0 },
 [Hold]		{ "HOLD:", 0, 0 },
 [SaveLine]	{ "LINE:", 0, 0 },
 [Lineoff]	{ "LINEOFF:", 0, 0 },
 [Nactions]	{ 0, 0, 0 }
 };

 /* Forward declarations of file-local helpers defined further down. */
 static char*	endofhdr(char*, char*);
 static	int	escape(char**);
 static	int	extract(char*);
 static	int	findkey(char*);
 static	int	hash(int);
 static	int	isword(Word*, char*, int);
 static	void	parsealt(Biobuf*, char*, Spat**);

 /*
  *	The canonicalizer: convert input to canonical representation
  */
 /*
  *	Read a message from bp into a freshly allocated, NUL-terminated
  *	buffer.
  *
  *	Data is read Quanta bytes at a time until the end of the header is
  *	found (see endofhdr), input runs out, or Maxread bytes have been
  *	read without finding it.  Once the header end is known, one more
  *	read tries to bring the buffered body up to Minbody bytes.
  *
  *	hsize, if non-nil, receives the offset of the end of the header;
  *	when it is nil the header is not scanned and only the first chunk
  *	is read before the body top-up.  bufsize, if non-nil, receives the
  *	number of bytes in the buffer.
  *
  *	Returns the buffer, which the caller frees, or 0 if a read fails
  *	while looking for the header or if there is no input at all.
  */
 char*
 readmsg(Biobuf *bp, int *hsize, int *bufsize)
 {
 	char *p, *buf;
 	int n, offset, eoh, bsize, delta;

 	buf = 0;
 	offset = 0;
 	if(bufsize)
 		*bufsize = 0;
 	if(hsize)
 		*hsize = 0;
 	for(;;) {
 		buf = Realloc(buf, offset+Quanta+1);
 		n = Bread(bp, buf+offset, Quanta);
 		if(n < 0){
 			free(buf);
 			return 0;
 		}
 		p = buf+offset;			/* start of this chunk */
 		offset += n;			/* end of this chunk */
 		buf[offset] = 0;
 		if(n == 0){
 			if(offset == 0){
 				/* empty input: release the buffer instead of leaking it */
 				free(buf);
 				return 0;
 			}
 			break;
 		}

 		if(hsize == 0)			/* don't process header */
 			break;
 		if(p != buf && p[-1] == '\n')	/* check for EOH across buffer split */
 			p--;
 		p = endofhdr(p, buf+offset);
 		if(p)
 			break;
 		if(offset >= Maxread)		/* gargantuan header - just punt*/
 		{
 			/* report the whole buffer as header */
 			if(hsize)
 				*hsize = offset;
 			if(bufsize)
 				*bufsize = offset;
 			return buf;
 		}
 	}
 	eoh = p-buf;				/* End of header */
 	bsize = offset - eoh;			/* amount of body already read */

 		/* Read at least Minbody bytes of the body */
 	if (bsize < Minbody){
 		delta = Minbody-bsize;
 		buf = Realloc(buf, offset+delta+1);
 		n = Bread(bp, buf+offset, delta);
 		/* a short or failed read here is tolerated: keep what we have */
 		if(n > 0) {
 			offset += n;
 			buf[offset] = 0;
 		}
 	}
 	if(hsize)
 		*hsize = eoh;
 	if(bufsize)
 		*bufsize = offset;
 	return buf;
 }

 /*
  * Report whether text, of which len characters are valid, begins
  * with any string in the 0-terminated table wp.  Returns 1 or 0.
  */
 static	int
 isword(Word *wp, char *text, int len)
 {
 	Word *w;

 	w = wp;
 	while(w->string != 0){
 		/* the table entry must fit in text before it is compared */
 		if(w->n <= len && strncmp(text, w->string, w->n) == 0)
 			return 1;
 		w++;
 	}
 	return 0;
 }

 /*
  * Find the end of the header in [raw, end).  The header ends at a
  * blank line (two consecutive newlines) unless the text right after
  * it starts with one of hdrwords, in which case the header is taken
  * to be fractured and the search continues.
  *
  * Returns a pointer to the first byte after the blank line, or 0 if
  * no end of header was found.  The byte at *end is read, so it must
  * be accessible; readmsg stores a NUL there.
  */
 static char*
 endofhdr(char *raw, char *end)
 {
 	int i;
 	char *p, *q;
 	char buf[HdrMax];

 	/*
 	 * can't use strchr to search for newlines because
 	 * there may be embedded NULs.
 	 */
 	for(p = raw; p < end; p++){
 		if(*p != '\n' || p[1] != '\n')
 			continue;
 		p++;
 		/* copy, in lower case, the start of the line after the blank one */
 		for(i = 0, q = p+1; i < sizeof(buf) && *q; q++){
 			/* tolower needs an unsigned char value; plain char may be negative */
 			buf[i++] = tolower((unsigned char)*q);
 			if(*q == ':' || *q == '\n')
 				break;
 		}
 		if(!isword(hdrwords, buf, i))
 			return p+1;
 	}
 	return 0;
 }

 /*
  * Canonicalize the tag text starting at text (just past a '<') and
  * report whether it begins with one of the strings in wp.
  *
  * Characters are copied into a local buffer up to the closing '>',
  * the end of input, or MaxHtml-1 characters: NULs and carriage
  * returns are dropped, runs of white space collapse to one blank,
  * '=' escapes are decoded by escape, and letters are lower-cased.
  *
  * If n is non-nil, *n is set to the number of input bytes consumed,
  * including the '>' when one was seen.  Returns isword's verdict.
  */
 static	int
 htmlmatch(Word *wp, char *text, char *end, int *n)
 {
 	char *cp;
 	int i, c, lastc;
 	char buf[MaxHtml];

 	/*
 	 * extract a string up to '>'
 	 */

 	i = lastc = 0;
 	cp = text;
 	while (cp < end && i < sizeof(buf)-1){
 		c = *cp++;
 		if(c == '=')
 			c = escape(&cp);
 		switch(c){
 		case 0:
 		case '\r':
 			continue;
 		case '>':
 			goto out;
 		case '\n':
 		case ' ':
 		case '\t':
 			if(lastc == ' ')
 				continue;
 			c = ' ';
 			break;
 		default:
 			/* c came from a plain char and may be negative */
 			c = tolower((unsigned char)c);
 			break;
 		}
 		buf[i++] = lastc = c;
 	}
 out:
 	buf[i] = 0;
 	if(n)
 		*n = cp-text;
 	return isword(wp, buf, i);
 }

 /*
  * Decode what follows an '=' that the caller has just consumed.
  * *msg points at the character after the '='.
  *
  *	"=\n"	soft line break: skip the newline and return the
  *		character after it, which is consumed as well
  *	"=2e"	returns '.'	(hex digit letter in either case)
  *	"=2f"	returns '/'
  *	"=20"	returns ' '
  *	"=3d"	returns '='
  *
  * Anything else returns '=' and consumes nothing.  *msg is advanced
  * past whatever was consumed.
  */
 static int
 escape(char **msg)
 {
 	int c;
 	char *p;

 	p = *msg;
 	c = *p;
 	if(c == '\n'){
 		p++;
 		c = *p++;
 	} else
 	if(c == '2'){
 		/* tolower needs an unsigned char value; plain char may be negative */
 		c = tolower((unsigned char)p[1]);
 		if(c == 'e'){
 			p += 2;
 			c = '.';
 		}else
 		if(c == 'f'){
 			p += 2;
 			c = '/';
 		}else
 		if(c == '0'){
 			p += 2;
 			c = ' ';
 		}
 		else c = '=';
 	} else {
 		if(c == '3' && tolower((unsigned char)p[1]) == 'd')
 			p += 2;
 		c = '=';
 	}
 	*msg = p;
 	return c;
 }

 /*
  * Decide what to do with the HTML tag whose '<' the caller has just
  * consumed; *msg points at the character after it.
  *
  * Until one of htmlcmds has been seen, a tag is copied through
  * unless it starts with '!'.  After that (the state lives in a
  * static and so persists across calls) only tags listed in hrefs
  * are copied.  To copy a tag, '<' is returned and *msg is left
  * alone.  Otherwise the bytes htmlmatch consumed are skipped, the
  * byte following them is returned, and *msg is moved past it.
  */
 static int
 htmlchk(char **msg, char *end)
 {
 	static int ishtml;
 	char *tag;
 	int len, keep;

 	tag = *msg;
 	if(ishtml){
 		/* already in HTML: keep only the interesting tags */
 		keep = htmlmatch(hrefs, tag, end, &len);
 	}else{
 		ishtml = htmlmatch(htmlcmds, tag, end, &len);
 		/* not an HTML keyword and not a <!comment>: keep it */
 		keep = ishtml == 0 && *tag != '!';
 	}
 	if(keep)
 		return '<';

 	/* uninteresting tag: step over it */
 	tag += len;
 	*msg = tag+1;
 	return *tag;
 }

 /*
  * Decode a base64-encoded body in [msg, end) and canonicalize the
  * decoded bytes into buf with convert, treating them as body text.
  * bufsize has the same meaning as for convert.
  *
  * The scratch buffer is NUL-terminated because the body scanners
  * called from convert look at the byte at the end pointer.
  */
 void
 conv64(char *msg, char *end, char *buf, int bufsize)
 {
 	int len, i;
 	char *cp;

 	len = end - msg;
 	if(len < 0)
 		len = 0;
 	i = (len*3)/4+1;	/* room for max chars + null */
 	cp = Malloc(i);
 	/* keep the last byte back for the terminator */
 	len = dec64((uchar*)cp, i-1, msg, len);
 	if(len < 0)		/* NOTE(review): guards a possible error return from dec64 - confirm */
 		len = 0;
 	cp[len] = 0;
 	convert(cp, cp+len, buf, bufsize, 1);
 	free(cp);
 }

 /*
  * Canonicalize the bytes in [msg, end) into buf.
  *
  * NULs and carriage returns are dropped, runs of blanks, tabs and
  * newlines collapse to a single blank, and letters are lower-cased.
  * When isbody is non-zero, HTML tags are filtered through htmlchk
  * and '=' escapes are decoded by escape first.
  *
  * At most bufsize characters are stored, followed by a terminating
  * NUL, so buf must have room for bufsize+1 bytes.
  *
  * Returns 1 if, while converting a header (isbody == 0), the text
  * "content-transfer-encoding: base64" was seen in any letter case;
  * otherwise 0.
  */
 int
 convert(char *msg, char *end, char *buf, int bufsize, int isbody)
 {

 	char *p;
 	int c, lastc, base64;

 	lastc = 0;
 	base64 = 0;
 	while(msg < end && bufsize > 0){
 		c = *msg++;

 		/*
 		 * In the body only, try to strip most HTML and
 		 * replace certain MIME escape sequences with the character
 		 */
 		if(isbody) {
 			/* repeat while a helper consumed input, so chained tags and escapes are handled */
 			do{
 				p = msg;
 				if(c == '<')
 					c = htmlchk(&msg, end);
 				if(c == '=')
 					c = escape(&msg);
 			} while(p != msg && p < end);
 		}
 		switch(c){
 		case 0:
 		case '\r':
 			continue;
 		case '\t':
 		case ' ':
 		case '\n':
 			if(lastc == ' ')
 				continue;
 			c = ' ';
 			break;
 		case 'C':	/* check for MIME base 64 encoding in header */
 		case 'c':
 			/* header field names are case-insensitive, so compare the whole name that way */
 			if(isbody == 0)
 			if(msg < end-32 && tolower((unsigned char)*msg) == 'o')
 			if(cistrncmp(msg, "ontent-transfer-encoding: base64", 32) == 0)
 				base64 = 1;
 			c = 'c';
 			break;
 		default:
 			/* c came from a plain char and may be negative */
 			c = tolower((unsigned char)c);
 			break;
 		}
 		*buf++ = c;
 		lastc = c;
 		bufsize--;
 	}
 	*buf = 0;
 	return base64;
 }

 /*
  *	The pattern parser: build data structures from the pattern file
  */

 /*
  * Bucket number for a string pattern: the low seven bits of its
  * first character.
  */
 static int
 hash(int c)
 {
 	enum { Hashmask = 0x7f };

 	return c & Hashmask;
 }

 /*
  * Look up an action name in keywords and return its action code.
  * An unknown name falls through to the terminating entry, whose
  * value is Nactions.
  */
 static	int
 findkey(char *val)
 {
 	Keyword *k;

 	k = keywords;
 	while(k->string != 0 && strcmp(val, k->string) != 0)
 		k++;
 	return k->value;
 }

 /* true if c is a blank or a tab; c may be evaluated twice, so pass no side effects */
 #define	whitespace(c)	((c) == ' ' || (c) == '\t')

 /*
  * Read the pattern file on bp and add every pattern to the global
  * patterns table.
  *
  * Each useful line has the form
  *	[*]action: pattern[~~alternate...]
  * Blank lines and lines starting with '#' are skipped, as are lines
  * without a ':' or with an action name findkey does not know.
  * A leading '*' marks a literal string pattern; otherwise the
  * pattern is a regular expression.  The text after the ':' is
  * cleaned up in place by extract.  Anything after "~~" is handed to
  * parsealt, which builds the pattern's list of alternates.
  *
  * Regular expressions are pushed on patterns[action].regexps; ones
  * that fail to compile are dropped.  Strings are pushed on the hash
  * chain, chosen by their first character, of the single Pattern kept
  * in patterns[action].strings.
  */
 void
 parsepats(Biobuf *bp)
 {
 	Pattern *p, *new;
 	char *cp, *qp;
 	int type, action, n, h;
 	Spat *spat;

 	for(;;){
 		cp = Brdline(bp, '\n');
 		if(cp == 0)
 			break;
 		/* replace the trailing newline with a terminator */
 		cp[Blinelen(bp)-1] = 0;
 		while(*cp == ' ' || *cp == '\t')
 			cp++;
 		if(*cp == '#' || *cp == 0)
 			continue;
 		type = regexp;
 		if(*cp == '*'){
 			type = string;
 			cp++;
 		}
 		/* split "action:" from the rest of the line */
 		qp = strchr(cp, ':');
 		if(qp == 0)
 			continue;
 		*qp = 0;
 		if(debug)
 			fprint(2, "action = %s\n", cp);
 		action = findkey(cp);
 		if(action >= Nactions)
 			continue;
 		cp = qp+1;
 		n = extract(cp);
 		if(n <= 0 || *cp == 0)
 			continue;

 		/* cut the pattern off at the first "~~"; alternates follow it */
 		qp = strstr(cp, "~~");
 		if(qp){
 			*qp = 0;
 			n = strlen(cp);
 		}
 		if(debug)
 			fprint(2, " Pattern: `%s'\n", cp);

 			/* Hook regexps into a chain */
 		if(type == regexp) {
 			new = Malloc(sizeof(Pattern));
 			new->action = action;
 			new->pat = regcomp(cp);
 			if(new->pat == 0){
 				free(new);
 				continue;
 			}
 			new->type = regexp;
 			new->alt = 0;
 			new->next = 0;

 			if(qp)
 				parsealt(bp, qp+2, &new->alt);

 			new->next = patterns[action].regexps;
 			patterns[action].regexps = new;
 			continue;

 		}
 			/* not a Regexp - hook strings into Pattern hash chain */
 		spat = Malloc(sizeof(*spat));
 		spat->next = 0;
 		spat->alt = 0;
 		spat->len = n;
 		spat->string = Malloc(n+1);
 		/* second character, used by matchpat as a quick pre-check */
 		spat->c1 = cp[1];
 		strcpy(spat->string, cp);

 		if(qp)
 			parsealt(bp, qp+2, &spat->alt);

 		/* create the per-action string Pattern on first use */
 		p = patterns[action].strings;
 		if(p == 0) {
 			p = Malloc(sizeof(Pattern));
 			memset(p, 0, sizeof(*p));
 			p->action = action;
 			p->type = string;
 			patterns[action].strings = p;
 		}
 		h = hash(*spat->string);
 		spat->next = p->spat[h];
 		p->spat[h] = spat;
 	}
 }

 /*
  * Parse the "~~"-separated alternates that follow a pattern and push
  * each non-empty one on the list at *head.
  *
  * cp points just past the "~~" that ended the pattern.  When the
  * text runs out right after a "~~" the list continues on the next
  * line of bp: lines are read, and cleaned with extract, until one
  * with content turns up; end of input ends the list.
  *
  * Strings are copied with Malloc and strcpy, as parsepats does,
  * rather than with strdup, whose nil return was never checked.
  * NOTE(review): this presumes Malloc itself deals with allocation
  * failure - confirm.
  */
 static void
 parsealt(Biobuf *bp, char *cp, Spat** head)
 {
 	char *p;
 	Spat *alt;
 	int n;

 	while(cp){
 		if(*cp == 0){		/*escaped newline*/
 			do{
 				cp = Brdline(bp, '\n');
 				if(cp == 0)
 					return;
 				cp[Blinelen(bp)-1] = 0;
 			} while(extract(cp) <= 0 || *cp == 0);
 		}

 		/* isolate the next alternate; cp becomes 0 after the last one */
 		p = cp;
 		cp = strstr(p, "~~");
 		if(cp){
 			*cp = 0;
 			cp += 2;
 		}
 		n = strlen(p);
 		if(n > 0){
 			alt = Malloc(sizeof(*alt));
 			alt->string = Malloc(n+1);
 			strcpy(alt->string, p);
 			/* leave no field of the node uninitialised */
 			alt->len = n;
 			alt->c1 = p[1];
 			alt->alt = 0;
 			alt->next = *head;
 			*head = alt;
 		}
 	}
 }

 /*
  * Clean up a pattern in place and return its new length.
  *
  * Leading blanks and tabs are dropped, upper-case ASCII letters are
  * folded to lower case, and an unquoted '#' ends the text.  Inside
  * double quotes everything is copied (still case-folded), with \"
  * standing for a literal quote; the quotes themselves are removed.
  * Trailing blanks and tabs are trimmed, but never back into a quoted
  * section.
  */
 static int
 extract(char *cp)
 {
 	char *src, *dst, *keep;
 	int c;

 	src = cp;
 	dst = cp;
 	keep = cp;		/* trimming may not go below this point */
 	while(whitespace(*src))
 		src++;
 	for(;;){
 		c = *src++;
 		if(c == 0 || c == '#')
 			break;
 		if(c != '"'){
 			if(c >= 'A' && c <= 'Z')
 				c += 'a'-'A';
 			*dst++ = c;
 			continue;
 		}
 		/* quoted section: copy up to the closing quote or end of text */
 		for(; *src != 0 && *src != '"'; src++){
 			if(*src == '\\' && src[1] == '"')
 				src++;
 			c = *src;
 			if(c >= 'A' && c <= 'Z')
 				c += 'a'-'A';
 			*dst++ = c;
 		}
 		if(*src != 0)
 			src++;
 		keep = dst;	/* never back up over a quoted string */
 	}
 	while(dst > keep && whitespace(dst[-1]))
 		dst--;
 	*dst = 0;
 	return dst-cp;
 }

 /*
  *	The matching engine: compare canonical input to pattern structures
  */

 /*
  * Return the first alternate on the list alt whose string occurs in
  * cmd (only when cmd is non-empty), in header+1, or in message;
  * return 0 if there is none.  cmd and header+1 are not searched a
  * second time when message already is that buffer.
  */
 static Spat*
 isalt(char *message, Spat *alt)
 {
 	Spat *a;
 	char *hdr;

 	hdr = header+1;
 	for(a = alt; a != 0; a = a->next){
 		if(*cmd != 0 && message != cmd && strstr(cmd, a->string) != 0)
 			return a;
 		if(message != hdr && strstr(hdr, a->string) != 0)
 			return a;
 		if(strstr(message, a->string) != 0)
 			return a;
 	}
 	return 0;
 }

 /*
  * Match message against pattern p.  Returns 1 on a match, else 0.
  *
  * For a string Pattern every position of message is tried against
  * the hash chain for the character found there; on a match m->s.sp
  * and m->e.ep are set to the start and end of the matched text.
  * For a regular expression regexec fills in m.  In both cases a
  * match is rejected when isalt finds one of the pattern's alternates.
  */
 int
 matchpat(Pattern *p, char *message, Resub *m)
 {
 	Spat *spat;
 	char *s;
 	int c, c1;

 	if(p->type == string){
 		c1 = *message;
 		for(s=message; c=c1; s++){
 			c1 = s[1];
 			for(spat=p->spat[hash(c)]; spat; spat=spat->next){
 				/*
 				 * The second character is a cheap pre-check.  A
 				 * one-character pattern has none (its c1 is the
 				 * terminator), so it must not be filtered on it.
 				 */
 				if(spat->len == 1 || c1 == spat->c1)
 				/* strncmp stops at the end of message; memcmp could read past it */
 				if(strncmp(s, spat->string, spat->len) == 0)
 				if(!isalt(message, spat->alt)){
 					m->s.sp = s;
 					m->e.ep = s + spat->len;
 					return 1;
 				}
 			}
 		}
 		return 0;
 	}
 	m->s.sp = m->e.ep = 0;
 	if(regexec(p->pat, message, m, 1) == 0)
 		return 0;
 	if(isalt(message, p->alt))
 		return 0;
 	return 1;
 }


 /*
  * Print the match m on fd with some context: type, then the text
  * before the match, the match, and the text after it, separated by
  * '~'.  The context reaches about 30 characters each way and is then
  * extended to the next blank or NUL.  Nothing is printed when m has
  * no match recorded.
  *
  * NOTE(review): the backward scan stops only at a NUL or blank, so
  * it looks like the text must be preceded by a NUL - confirm.
  */
 void
 xprint(int fd, char *type, Resub *m)
 {
 	enum { Context = 30 };
 	char *from, *to;
 	int i, before, matched, after;

 	if(m->s.sp == 0 || m->e.ep == 0)
 		return;

 	/* back up approx 30 characters to whitespace */
 	from = m->s.sp;
 	i = 0;
 	while(*from != 0 && i < Context){
 		i++;
 		from--;
 	}
 	while(*from != 0 && *from != ' ')
 		from--;
 	from++;

 	/* grab about 30 more chars beyond the end of the match */
 	to = m->e.ep;
 	i = 0;
 	while(*to != 0 && i < Context){
 		i++;
 		to++;
 	}
 	while(*to != 0 && *to != ' ')
 		to++;

 	before = (int)(m->s.sp-from);
 	matched = (int)(m->e.ep-m->s.sp);
 	after = (int)(to-m->e.ep);
 	fprint(fd, "%s %.*s~%.*s~%.*s\n", type, before, from, matched, m->s.sp, after, m->e.ep);
 }

 /* Marker stored in t64d for byte values outside the base64 alphabet. */
 enum {
 	INVAL=	255
 };

 /*
  * Base64 decoding table, indexed by byte value.  'A'-'Z' map to 0-25,
  * 'a'-'z' to 26-51, '0'-'9' to 52-61, '+' to 62 and '/' to 63; every
  * other byte maps to INVAL.  The comment at the start of a row gives
  * the hex index of its first entry.
  * NOTE(review): nothing in the visible part of the file reads this
  * table; presumably the base64 decoder does - confirm.
  */
 static uchar t64d[256] = {
 /*00 */	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 /*10*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 /*20*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 	INVAL, INVAL, INVAL,    62, INVAL, INVAL, INVAL,    63,
 /*30*/	   52,	  53,	 54,	55,    56,    57,    58,    59,
 	   60,	  61, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 /*40*/	INVAL,    0,      1,     2,     3,     4,     5,     6,
 	    7,    8,      9,    10,    11,    12,    13,    14,
 /*50*/	   15,   16,     17,    18,    19,    20,    21,    22,
 	   23,   24,     25, INVAL, INVAL, INVAL, INVAL, INVAL,
 /*60*/	INVAL,   26,     27,    28,    29,    30,    31,    32,
 	   33,   34,     35,    36,    37,    38,    39,    40,
 /*70*/	   41,   42,     43,    44,    45,    46,    47,    48,
 	   49,   50,     51, INVAL, INVAL, INVAL, INVAL, INVAL,
 /*80*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 /*90*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 /*A0*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 /*B0*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 /*C0*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 /*D0*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 /*E0*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 /*F0*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
 	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL
 };
	#include <u.h>
	#include <libc.h>
	#include <bio.h>
	#include <regexp.h>
	#include "spam.h"

	enum {
	Quanta = 8192,
	Minbody = 6000,
	HdrMax = 15
	};

	typedef struct keyword Keyword;
	typedef struct word Word;

	struct word{
	char *string;
	int n;
	};

	struct keyword{
	char *string;
	int value;
	};

	Word htmlcmds[] =
	{
	"html", 4,
	"!doctype html", 13,
	0,

	};

	Word hrefs[] =
	{
	"a href=", 7,
	"a title=", 8,
	"a target=", 9,
	"base href=", 10,
	"img src=", 8,
	"img border=", 11,
	"form action=", 12,
	"!--", 3,
	0,

	};

	/*
	* RFC822 header keywords to look for for fractured header.
	* all lengths must be less than HdrMax defined above.
	*/
	Word hdrwords[] =
	{
	"cc:", 3,
	"bcc:", 4,
	"to:", 3,
	0, 0,

	};

	Keyword keywords[] =
	{
	"header", HoldHeader,
	"line", SaveLine,
	"hold", Hold,
	"dump", Dump,
	"loff", Lineoff,
	0, Nactions
	};

	Patterns patterns[] = {
	[Dump] { "DUMP:", 0, 0 },
	[HoldHeader] { "HEADER:", 0, 0 },
	[Hold] { "HOLD:", 0, 0 },
	[SaveLine] { "LINE:", 0, 0 },
	[Lineoff] { "LINEOFF:", 0, 0 },
	[Nactions] { 0, 0, 0 }
	};

	static char* endofhdr(char, char);
	static int escape(char**);
	static int extract(char*);
	static int findkey(char*);
	static int hash(int);
	static int isword(Word, char, int);
	static void parsealt(Biobuf, char, Spat**);

	/*
	* The canonicalizer: convert input to canonical representation
	*/
	char*
	readmsg(Biobuf bp, int hsize, int *bufsize)
	{
	char p, buf;
	int n, offset, eoh, bsize, delta;

	buf = 0;
	offset = 0;
	if(bufsize)
	*bufsize = 0;
	if(hsize)
	*hsize = 0;
	for(;;) {
	buf = Realloc(buf, offset+Quanta+1);
	n = Bread(bp, buf+offset, Quanta);
	if(n < 0){
	free(buf);
	return 0;
	}
	p = buf+offset; /* start of this chunk */
	offset += n; /* end of this chunk */
	buf[offset] = 0;
	if(n == 0){
	if(offset == 0)
	return 0;
	break;
	}

	if(hsize == 0) /* don't process header */
	break;
	if(p != buf && p[-1] == '\n') /* check for EOH across buffer split */
	p--;
	p = endofhdr(p, buf+offset);
	if(p)
	break;
	if(offset >= Maxread) /* gargantuan header - just punt*/
	{
	if(hsize)
	*hsize = offset;
	if(bufsize)
	*bufsize = offset;
	return buf;
	}
	}
	eoh = p-buf; /* End of header */
	bsize = offset - eoh; /* amount of body already read */

	/* Read at least Minbody bytes of the body */
	if (bsize < Minbody){
	delta = Minbody-bsize;
	buf = Realloc(buf, offset+delta+1);
	n = Bread(bp, buf+offset, delta);
	if(n > 0) {
	offset += n;
	buf[offset] = 0;
	}
	}
	if(hsize)
	*hsize = eoh;
	if(bufsize)
	*bufsize = offset;
	return buf;
	}

	static int
	isword(Word wp, char text, int len)
	{
	for(;wp->string; wp++)
	if(len >= wp->n && strncmp(text, wp->string, wp->n) == 0)
	return 1;
	return 0;
	}

	static char*
	endofhdr(char raw, char end)
	{
	int i;
	char p, q;
	char buf[HdrMax];

	/*
	* can't use strchr to search for newlines because
	* there may be embedded NULL's.
	*/
	for(p = raw; p < end; p++){
	if(*p != '\n' \|\| p[1] != '\n')
	continue;
	p++;
	for(i = 0, q = p+1; i < sizeof(buf) && *q; q++){
	buf[i++] = tolower(*q);
	if(q == ':' \|\| q == '\n')
	break;
	}
	if(!isword(hdrwords, buf, i))
	return p+1;
	}
	return 0;
	}

	static int
	htmlmatch(Word wp, char text, char end, int n)
	{
	char *cp;
	int i, c, lastc;
	char buf[MaxHtml];

	/*
	* extract a string up to '>'
	*/

	i = lastc = 0;
	cp = text;
	while (cp < end && i < sizeof(buf)-1){
	c = *cp++;
	if(c == '=')
	c = escape(&cp);
	switch(c){
	case 0:
	case '\r':
	continue;
	case '>':
	goto out;
	case '\n':
	case ' ':
	case '\t':
	if(lastc == ' ')
	continue;
	c = ' ';
	break;
	default:
	c = tolower(c);
	break;
	}
	buf[i++] = lastc = c;
	}
	out:
	buf[i] = 0;
	if(n)
	*n = cp-text;
	return isword(wp, buf, i);
	}

	static int
	escape(char **msg)
	{
	int c;
	char *p;

	p = *msg;
	c = *p;
	if(c == '\n'){
	p++;
	c = *p++;
	} else
	if(c == '2'){
	c = tolower(p[1]);
	if(c == 'e'){
	p += 2;
	c = '.';
	}else
	if(c == 'f'){
	p += 2;
	c = '/';
	}else
	if(c == '0'){
	p += 2;
	c = ' ';
	}
	else c = '=';
	} else {
	if(c == '3' && tolower(p[1]) == 'd')
	p += 2;
	c = '=';
	}
	*msg = p;
	return c;
	}

	static int
	htmlchk(char *msg, char end)
	{
	int n;
	char *p;

	static int ishtml;

	p = *msg;
	if(ishtml == 0){
	ishtml = htmlmatch(htmlcmds, p, end, &n);

	/* If not an HTML keyword, check if it's
	* an HTML comment (<!comment>). if so,
	* skip over it; otherwise copy it in.
	*/
	if(ishtml == 0 && p != '!') / not comment */
	return '<'; /* copy it */

	} else if(htmlmatch(hrefs, p, end, &n)) /* if special HTML string */
	return '<'; /* copy it */

	/*
	* this is an uninteresting HTML command; skip over it.
	*/
	p += n;
	*msg = p+1;
	return *p;
	}

	/*
	* decode a base 64 encode body
	*/
	void
	conv64(char msg, char end, char *buf, int bufsize)
	{
	int len, i;
	char *cp;

	len = end - msg;
	i = (len3)/4+1; / room for max chars + null */
	cp = Malloc(i);
	len = dec64((uchar*)cp, i, msg, len);
	convert(cp, cp+len, buf, bufsize, 1);
	free(cp);
	}

	int
	convert(char msg, char end, char *buf, int bufsize, int isbody)
	{

	char *p;
	int c, lastc, base64;

	lastc = 0;
	base64 = 0;
	while(msg < end && bufsize > 0){
	c = *msg++;

	/*
	* In the body only, try to strip most HTML and
	* replace certain MIME escape sequences with the character
	*/
	if(isbody) {
	do{
	p = msg;
	if(c == '<')
	c = htmlchk(&msg, end);
	if(c == '=')
	c = escape(&msg);
	} while(p != msg && p < end);
	}
	switch(c){
	case 0:
	case '\r':
	continue;
	case '\t':
	case ' ':
	case '\n':
	if(lastc == ' ')
	continue;
	c = ' ';
	break;
	case 'C': /* check for MIME base 64 encoding in header */
	case 'c':
	if(isbody == 0)
	if(msg < end-32 && *msg == 'o' && msg[1] == 'n')
	if(cistrncmp(msg+2, "tent-transfer-encoding: base64", 30) == 0)
	base64 = 1;
	c = 'c';
	break;
	default:
	c = tolower(c);
	break;
	}
	*buf++ = c;
	lastc = c;
	bufsize--;
	}
	*buf = 0;
	return base64;
	}

	/*
	* The pattern parser: build data structures from the pattern file
	*/

	static int
	hash(int c)
	{
	return c & 127;
	}

	static int
	findkey(char *val)
	{
	Keyword *kp;

	for(kp = keywords; kp->string; kp++)
	if(strcmp(val, kp->string) == 0)
	break;
	return kp->value;
	}

	#define whitespace(c) ((c) == ' ' \|\| (c) == '\t')

	void
	parsepats(Biobuf *bp)
	{
	Pattern p, new;
	char cp, qp;
	int type, action, n, h;
	Spat *spat;

	for(;;){
	cp = Brdline(bp, '\n');
	if(cp == 0)
	break;
	cp[Blinelen(bp)-1] = 0;
	while(cp == ' ' \|\| cp == '\t')
	cp++;
	if(cp == '#' \|\| cp == 0)
	continue;
	type = regexp;
	if(cp == ''){
	type = string;
	cp++;
	}
	qp = strchr(cp, ':');
	if(qp == 0)
	continue;
	*qp = 0;
	if(debug)
	fprint(2, "action = %s\n", cp);
	action = findkey(cp);
	if(action >= Nactions)
	continue;
	cp = qp+1;
	n = extract(cp);
	if(n <= 0 \|\| *cp == 0)
	continue;

	qp = strstr(cp, "~~");
	if(qp){
	*qp = 0;
	n = strlen(cp);
	}
	if(debug)
	fprint(2, " Pattern: `%s'\n", cp);

	/* Hook regexps into a chain */
	if(type == regexp) {
	new = Malloc(sizeof(Pattern));
	new->action = action;
	new->pat = regcomp(cp);
	if(new->pat == 0){
	free(new);
	continue;
	}
	new->type = regexp;
	new->alt = 0;
	new->next = 0;

	if(qp)
	parsealt(bp, qp+2, &new->alt);

	new->next = patterns[action].regexps;
	patterns[action].regexps = new;
	continue;

	}
	/* not a Regexp - hook strings into Pattern hash chain */
	spat = Malloc(sizeof(*spat));
	spat->next = 0;
	spat->alt = 0;
	spat->len = n;
	spat->string = Malloc(n+1);
	spat->c1 = cp[1];
	strcpy(spat->string, cp);

	if(qp)
	parsealt(bp, qp+2, &spat->alt);

	p = patterns[action].strings;
	if(p == 0) {
	p = Malloc(sizeof(Pattern));
	memset(p, 0, sizeof(*p));
	p->action = action;
	p->type = string;
	patterns[action].strings = p;
	}
	h = hash(*spat->string);
	spat->next = p->spat[h];
	p->spat[h] = spat;
	}
	}

	static void
	parsealt(Biobuf bp, char cp, Spat** head)
	{
	char *p;
	Spat *alt;

	while(cp){
	if(cp == 0){ /escaped newline*/
	do{
	cp = Brdline(bp, '\n');
	if(cp == 0)
	return;
	cp[Blinelen(bp)-1] = 0;
	} while(extract(cp) <= 0 \|\| *cp == 0);
	}

	p = cp;
	cp = strstr(p, "~~");
	if(cp){
	*cp = 0;
	cp += 2;
	}
	if(strlen(p)){
	alt = Malloc(sizeof(*alt));
	alt->string = strdup(p);
	alt->next = *head;
	*head = alt;
	}
	}
	}

	static int
	extract(char *cp)
	{
	int c;
	char p, q, *r;

	p = q = r = cp;
	while(whitespace(*p))
	p++;
	while(c = *p++){
	if (c == '#')
	break;
	if(c == '"'){
	while(p && p != '"'){
	if(*p == '\\' && p[1] == '"')
	p++;
	if('A' <= p && p <= 'Z')
	q++ = p++ + ('a'-'A');
	else
	q++ = p++;
	}
	if(*p)
	p++;
	r = q; /* never back up over a quoted string */
	} else {
	if('A' <= c && c <= 'Z')
	c += ('a'-'A');
	*q++ = c;
	}
	}
	while(q > r && whitespace(q[-1]))
	q--;
	*q = 0;
	return q-cp;
	}

	/*
	* The matching engine: compare canonical input to pattern structures
	*/

	static Spat*
	isalt(char message, Spat alt)
	{
	while(alt) {
	if(*cmd)
	if(message != cmd && strstr(cmd, alt->string))
	break;
	if(message != header+1 && strstr(header+1, alt->string))
	break;
	if(strstr(message, alt->string))
	break;
	alt = alt->next;
	}
	return alt;
	}

	int
	matchpat(Pattern p, char message, Resub *m)
	{
	Spat *spat;
	char *s;
	int c, c1;

	if(p->type == string){
	c1 = *message;
	for(s=message; c=c1; s++){
	c1 = s[1];
	for(spat=p->spat[hash(c)]; spat; spat=spat->next){
	if(c1 == spat->c1)
	if(memcmp(s, spat->string, spat->len) == 0)
	if(!isalt(message, spat->alt)){
	m->s.sp = s;
	m->e.ep = s + spat->len;
	return 1;
	}
	}
	}
	return 0;
	}
	m->s.sp = m->e.ep = 0;
	if(regexec(p->pat, message, m, 1) == 0)
	return 0;
	if(isalt(message, p->alt))
	return 0;
	return 1;
	}


	void
	xprint(int fd, char type, Resub m)
	{
	char p, q;
	int i;

	if(m->s.sp == 0 \|\| m->e.ep == 0)
	return;

	/* back up approx 30 characters to whitespace */
	for(p = m->s.sp, i = 0; *p && i < 30; i++, p--)
	;
	while(p && p != ' ')
	p--;
	p++;

	/* grab about 30 more chars beyond the end of the match */
	for(q = m->e.ep, i = 0; *q && i < 30; i++, q++)
	;
	while(q && q != ' ')
	q++;

	fprint(fd, "%s %.s~%.s~%.*s\n", type, (int)(m->s.sp-p), p, (int)(m->e.ep-m->s.sp), m->s.sp, (int)(q-m->e.ep), m->e.ep);
	}

	enum {
	INVAL= 255
	};

	static uchar t64d[256] = {
	/00 / INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	/10/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	/20/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, 62, INVAL, INVAL, INVAL, 63,
	/30/ 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	/40/ INVAL, 0, 1, 2, 3, 4, 5, 6,
	7, 8, 9, 10, 11, 12, 13, 14,
	/50/ 15, 16, 17, 18, 19, 20, 21, 22,
	23, 24, 25, INVAL, INVAL, INVAL, INVAL, INVAL,
	/60/ INVAL, 26, 27, 28, 29, 30, 31, 32,
	33, 34, 35, 36, 37, 38, 39, 40,
	/70/ 41, 42, 43, 44, 45, 46, 47, 48,
	49, 50, 51, INVAL, INVAL, INVAL, INVAL, INVAL,
	/80/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	/90/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	/A0/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	/B0/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	/C0/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	/D0/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	/E0/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	/F0/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL
	};