Import version of libhtml that might actually work with ANSI C.

commit: 7cf289ca89a7416999ae02330236042b0d37e3db [log] [tgz]
author: wkj <devnull@localhost> Tue Apr 06 19:06:52 2004 +0000
committer: wkj <devnull@localhost> Tue Apr 06 19:06:52 2004 +0000
tree: 796d1363a7a53c72c28b199758ee674f1326a510
parent: 3e3817f7c86658f60715dd93768eaf8285807985 [diff]
diff --git a/src/cmd/htmlfmt/dat.h b/src/cmd/htmlfmt/dat.h
new file mode 100644
index 0000000..f3b0560
--- /dev/null
+++ b/src/cmd/htmlfmt/dat.h

@@ -0,0 +1,50 @@
+typedef struct Bytes Bytes;
+typedef struct URLwin URLwin;
+
+/* Size constants; html.c, main.c and util.c do not refer to them. */
+enum
+{
+	STACK		= 8192,
+	EVENTSIZE	= 256,
+};
+
+/*
+ * Growable byte buffer filled by growbytes, which appends data,
+ * advances n and keeps a NUL byte at b[n].
+ */
+struct Bytes
+{
+	uchar	*b;	/* the data; nil while nothing has been appended to a zeroed Bytes */
+	long		n;	/* number of bytes stored, not counting the trailing NUL */
+	long		nalloc;	/* number of bytes allocated for b */
+};
+
+/*
+ * One document being formatted: where it comes from, where the
+ * text goes, and the parse results produced by rendertext.
+ */
+struct URLwin
+{
+	int		infd;	/* descriptor the HTML is read from (set by loadhtml) */
+	int		outfd;	/* descriptor rerender writes the text to (loadhtml uses 1) */
+	int		type;	/* media type handed to parsehtml (loadhtml uses TextHtml) */
+
+	char		*url;	/* copy of the document URL, used by fullurl to resolve links */
+	Item		*items;	/* item list returned by parsehtml */
+	Docinfo	*docinfo;	/* document info filled in by parsehtml */
+};
+
+extern	char*	url;
+extern	int		aflag;
+extern	int		width;
+extern	int		defcharset;
+
+extern	char*	loadhtml(int);
+
+extern	char*	readfile(char*, char*, int*);
+extern	int	charset(char*);
+extern	void*	emalloc(ulong);
+extern	char*	estrdup(char*);
+extern	char*	estrstrdup(char*, char*);
+extern	char*	egrow(char*, char*, char*);
+extern	char*	eappend(char*, char*, char*);
+extern	void		error(char*, ...);
+
+extern	void		growbytes(Bytes*, char*, long);
+
+extern	void		rendertext(URLwin*, Bytes*);
+extern	void		rerender(URLwin*);
+extern	void		freeurlwin(URLwin*);
+
+#pragma	varargck	argpos	error	1

diff --git a/src/cmd/htmlfmt/html.c b/src/cmd/htmlfmt/html.c
new file mode 100644
index 0000000..4f2e436
--- /dev/null
+++ b/src/cmd/htmlfmt/html.c

@@ -0,0 +1,331 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <draw.h>
+#include <regexp.h>
+#include <html.h>
+#include <ctype.h>
+#include "dat.h"
+
+/* Pattern matching the scheme://host prefix of an absolute URL; used by baseurl. */
+char urlexpr[] = "^(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero)://([a-zA-Z0-9_@\\-]+([.:][a-zA-Z0-9_@\\-]+)*)";
+/* Compiled form of urlexpr; baseurl compiles it on first use. */
+Reprog	*urlprog;
+
+/* Word-wrapping state shared by renderrunes and emitword; render resets all three. */
+/* inword: nonzero while renderrunes is inside a word that has not been emitted yet */
+int inword = 0;
+/* col: characters emitted so far on the current output line */
+int col = 0;
+/* wordi: index, in the rune string being scanned, where the pending word starts */
+int wordi = 0;
+
+/*
+ * Read everything from fd, parse it as HTML and write the
+ * formatted text (rendertext writes to u->outfd, descriptor 1).
+ * Always returns nil; empty input produces no output.
+ * All memory allocated here is released before returning.
+ */
+char*
+loadhtml(int fd)
+{
+	URLwin *u;
+	Bytes *b;
+	int n;
+	char buf[4096];
+
+	u = emalloc(sizeof(URLwin));
+	u->infd = fd;
+	u->outfd = 1;
+	u->url = estrdup(url);
+	u->type = TextHtml;
+
+	b = emalloc(sizeof(Bytes));
+	while((n = read(fd, buf, sizeof buf)) > 0)
+		growbytes(b, buf, n);
+	if(b->b == nil){
+		/* empty file: nothing was parsed, so release the pieces by hand */
+		free(b);
+		free(u->url);
+		free(u);
+		return nil;
+	}
+	rendertext(u, b);
+	freeurlwin(u);
+	/* the parse results are gone, so nothing can still refer to the input bytes */
+	free(b->b);
+	free(b);
+	return nil;
+}
+
+/*
+ * Convert the first n runes of r to a freshly allocated UTF string.
+ * Zero runes give an empty string (emalloc returns zeroed memory).
+ * Allocation failure is fatal.
+ */
+char*
+runetobyte(Rune *r, int n)
+{
+	char *utf;
+
+	if(n != 0){
+		utf = smprint("%.*S", n, r);
+		if(utf == nil)
+			error("malloc failed");
+		return utf;
+	}
+	return emalloc(1);
+}
+
+/*
+ * Report whether c is a closing punctuation character, one that
+ * emitword attaches to the preceding word without a space.
+ * c may be any rune value: only the listed ASCII characters match.
+ * (A strchr lookup would also match 0, the string terminator, and
+ * any rune whose low byte equals one of these characters.)
+ */
+int
+closingpunct(int c)
+{
+	switch(c){
+	case '.':
+	case ',':
+	case ':':
+	case ';':
+	case '\'':
+	case '"':
+	case ')':
+	case ']':
+	case '}':
+	case '>':
+	case '!':
+	case '?':
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Append the nr-rune word at r to b, wrapping at the global width.
+ * A space goes before the word unless the buffer is empty, already
+ * ends in white space, the word starts with closing punctuation,
+ * or the word begins a line.  Updates col and clears inword.
+ * An empty word (nr == 0) is ignored.
+ */
+void
+emitword(Bytes *b, Rune *r, int nr)
+{
+	char *s;
+	int space;
+
+	if(nr == 0)
+		return;
+	/* runetobyte checks the allocation; a bare smprint could hand nil to strlen */
+	s = runetobyte(r, nr);
+	space = (b->n>0) && !isspace(b->b[b->n-1]) && !closingpunct(r[0]);
+	if(col>0 && col+space+nr > width){
+		/* word does not fit: start a new line and drop the separator */
+		growbytes(b, "\n", 1);
+		space = 0;
+		col = 0;
+	}
+	if(space && col>0){
+		growbytes(b, " ", 1);
+		col++;
+	}
+	growbytes(b, s, strlen(s));
+	col += nr;	/* columns are counted in runes, not bytes */
+	free(s);
+	inword = 0;
+}
+
+/*
+ * Split the rune string r into words at spaces and newlines and
+ * hand each word to emitword.  A newline also resets the column
+ * and is copied to b, except at the very start of the output or
+ * when b already ends in two newlines.
+ */
+void
+renderrunes(Bytes *b, Rune *r)
+{
+	int i, n;
+	Rune c;
+
+	n = runestrlen(r);
+	for(i=0; i<n; i++){
+		c = r[i];
+		if(c != '\n' && c != ' '){
+			/* ordinary character: remember where the word began */
+			if(!inword){
+				wordi = i;
+				inword = 1;
+			}
+			continue;
+		}
+		/* separator: the pending word, if any, ends here */
+		if(inword)
+			emitword(b, r+wordi, i-wordi);
+		if(c == ' ')
+			continue;
+		col = 0;
+		if(b->n == 0)
+			continue;	/* don't start with blank lines */
+		if(!(b->n>=2 && b->b[b->n-1]=='\n' && b->b[b->n-2]=='\n'))
+			growbytes(b, "\n", 1);
+	}
+	if(inword)
+		emitword(b, r+wordi, i-wordi);
+}
+
+/*
+ * Format the arguments as a rune string and render it into b
+ * with renderrunes.  Allocation failure is fatal.
+ */
+void
+renderbytes(Bytes *b, char *fmt, ...)
+{
+	Rune *r;
+	va_list arg;
+
+	va_start(arg, fmt);
+	r = runevsmprint(fmt, arg);
+	va_end(arg);
+	/* renderrunes calls runestrlen on r, so nil must not get through */
+	if(r == nil)
+		error("malloc failed");
+	renderrunes(b, r);
+	free(r);
+}
+
+/*
+ * Return a newly allocated copy of url cut back to its base:
+ * everything before the last '/', but never shorter than the
+ * prefix matched by urlexpr.  Returns nil if url is nil or does
+ * not match urlexpr.  The caller frees the result.
+ */
+char*
+baseurl(char *url)
+{
+	char *copy, *last;
+	long prefix;
+	Resub match[10];
+
+	if(url == nil)
+		return nil;
+	if(urlprog == nil && (urlprog = regcomp(urlexpr)) == nil)
+		error("can't compile URL regexp");
+	memset(match, 0, sizeof match);
+	if(regexec(urlprog, url, match, nelem(match)) == 0)
+		return nil;
+	/* length of the whole match */
+	prefix = match[0].e.p-match[0].s.p;
+	copy = estrdup(url);
+	last = strrchr(copy, '/');
+	if(last==nil || last<&copy[prefix])
+		copy[prefix] = '\0';
+	else
+		*last = '\0';
+	return copy;
+}
+
+/*
+ * Return a newly allocated UTF string for the link rhref.
+ * If rhref does not match urlexpr and the document URL u->url has
+ * a base, the result is base + href, with a '/' inserted unless
+ * the base ends in one or href starts with one; otherwise it is
+ * href itself.  A nil rhref yields "NULL URL".
+ * The caller frees the result.
+ */
+char*
+fullurl(URLwin *u, Rune *rhref)
+{
+	char *base, *href, *hrefbase, *result;
+
+	if(rhref == nil)
+		return estrdup("NULL URL");
+	/* runetobyte never returns nil: it exits on allocation failure */
+	href = runetobyte(rhref, runestrlen(rhref));
+	hrefbase = baseurl(href);
+	if(hrefbase != nil){
+		/* href matches urlexpr: use it unchanged */
+		free(hrefbase);
+		return href;
+	}
+	base = baseurl(u->url);
+	if(base == nil)
+		return href;
+	result = base;
+	if(base[strlen(base)-1]!='/' && href[0]!='/')
+		result = eappend(result, "/", "");
+	/* eappend frees its first argument only, so href is released here */
+	result = eappend(result, "", href);
+	free(href);
+	return result;
+}
+
+/*
+ * Append the text form of the item list items to t.
+ * u supplies the document URL and the anchor list used for links;
+ * curanchor is the id of the anchor whose URL was printed last, so
+ * that a run of items in one anchor prints its URL once.
+ * Calls itself for table cells and floats.  Each call, including
+ * the recursive ones, resets the shared wrap state (inword, col,
+ * wordi).  The output is left ending in a newline when non-empty.
+ */
+void
+render(URLwin *u, Bytes *t, Item *items, int curanchor)
+{
+	Item *il;
+	Itext *it;
+	Ifloat *ifl;
+	Ispacer *is;
+	Itable *ita;
+	Iimage *im;
+	Anchor *a;
+	Table *tab;
+	Tablecell *cell;
+	char *href;
+
+	inword = 0;
+	col = 0;
+	wordi = 0;
+
+	for(il=items; il!=nil; il=il->next){
+		/* a break and a request for space each contribute one newline */
+		if(il->state & IFbrk)
+			renderbytes(t, "\n");
+		if(il->state & IFbrksp)
+			renderbytes(t, "\n");
+
+		switch(il->tag){
+		case Itexttag:
+			it = (Itext*)il;
+			renderrunes(t, it->s);
+			break;
+		case Iruletag:
+			/* put the rule on a line of its own */
+			if(t->n>0 && t->b[t->n-1]!='\n')
+				renderbytes(t, "\n");
+			renderbytes(t, "=======\n");
+			break;
+		case Iimagetag:
+			/* images are mentioned only when aflag is set */
+			if(!aflag)
+				break;
+			im = (Iimage*)il;
+			if(im->imsrc){
+				href = fullurl(u, im->imsrc);
+				renderbytes(t, "[image %s]", href);
+				free(href);
+			}
+			break;
+		case Iformfieldtag:
+			if(aflag)
+				renderbytes(t, "[formfield]");
+			break;
+		case Itabletag:
+			/* tables are flattened: each cell's content is rendered in list order */
+			ita = (Itable*)il;
+			tab = ita->table;
+			for(cell=tab->cells; cell!=nil; cell=cell->next){
+				render(u, t, cell->content, curanchor);
+			}
+			if(t->n>0 && t->b[t->n-1]!='\n')
+				renderbytes(t, "\n");
+			break;
+		case Ifloattag:
+			ifl = (Ifloat*)il;
+			render(u, t, ifl->item, curanchor);
+			break;
+		case Ispacertag:
+			is = (Ispacer*)il;
+			if(is->spkind != ISPnull)
+				renderbytes(t, " ");
+			break;
+		default:
+			error("unknown item tag %d\n", il->tag);
+		}
+		/* first item of a new anchor: with aflag set, print the anchor's URL after it */
+		if(il->anchorid != 0 && il->anchorid!=curanchor){
+			for(a=u->docinfo->anchors; a!=nil; a=a->next)
+				if(aflag && a->index == il->anchorid){
+					href = fullurl(u, a->href);
+					renderbytes(t, "[%s]", href);
+					free(href);
+					break;
+				}
+			curanchor = il->anchorid;
+		}
+	}
+	if(t->n>0 && t->b[t->n-1]!='\n')
+		renderbytes(t, "\n");
+}
+
+/*
+ * Render u->items into a temporary buffer and write the result
+ * to u->outfd.  A failed or short write is a fatal error rather
+ * than being silently dropped.
+ */
+void
+rerender(URLwin *u)
+{
+	Bytes *t;
+
+	t = emalloc(sizeof(Bytes));
+
+	render(u, t, u->items, 0);
+
+	if(t->n && write(u->outfd, (char*)t->b, t->n) != t->n)
+		error("write error: %r");
+	free(t->b);
+	free(t);
+}
+
+/*
+ * Guess the character set of the NUL-terminated document text s.
+ * Somewhat of a hack.  Not a full parse: it finds the first "<meta"
+ * and then the first "charset=" at or after it (case-insensitive).
+ * Returns UTF_8 if the value, optionally quoted, begins with
+ * "utf-8" or "utf8"; otherwise returns defcharset, which is set to
+ * ISO_8859_1 here if it is still zero.
+ */
+int
+charset(char *s)
+{
+	char *meta, *cs;
+
+	if(defcharset == 0)
+		defcharset = ISO_8859_1;
+	meta = cistrstr(s, "<meta");
+	if(meta == nil)
+		return defcharset;
+	/* a "charset=" before the first <meta cannot belong to a meta tag */
+	cs = cistrstr(meta, "charset=");
+	if(cs == nil)
+		return defcharset;
+	cs += 8;
+	if(*cs == '"' || *cs == '\'')
+		cs++;
+	/* cistrncmp returns 0 when the strings match */
+	if(cistrncmp(cs, "utf-8", 5) == 0 || cistrncmp(cs, "utf8", 4) == 0)
+		return UTF_8;
+	return defcharset;
+}
+
+/*
+ * Parse the HTML held in b, storing the items and document info
+ * in u, then render them with rerender.
+ */
+void
+rendertext(URLwin *u, Bytes *b)
+{
+	Rune *rurl;
+
+	rurl = toStr((uchar*)u->url, strlen(u->url), ISO_8859_1);
+	u->items = parsehtml(b->b, b->n, rurl, u->type, charset((char*)b->b), &u->docinfo);
+	/*
+	 * parsehtml keeps only _Strdup copies of the page URL, so the
+	 * rune string can be released here.
+	 * NOTE(review): assumes toStr returns memory that free accepts.
+	 */
+	free(rurl);
+
+	rerender(u);
+}
+
+
+/*
+ * Release u and everything it owns: the parsed items, the
+ * document info and the copy of the URL.  u must not be used
+ * after this call.
+ */
+void
+freeurlwin(URLwin *u)
+{
+	freeitems(u->items);
+	u->items = nil;
+	freedocinfo(u->docinfo);
+	u->docinfo = nil;
+	/* url is the estrdup copy made when the URLwin was set up */
+	free(u->url);
+	u->url = nil;
+	free(u);
+}

diff --git a/src/cmd/htmlfmt/main.c b/src/cmd/htmlfmt/main.c
new file mode 100644
index 0000000..f85bbb4
--- /dev/null
+++ b/src/cmd/htmlfmt/main.c

@@ -0,0 +1,71 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <draw.h>
+#include <html.h>
+#include "dat.h"
+
+/* Command-line settings shared with html.c (declared in dat.h). */
+/* url: document URL from -u; loadhtml copies it into each URLwin */
+char *url = "";
+/* aflag: set by -a and by -u; render then prints image and anchor URLs */
+int aflag;
+/* width: line width from -l or -w; emitword wraps beyond it */
+int width = 70;
+/* defcharset: set by -c; charset() returns it when no utf-8 declaration is found */
+int defcharset;
+
+/* Print the usage line on standard error and exit with status "usage". */
+void
+usage(void)
+{
+	static char msg[] = "usage: htmlfmt [-c charset] [-u URL] [-a] [-l length] [file ...]\n";
+
+	fprint(2, "%s", msg);
+	exits("usage");
+}
+
+/*
+ * htmlfmt: format each named HTML file, or standard input when
+ * no file is given, as plain text on standard output.
+ * Options: -a print image and anchor URLs; -c charset set the
+ * default character set; -l or -w n set the line width;
+ * -u url set the document URL (implies -a).
+ * Stops at the first file that fails and exits with that error.
+ */
+void
+main(int argc, char *argv[])
+{
+	int i, fd;
+	char *p, *err, *file;
+	char errbuf[ERRMAX];
+
+	ARGBEGIN{
+	case 'a':
+		aflag++;
+		break;
+	case 'c':
+		/* reuse charset() by wrapping the name in a meta tag */
+		p = smprint("<meta charset=\"%s\">", EARGF(usage()));
+		if(p == nil)
+			error("malloc failed");
+		defcharset = charset(p);
+		free(p);
+		break;
+	case 'l': case 'w':
+		width = atoi(EARGF(usage()));
+		if(width <= 0)
+			usage();
+		break;
+	case 'u':
+		url = EARGF(usage());
+		aflag++;
+		break;
+	default:
+		usage();
+	}ARGEND
+
+	err = nil;
+	file = "<stdin>";
+	if(argc == 0)
+		err = loadhtml(0);
+	else
+		for(i=0; i<argc; i++){
+			file = argv[i];
+			fd = open(file, OREAD);
+			if(fd < 0){
+				errstr(errbuf, sizeof errbuf);
+				err = errbuf;
+				break;
+			}
+			err = loadhtml(fd);
+			close(fd);
+			if(err)
+				break;
+		}
+	if(err)
+		fprint(2, "htmlfmt: processing %s: %s\n", file, err);
+	exits(err);
+}

diff --git a/src/cmd/htmlfmt/mkfile b/src/cmd/htmlfmt/mkfile
new file mode 100644
index 0000000..5b26353
--- /dev/null
+++ b/src/cmd/htmlfmt/mkfile

@@ -0,0 +1,30 @@
+# htmlfmt: built from main.c, html.c and util.c
+<$SYS9/$systype/$objtype/mkfile
+
+TARG=htmlfmt
+OFILES=\
+	main.$O\
+	html.$O\
+	util.$O\
+
+HFILES=\
+	dat.h\
+	$SYS9/sys/include/html.h\
+
+# libraries listed for the link
+LIB=$SYS9/$systype/$objtype/lib/libbio.a\
+	$SYS9/$systype/$objtype/lib/libregexp.a\
+	$SYS9/$systype/$objtype/lib/libhtml.a\
+	$SYS9/$systype/$objtype/lib/lib9c.a
+
+BIN=$SYS9/$systype/$objtype/bin
+
+UPDATE=\
+	mkfile\
+	$HFILES\
+	${OFILES:%.$O=%.c}
+
+<$SYS9/sys/src/cmd/mkone
+
+# CFLAGS is assigned its own value: no extra compiler flags are added
+CFLAGS=$CFLAGS
+
+#$O.out: $OFILES
+#	$LD -o $target  $LDFLAGS $OFILES

diff --git a/src/cmd/htmlfmt/util.c b/src/cmd/htmlfmt/util.c
new file mode 100644
index 0000000..b22b0ab
--- /dev/null
+++ b/src/cmd/htmlfmt/util.c

@@ -0,0 +1,120 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <draw.h>
+#include <html.h>
+#include "dat.h"
+
+/*
+ * Allocate n bytes of zeroed memory.
+ * Does not return on failure: error() prints a message and exits.
+ */
+void*
+emalloc(ulong n)
+{
+	void *mem;
+
+	if((mem = malloc(n)) == nil)
+		error("can't malloc: %r");
+	memset(mem, 0, n);
+	return mem;
+}
+
+/*
+ * Resize the allocation p to n bytes and return the new pointer.
+ * Does not return on failure: error() prints a message and exits.
+ */
+void*
+erealloc(void *p, ulong n)
+{
+	void *q;
+
+	q = realloc(p, n);
+	if(q == nil)
+		error("can't malloc: %r");
+	return q;
+}
+
+/*
+ * Return a newly allocated copy of the string s.
+ * Allocation failure is fatal (see emalloc).
+ */
+char*
+estrdup(char *s)
+{
+	long n;
+	char *copy;
+
+	n = strlen(s)+1;	/* include the terminating NUL */
+	copy = emalloc(n);
+	memmove(copy, s, n);
+	return copy;
+}
+
+/*
+ * Return a newly allocated string holding s followed by t.
+ * Neither argument is freed.  Allocation failure is fatal.
+ */
+char*
+estrstrdup(char *s, char *t)
+{
+	long ls, lt;
+	char *cat, *w;
+
+	ls = strlen(s);
+	lt = strlen(t);
+	/* plain malloc: every byte is written below, so zeroing is not needed */
+	cat = malloc(ls+lt+1);
+	if(cat == nil)
+		error("can't malloc: %r");
+	w = cat;
+	memmove(w, s, ls);
+	w += ls;
+	memmove(w, t, lt);
+	w += lt;
+	*w = '\0';
+	return cat;
+}
+
+/*
+ * Return a newly allocated string s + sep + t and free s.
+ * A nil t is treated as the empty string; t itself is never freed.
+ * Allocation failure is fatal.
+ */
+char*
+eappend(char *s, char *sep, char *t)
+{
+	long ls, lsep, lt;
+	char *joined;
+
+	if(t == nil)
+		t = "";
+	ls = strlen(s);
+	lsep = strlen(sep);
+	lt = strlen(t);
+	/* plain malloc: every byte is written below, so zeroing is not needed */
+	joined = malloc(ls+lsep+lt+1);
+	if(joined == nil)
+		error("can't malloc: %r");
+	memmove(joined, s, ls);
+	memmove(joined+ls, sep, lsep);
+	memmove(joined+ls+lsep, t, lt);
+	joined[ls+lsep+lt] = '\0';
+	free(s);
+	return joined;
+}
+
+/*
+ * Like eappend, but takes ownership of t as well: returns
+ * s + sep + t in new memory and frees both s and t.
+ */
+char*
+egrow(char *s, char *sep, char *t)
+{
+	char *joined;
+
+	joined = eappend(s, sep, t);
+	free(t);
+	return joined;
+}
+
+/*
+ * Print "htmlfmt: " followed by the formatted message and a
+ * newline on standard error, then exit.  The exit status is the
+ * format string itself.  Never returns.
+ */
+void
+error(char *fmt, ...)
+{
+	va_list arg;
+	char buf[256];
+	Fmt f;
+
+	fmtfdinit(&f, 2, buf, sizeof buf);
+	/* name this program, matching the prefix main uses for its own messages */
+	fmtprint(&f, "htmlfmt: ");
+	va_start(arg, fmt);
+	fmtvprint(&f, fmt, arg);
+	va_end(arg);
+	fmtprint(&f, "\n");
+	fmtfdflush(&f);
+	exits(fmt);
+}
+
+/*
+ * Append the ns bytes at s to b, growing the buffer when needed,
+ * and keep a NUL after the data.  Allocation failure is fatal.
+ */
+void
+growbytes(Bytes *b, char *s, long ns)
+{
+	long need;
+
+	need = b->n + ns + 1;	/* data plus the trailing NUL */
+	if(need > b->nalloc){
+		/* grow with slack so small appends do not reallocate each time */
+		b->nalloc = b->n + ns + 8000;
+		/* realloc rather than emalloc: no need to zero the new space */
+		b->b = realloc(b->b, b->nalloc);
+		if(b->b == nil)
+			error("growbytes: can't realloc: %r");
+	}
+	memmove(&b->b[b->n], s, ns);
+	b->n += ns;
+	b->b[b->n] = '\0';
+}

diff --git a/src/libhtml/build.c b/src/libhtml/build.c
new file mode 100644
index 0000000..32e6401
--- /dev/null
+++ b/src/libhtml/build.c

@@ -0,0 +1,4238 @@
+#include <u.h>
+#include <libc.h>
+#include <draw.h>
+#include <ctype.h>
+#include <html.h>
+#include "impl.h"
+
+// A stack for holding integer values.
+// Fixed capacity: at most Nestmax entries.
+enum {
+	Nestmax = 40	// max nesting level of lists, font styles, etc.
+};
+
+struct Stack {
+	int		n;				// next available slot (top of stack is stack[n-1])
+	int		slots[Nestmax];	// stack entries
+};
+
+// Parsing state
+// Pstates are linked through next into a stack; newitemsource creates the
+// first one and stores it in ItemSource.psstk, where getitems picks it up.
+struct Pstate
+{
+	Pstate*	next;			// in stack of Pstates
+	int		skipping;		// true when we shouldn't add items
+	int		skipwhite;		// true when we should strip leading space
+	int		curfont;		// font index for current font
+	int		curfg;		// current foreground color
+	Background	curbg;	// current background
+	int		curvoff;		// current baseline offset
+	uchar	curul;		// current underline/strike state
+	uchar	curjust;		// current justify state
+	int		curanchor;	// current (href) anchor id (if in one), or 0
+	int		curstate;		// current value of item state
+	int		literal;		// current literal state
+	int		inpar;		// true when in a paragraph-like construct
+	int		adjsize;		// current font size adjustment
+	Item*	items;		// dummy head of item list we're building
+	Item*	lastit;		// tail of item list we're building
+	Item*	prelastit;		// item before lastit
+	Stack	fntstylestk;	// style stack
+	Stack	fntsizestk;		// size stack
+	Stack	fgstk;		// text color stack
+	Stack	ulstk;		// underline stack
+	Stack	voffstk;		// vertical offset stack
+	Stack	listtypestk;	// list type stack
+	Stack	listcntstk;		// list counter stack
+	Stack	juststk;		// justification stack
+	Stack	hangstk;		// hanging stack
+};
+
+// Per-document state for building items; created by newitemsource
+// and passed to getitems by parsehtml.
+struct ItemSource
+{
+	Docinfo*		doc;		// document info being filled in
+	Pstate*		psstk;	// stack of parsing states
+	int			nforms;	// counter, starts at 0
+	int			ntables;	// counter, starts at 0
+	int			nanchors;	// counter, starts at 0; incremented to number each new anchor
+	int			nframes;	// counter, starts at 0
+	Form*		curform;	// starts nil
+	Map*		curmap;	// starts nil
+	Table*		tabstk;	// starts nil; getitems reads it as its current table
+	Kidinfo*		kidstk;	// starts nil
+};
+
+// Some layout parameters
+enum {
+	FRKIDMARGIN = 6,	// default margin around kid frames
+	IMGHSPACE = 0,	// default hspace for images (0 matches IE, Netscape)
+	IMGVSPACE = 0,	// default vspace for images
+	FLTIMGHSPACE = 2,	// default hspace for float images
+	TABSP = 5,		// default cellspacing for tables
+	TABPAD = 1,		// default cell padding for tables
+	LISTTAB = 1,		// number of tabs to indent lists
+	BQTAB = 1,		// number of tabs to indent blockquotes
+	HRSZ = 2,			// thickness of horizontal rules
+	SUBOFF = 4,		// vertical offset for subscripts
+	SUPOFF = 6,		// vertical offset for superscripts
+	NBSP = 160		// non-breaking space character
+};
+
+// These tables must be sorted
+// Alignment keywords.  _align_tab is the ASCII source; buildinit
+// converts it into align_tab, the table used for lookups.
+static StringInt *align_tab;
+static AsciiInt _align_tab[] = {
+	{"baseline",	ALbaseline},
+	{"bottom",	ALbottom},
+	{"center",	ALcenter},
+	{"char",		ALchar},
+	{"justify",	ALjustify},
+	{"left",		ALleft},
+	{"middle",	ALmiddle},
+	{"right",		ALright},
+	{"top",		ALtop}
+};
+// align_tab is a pointer, so sizeof(align_tab) is the size of a pointer;
+// count the entries of the backing array instead
+#define NALIGNTAB nelem(_align_tab)
+
+// <INPUT> type keywords (sorted).  _input_tab is the ASCII source;
+// buildinit converts it into input_tab, the table used for lookups.
+static StringInt *input_tab;
+static AsciiInt _input_tab[] = {
+	{"button",	Fbutton},
+	{"checkbox",	Fcheckbox},
+	{"file",		Ffile},
+	{"hidden",	Fhidden},
+	{"image",	Fimage},
+	{"password",	Fpassword},
+	{"radio",		Fradio},
+	{"reset",		Freset},
+	{"submit",	Fsubmit},
+	{"text",		Ftext}
+};
+// input_tab is a pointer, so sizeof(input_tab) is the size of a pointer;
+// count the entries of the backing array instead
+#define NINPUTTAB nelem(_input_tab)
+
+// Clear keywords (sorted).  _clear_tab is the ASCII source;
+// buildinit converts it into clear_tab, the table used for lookups.
+static StringInt *clear_tab;
+static AsciiInt _clear_tab[] = {
+	{"all",	IFcleft|IFcright},
+	{"left",	IFcleft},
+	{"right",	IFcright}
+};
+// clear_tab is a pointer, so sizeof(clear_tab) is the size of a pointer;
+// count the entries of the backing array instead
+#define NCLEARTAB nelem(_clear_tab)
+
+// Frame scrolling keywords (sorted).  _fscroll_tab is the ASCII source;
+// buildinit converts it into fscroll_tab, the table used for lookups.
+static StringInt *fscroll_tab;
+static AsciiInt _fscroll_tab[] = {
+	{"auto",	FRhscrollauto|FRvscrollauto},
+	{"no",	FRnoscroll},
+	{"yes",	FRhscroll|FRvscroll},
+};
+// fscroll_tab is a pointer, so sizeof(fscroll_tab) is the size of a pointer;
+// count the entries of the backing array instead
+#define NFSCROLLTAB nelem(_fscroll_tab)
+
+// Area shape keywords (sorted).  _shape_tab is the ASCII source;
+// buildinit converts it into shape_tab, the table used for lookups.
+static StringInt *shape_tab;
+static AsciiInt _shape_tab[] = {
+	{"circ",		SHcircle},
+	{"circle",		SHcircle},
+	{"poly",		SHpoly},
+	{"polygon",	SHpoly},
+	{"rect",		SHrect},
+	{"rectangle",	SHrect}
+};
+// shape_tab is a pointer, so sizeof(shape_tab) is the size of a pointer;
+// count the entries of the backing array instead
+#define NSHAPETAB nelem(_shape_tab)
+
+// Form method keywords (sorted).  _method_tab is the ASCII source;
+// buildinit converts it into method_tab, the table used for lookups.
+static StringInt *method_tab;
+static AsciiInt _method_tab[] = {
+	{"get",		HGet},
+	{"post",		HPost}
+};
+// method_tab is a pointer, so sizeof(method_tab) is the size of a pointer;
+// count the entries of the backing array instead
+#define NMETHODTAB nelem(_method_tab)
+
+// Roman numerals for 1 through 15.  _roman is the ASCII source;
+// buildinit converts it into roman with cvtstringtab.
+static Rune** roman;
+static char* _roman[15]= {
+	"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X",
+	"XI", "XII", "XIII", "XIV", "XV"
+};
+#define NROMAN 15
+
+// List number types
+enum {
+	LTdisc, LTsquare, LTcircle, LT1, LTa, LTA, LTi, LTI
+};
+
+// Flag bits stored in blockbrk[] entries
+enum {
+	SPBefore = 2,	// want a line of space before the element
+	SPAfter = 4,	// want a line of space after the element
+	BL = 1,		// nonzero entry: the tag causes a break
+	BLBA = (BL|SPBefore|SPAfter)	// break, with space both before and after
+};
+
+// blockbrk[tag] holds the break flags for a block-level element, and for
+// a few other elements that are treated the same way: they end any open
+// paragraph and need a line break or vertical space before them.
+// SPBefore is OR'd in to ask for a line of space before the element,
+// SPAfter to ask for a line of space after it.
+
+static uchar blockbrk[Numtags]= {
+	[Taddress] BLBA,
+	[Tblockquote] BLBA,
+	[Tcenter] BL,
+	[Tdir] BLBA,
+	[Tdiv] BL,
+	[Tdd] BL,
+	[Tdl] BLBA,
+	[Tdt] BL,
+	[Tform] BLBA,
+	// headings and tables get breaks added manually
+	[Th1] BL,
+	[Th2] BL,
+	[Th3] BL,
+	[Th4] BL,
+	[Th5] BL,
+	[Th6] BL,
+	[Thr] BL,
+	[Tisindex] BLBA,
+	[Tli] BL,
+	[Tmenu] BLBA,
+	[Tol] BLBA,
+	[Tp] BLBA,
+	[Tpre] BLBA,
+	[Tul] BLBA
+};
+
+enum {
+	AGEN = 1
+};
+
+// attrinfo[attid] holds flags describing an attribute.
+// AGEN marks a generic attribute, one that applies to almost all elements.
+static uchar attrinfo[Numattrs]= {
+	[Aid] AGEN,
+	[Aclass] AGEN,
+	[Astyle] AGEN,
+	[Atitle] AGEN,
+	[Aonblur] AGEN,
+	[Aonchange] AGEN,
+	[Aonclick] AGEN,
+	[Aondblclick] AGEN,
+	[Aonfocus] AGEN,
+	[Aonkeypress] AGEN,
+	[Aonkeyup] AGEN,
+	[Aonload] AGEN,
+	[Aonmousedown] AGEN,
+	[Aonmousemove] AGEN,
+	[Aonmouseout] AGEN,
+	[Aonmouseover] AGEN,
+	[Aonmouseup] AGEN,
+	[Aonreset] AGEN,
+	[Aonselect] AGEN,
+	[Aonsubmit] AGEN,
+	[Aonunload] AGEN
+};
+
+// scriptev[attid] gives the SE* script event code for each "on..." event
+// attribute; entries for all other attributes are left zero.
+static uchar scriptev[Numattrs]= {
+	[Aonblur] SEonblur, [Aonchange] SEonchange, [Aonclick] SEonclick,
+	[Aondblclick] SEondblclick, [Aonfocus] SEonfocus, [Aonkeypress] SEonkeypress,
+	[Aonkeyup] SEonkeyup, [Aonload] SEonload, [Aonmousedown] SEonmousedown,
+	[Aonmousemove] SEonmousemove, [Aonmouseout] SEonmouseout, [Aonmouseover] SEonmouseover,
+	[Aonmouseup] SEonmouseup, [Aonreset] SEonreset, [Aonselect] SEonselect,
+	[Aonsubmit] SEonsubmit, [Aonunload] SEonunload
+};
+
+// Color lookup table (sorted by name; values are 0xRRGGBB).
+// _color_tab is the ASCII source; buildinit converts it into
+// color_tab, the table used for lookups.
+static StringInt *color_tab;
+static AsciiInt _color_tab[] = {
+	{"aqua", 0x00FFFF},
+	{"black",  0x000000},
+	{"blue", 0x0000CC},
+	{"fuchsia", 0xFF00FF},
+	{"gray", 0x808080},
+	{"green", 0x008000},
+	{"lime", 0x00FF00},
+	{"maroon", 0x800000},
+	{"navy", 0x000080},
+	{"olive", 0x808000},
+	{"purple", 0x800080},
+	{"red", 0xFF0000},
+	{"silver", 0xC0C0C0},
+	{"teal", 0x008080},
+	{"white", 0xFFFFFF},
+	{"yellow", 0xFFFF00}
+};
+// color_tab is a pointer, so sizeof(color_tab) is the size of a pointer;
+// count the entries of the backing array instead
+#define NCOLORS nelem(_color_tab)
+
+static StringInt 		*targetmap;
+static int			targetmapsize;
+static int			ntargets;
+
+static int buildinited = 0;
+
+#define SMALLBUFSIZE 240
+#define BIGBUFSIZE 2000
+
+int	dbgbuild = 0;
+int	warn = 0;
+
+static Align		aalign(Token* tok);
+static int			acolorval(Token* tok, int attid, int dflt);
+static void			addbrk(Pstate* ps, int sp, int clr);
+static void			additem(Pstate* ps, Item* it, Token* tok);
+static void			addlinebrk(Pstate* ps, int clr);
+static void			addnbsp(Pstate* ps);
+static void			addtext(Pstate* ps, Rune* s);
+static Dimen		adimen(Token* tok, int attid);
+static int			aflagval(Token* tok, int attid);
+static int			aintval(Token* tok, int attid, int dflt);
+static Rune*		astrval(Token* tok, int attid, Rune* dflt);
+static int			atabval(Token* tok, int attid, StringInt* tab, int ntab, int dflt);
+static int			atargval(Token* tok, int dflt);
+static int			auintval(Token* tok, int attid, int dflt);
+static Rune*		aurlval(Token* tok, int attid, Rune* dflt, Rune* base);
+static Rune*		aval(Token* tok, int attid);
+static void			buildinit(void);
+static Pstate*		cell_pstate(Pstate* oldps, int ishead);
+static void			changehang(Pstate* ps, int delta);
+static void			changeindent(Pstate* ps, int delta);
+static int			color(Rune* s, int dflt);
+static void			copystack(Stack* tostk, Stack* fromstk);
+static int			dimprint(char* buf, int nbuf, Dimen d);
+static Pstate*		finishcell(Table* curtab, Pstate* psstk);
+static void			finish_table(Table* t);
+static void			freeanchor(Anchor* a);
+static void			freedestanchor(DestAnchor* da);
+static void			freeform(Form* f);
+static void			freeformfield(Formfield* ff);
+static void			freeitem(Item* it);
+static void			freepstate(Pstate* p);
+static void			freepstatestack(Pstate* pshead);
+static void			freescriptevents(SEvent* ehead);
+static void			freetable(Table* t);
+static Map*		getmap(Docinfo* di, Rune* name);
+static Rune*		getpcdata(Token* toks, int tokslen, int* ptoki);
+static Pstate*		lastps(Pstate* psl);
+static Rune*		listmark(uchar ty, int n);
+static int			listtyval(Token* tok, int dflt);
+static Align		makealign(int halign, int valign);
+static Background	makebackground(Rune* imgurl, int color);
+static Dimen		makedimen(int kind, int spec);
+static Anchor*		newanchor(int index, Rune* name, Rune* href, int target, Anchor* link);
+static Area*		newarea(int shape, Rune* href, int target, Area* link);
+static DestAnchor*	newdestanchor(int index, Rune* name, Item* item, DestAnchor* link);
+static Docinfo*		newdocinfo(void);
+static Genattr*		newgenattr(Rune* id, Rune* class, Rune* style, Rune* title, SEvent* events);
+static Form*		newform(int formid, Rune* name, Rune* action,
+					int target, int method, Form* link);
+static Formfield*	newformfield(int ftype, int fieldid, Form* form, Rune* name,
+					Rune* value, int size, int maxlength, Formfield* link);
+static Item*		newifloat(Item* it, int side);
+static Item*		newiformfield(Formfield* ff);
+static Item*		newiimage(Rune* src, Rune* altrep, int align, int width, int height,
+					int hspace, int vspace, int border, int ismap, Map* map);
+static Item*		newirule(int align, int size, int noshade, Dimen wspec);
+static Item*		newispacer(int spkind);
+static Item*		newitable(Table* t);
+static ItemSource*	newitemsource(Docinfo* di);
+static Item*		newitext(Rune* s, int fnt, int fg, int voff, int ul);
+static Kidinfo*		newkidinfo(int isframeset, Kidinfo* link);
+static Option*		newoption(int selected, Rune* value, Rune* display, Option* link);
+static Pstate*		newpstate(Pstate* link);
+static SEvent*		newscriptevent(int type, Rune* script, SEvent* link);
+static Table*		newtable(int tableid, Align align, Dimen width, int border,
+					int cellspacing, int cellpadding, Background bg, Token* tok, Table* link);
+static Tablecell*	newtablecell(int cellid, int rowspan, int colspan, Align align, Dimen wspec,
+					int hspec, Background bg, int flags, Tablecell* link);
+static Tablerow*	newtablerow(Align align, Background bg, int flags, Tablerow* link);
+static Dimen		parsedim(Rune* s, int ns);
+static void			pop(Stack* stk);
+static void			popfontsize(Pstate* ps);
+static void			popfontstyle(Pstate* ps);
+static void			popjust(Pstate* ps);
+static int			popretnewtop(Stack* stk, int dflt);
+static int			push(Stack* stk, int val);
+static void			pushfontsize(Pstate* ps, int sz);
+static void			pushfontstyle(Pstate* ps, int sty);
+static void			pushjust(Pstate* ps, int j);
+static Item*		textit(Pstate* ps, Rune* s);
+static Rune*		removeallwhite(Rune* s);
+static void			resetdocinfo(Docinfo* d);
+static void			setcurfont(Pstate* ps);
+static void			setcurjust(Pstate* ps);
+static void			setdimarray(Token* tok, int attid, Dimen** pans, int* panslen);
+static Rune*		stringalign(int a);
+static void			targetmapinit(void);
+static int			toint(Rune* s);
+static int			top(Stack* stk, int dflt);
+static void			trim_cell(Tablecell* c);
+static int			validalign(Align a);
+static int			validdimen(Dimen d);
+static int			validformfield(Formfield* f);
+static int			validhalign(int a);
+static int			validptr(void* p);
+static int			validStr(Rune* s);
+static int			validtable(Table* t);
+static int			validtablerow(Tablerow* r);
+static int			validtablecol(Tablecol* c);
+static int			validtablecell(Tablecell* c);
+static int			validvalign(int a);
+static int			Iconv(Fmt *f);
+
+// One-time setup for this file: convert the ASCII keyword tables to
+// their rune forms, install the 'I' print verb, set up the target map,
+// and record that initialization is done.
+static void
+buildinit(void)
+{
+	static struct {
+		StringInt**	tab;		// where the converted table goes
+		AsciiInt*	src;		// ASCII source table
+		int		n;		// entries in src
+	} conv[] = {
+		{&color_tab, _color_tab, nelem(_color_tab)},
+		{&method_tab, _method_tab, nelem(_method_tab)},
+		{&shape_tab, _shape_tab, nelem(_shape_tab)},
+		{&fscroll_tab, _fscroll_tab, nelem(_fscroll_tab)},
+		{&clear_tab, _clear_tab, nelem(_clear_tab)},
+		{&input_tab, _input_tab, nelem(_input_tab)},
+		{&align_tab, _align_tab, nelem(_align_tab)}
+	};
+	int k;
+
+	runetabinit();
+	roman = cvtstringtab(_roman, nelem(_roman));
+	for(k = 0; k < nelem(conv); k++)
+		*conv[k].tab = cvtstringinttab(conv[k].src, conv[k].n);
+
+	fmtinstall('I', Iconv);
+	targetmapinit();
+	buildinited = 1;
+}
+
+// Create the item-building state for document di: a fresh parsing state
+// plus zeroed counters and empty form/map/table/kid pointers.
+// For anything other than text/html the parsing state is set up for
+// literal, unwrapped text in style FntT.
+static ItemSource*
+newitemsource(Docinfo* di)
+{
+	ItemSource*	src;
+	Pstate*	top;
+
+	top = newpstate(nil);
+	if(di->mediatype != TextHtml) {
+		top->curstate &= ~IFwrap;
+		top->literal = 1;
+		pushfontstyle(top, FntT);
+	}
+	src = (ItemSource*)emalloc(sizeof(ItemSource));
+	src->doc = di;
+	src->psstk = top;
+	src->nforms = src->ntables = src->nanchors = src->nframes = 0;
+	src->curform = nil;
+	src->curmap = nil;
+	src->tabstk = nil;
+	src->kidstk = nil;
+	return src;
+}
+
+static Item *getitems(ItemSource* is, uchar* data, int datalen);
+
+// Parse an html document and return its list of layout items.
+// The document info is allocated here and returned through *pdi;
+// it keeps its own copies of pagesrc as both source and base URL.
+// When finished with the result, the caller should call freeitems
+// on the returned list and then freedocinfo(*pdi).
+Item*
+parsehtml(uchar* data, int datalen, Rune* pagesrc, int mtype, int chset, Docinfo** pdi)
+{
+	Docinfo*	doc;
+	ItemSource*	src;
+	Item*	items;
+
+	doc = newdocinfo();
+	doc->mediatype = mtype;
+	doc->chset = chset;
+	doc->src = _Strdup(pagesrc);
+	doc->base = _Strdup(pagesrc);
+	*pdi = doc;
+
+	src = newitemsource(doc);
+	items = getitems(src, data, datalen);
+	// the building state is only needed while getitems runs
+	freepstatestack(src->psstk);
+	free(src);
+	return items;
+}
+
+// Get a group of tokens for lexer, parse them, and create
+// a list of layout items.
+// When caller is done with the items, it should call
+// freeitems on the returned result.
+static Item*
+getitems(ItemSource* is, uchar* data, int datalen)
+{
+	int	i;
+	int	j;
+	int	nt;
+	int	pt;
+	int	doscripts;
+	int	tokslen;
+	int	toki;
+	int	h;
+	int	sz;
+	int	method;
+	int	n;
+	int	nblank;
+	int	norsz;
+	int	bramt;
+	int	sty;
+	int	nosh;
+	int	oldcuranchor;
+	int	dfltbd;
+	int	v;
+	int	hang;
+	int	isempty;
+	int	tag;
+	int	brksp;
+	int	target;
+	uchar	brk;
+	uchar	flags;
+	uchar	align;
+	uchar	al;
+	uchar	ty;
+	uchar	ty2;
+	Pstate*	ps;
+	Pstate*	nextps;
+	Pstate*	outerps;
+	Table*	curtab;
+	Token*	tok;
+	Token*	toks;
+	Docinfo*	di;
+	Item*	ans;
+	Item*	img;
+	Item*	ffit;
+	Item*	tabitem;
+	Rune*	s;
+	Rune*	t;
+	Rune*	name;
+	Rune*	enctype;
+	Rune*	usemap;
+	Rune*	prompt;
+	Rune*	equiv;
+	Rune*	val;
+	Rune*	nsz;
+	Rune*	script;
+	Map*	map;
+	Form*	frm;
+	Iimage*	ii;
+	Kidinfo*	kd;
+	Kidinfo*	ks;
+	Kidinfo*	pks;
+	Dimen	wd;
+	Option*	option;
+	Table*	tab;
+	Tablecell*	c;
+	Tablerow*	tr;
+	Formfield*	field;
+	Formfield*	ff;
+	Rune*	href;
+	Rune*	src;
+	Rune*	scriptsrc;
+	Rune*	bgurl;
+	Rune*	action;
+	Background	bg;
+
+	if(!buildinited)
+		buildinit();
+	doscripts = 0;	// for now
+	ps = is->psstk;
+	curtab = is->tabstk;
+	di = is->doc;
+	toks = _gettoks(data, datalen, di->chset, di->mediatype, &tokslen);
+	toki = 0;
+	for(; toki < tokslen; toki++) {
+		tok = &toks[toki];
+		if(dbgbuild > 1)
+			fprint(2, "build: curstate %ux, token %T\n", ps->curstate, tok);
+		tag = tok->tag;
+		brk = 0;
+		brksp = 0;
+		if(tag < Numtags) {
+			brk = blockbrk[tag];
+			if(brk&SPBefore)
+				brksp = 1;
+		}
+		else if(tag < Numtags + RBRA) {
+			brk = blockbrk[tag - RBRA];
+			if(brk&SPAfter)
+				brksp = 1;
+		}
+		if(brk) {
+			addbrk(ps, brksp, 0);
+			if(ps->inpar) {
+				popjust(ps);
+				ps->inpar = 0;
+			}
+		}
+		// check common case first (Data), then switch statement on tag
+		if(tag == Data) {
+			// Lexing didn't pay attention to SGML record boundary rules:
+			// \n after start tag or before end tag to be discarded.
+			// (Lex has already discarded all \r's).
+			// Some pages assume this doesn't happen in <PRE> text,
+			// so we won't do it if literal is true.
+			// BUG: won't discard \n before a start tag that begins
+			// the next bufferful of tokens.
+			s = tok->text;
+			n = _Strlen(s);
+			if(!ps->literal) {
+				i = 0;
+				j = n;
+				if(toki > 0) {
+					pt = toks[toki - 1].tag;
+					// IE and Netscape both ignore this rule (contrary to spec)
+					// if previous tag was img
+					if(pt < Numtags && pt != Timg && j > 0 && s[0] == '\n')
+						i++;
+				}
+				if(toki < tokslen - 1) {
+					nt = toks[toki + 1].tag;
+					if(nt >= RBRA && nt < Numtags + RBRA && j > i && s[j - 1] == '\n')
+						j--;
+				}
+				if(i > 0 || j < n) {
+					t = s;
+					s = _Strsubstr(s, i, j);
+					free(t);
+					n = j-i;
+				}
+			}
+			if(ps->skipwhite) {
+				_trimwhite(s, n, &t, &nt);
+				if(t == nil) {
+					free(s);
+					s = nil;
+				}
+				else if(t != s) {
+					t = _Strndup(t, nt);
+					free(s);
+					s = t;
+				}
+				if(s != nil)
+					ps->skipwhite = 0;
+			}
+			tok->text = nil;		// token doesn't own string anymore
+			if(s != nil)
+				addtext(ps, s);
+		}
+		else
+			switch(tag) {
+			// Some abbrevs used in following DTD comments
+			// %text = 	#PCDATA
+			//		| TT | I | B | U | STRIKE | BIG | SMALL | SUB | SUP
+			//		| EM | STRONG | DFN | CODE | SAMP | KBD | VAR | CITE
+			//		| A | IMG | APPLET | FONT | BASEFONT | BR | SCRIPT | MAP
+			//		| INPUT | SELECT | TEXTAREA
+			// %block = P | UL | OL | DIR | MENU | DL | PRE | DL | DIV | CENTER
+			//		| BLOCKQUOTE | FORM | ISINDEX | HR | TABLE
+			// %flow = (%text | %block)*
+			// %body.content = (%heading | %text | %block | ADDRESS)*
+
+			// <!ELEMENT A - - (%text) -(A)>
+			// Anchors are not supposed to be nested, but you sometimes see
+			// href anchors inside destination anchors.
+			case Ta:
+				if(ps->curanchor != 0) {
+					if(warn)
+						fprint(2, "warning: nested <A> or missing </A>\n");
+					ps->curanchor = 0;
+				}
+				name = aval(tok, Aname);
+				href = aurlval(tok, Ahref, nil, di->base);
+				// ignore rel, rev, and title attrs
+				if(href != nil) {
+					target = atargval(tok, di->target);
+					di->anchors = newanchor(++is->nanchors, name, href, target, di->anchors);
+					if(name != nil)
+						name = _Strdup(name);	// for DestAnchor construction, below
+					ps->curanchor = is->nanchors;
+					ps->curfg = push(&ps->fgstk, di->link);
+					ps->curul = push(&ps->ulstk, ULunder);
+				}
+				if(name != nil) {
+					// add a null item to be destination
+					additem(ps, newispacer(ISPnull), tok);
+					di->dests = newdestanchor(++is->nanchors, name, ps->lastit, di->dests);
+				}
+				break;
+
+			case Ta+RBRA :
+				if(ps->curanchor != 0) {
+					ps->curfg = popretnewtop(&ps->fgstk, di->text);
+					ps->curul = popretnewtop(&ps->ulstk, ULnone);
+					ps->curanchor = 0;
+				}
+				break;
+
+			// <!ELEMENT APPLET - - (PARAM | %text)* >
+			// We can't do applets, so ignore PARAMS, and let
+			// the %text contents appear for the alternative rep
+			case Tapplet:
+			case Tapplet+RBRA:
+				if(warn && tag == Tapplet)
+					fprint(2, "warning: <APPLET> ignored\n");
+				break;
+
+			// <!ELEMENT AREA - O EMPTY>
+			case Tarea:
+				map = di->maps;
+				if(map == nil) {
+					if(warn)
+						fprint(2, "warning: <AREA> not inside <MAP>\n");
+					continue;
+				}
+				map->areas = newarea(atabval(tok, Ashape, shape_tab, NSHAPETAB, SHrect),
+					aurlval(tok, Ahref, nil, di->base),
+					atargval(tok, di->target),
+					map->areas);
+				setdimarray(tok, Acoords, &map->areas->coords, &map->areas->ncoords);
+				break;
+
+			// <!ELEMENT (B|STRONG) - - (%text)*>
+			case Tb:
+			case Tstrong:
+				pushfontstyle(ps, FntB);
+				break;
+
+			case Tb+RBRA:
+			case Tcite+RBRA:
+			case Tcode+RBRA:
+			case Tdfn+RBRA:
+			case Tem+RBRA:
+			case Tkbd+RBRA:
+			case Ti+RBRA:
+			case Tsamp+RBRA:
+			case Tstrong+RBRA:
+			case Ttt+RBRA:
+			case Tvar+RBRA :
+			case Taddress+RBRA:
+				popfontstyle(ps);
+				break;
+
+			// <!ELEMENT BASE - O EMPTY>
+			case Tbase:
+				t = di->base;
+				di->base = aurlval(tok, Ahref, di->base, di->base);
+				if(t != nil)
+					free(t);
+				di->target = atargval(tok, di->target);
+				break;
+
+			// <!ELEMENT BASEFONT - O EMPTY>
+			case Tbasefont:
+				ps->adjsize = aintval(tok, Asize, 3) - 3;
+				break;
+
+			// <!ELEMENT (BIG|SMALL) - - (%text)*>
+			case Tbig:
+			case Tsmall:
+				sz = ps->adjsize;
+				if(tag == Tbig)
+					sz += Large;
+				else
+					sz += Small;
+				pushfontsize(ps, sz);
+				break;
+
+			case Tbig+RBRA:
+			case Tsmall+RBRA:
+				popfontsize(ps);
+				break;
+
+			// <!ELEMENT BLOCKQUOTE - - %body.content>
+			case Tblockquote:
+				changeindent(ps, BQTAB);
+				break;
+
+			case Tblockquote+RBRA:
+				changeindent(ps, -BQTAB);
+				break;
+
+			// <!ELEMENT BODY O O %body.content>
+			case Tbody:
+				ps->skipping = 0;
+				bg = makebackground(nil, acolorval(tok, Abgcolor, di->background.color));
+				bgurl = aurlval(tok, Abackground, nil, di->base);
+				if(bgurl != nil) {
+					if(di->backgrounditem != nil)
+						freeitem((Item*)di->backgrounditem);
+						// really should remove old item from di->images list,
+						// but there should only be one BODY element ...
+					di->backgrounditem = (Iimage*)newiimage(bgurl, nil, ALnone, 0, 0, 0, 0, 0, 0, nil);
+					di->backgrounditem->nextimage = di->images;
+					di->images = di->backgrounditem;
+				}
+				ps->curbg = bg;
+				di->background = bg;
+				di->text = acolorval(tok, Atext, di->text);
+				di->link = acolorval(tok, Alink, di->link);
+				di->vlink = acolorval(tok, Avlink, di->vlink);
+				di->alink = acolorval(tok, Aalink, di->alink);
+				if(di->text != ps->curfg) {
+					ps->curfg = di->text;
+					ps->fgstk.n = 0;
+				}
+				break;
+
+			case Tbody+RBRA:
+				// HTML spec says ignore things after </body>,
+				// but IE and Netscape don't
+				// ps.skipping = 1;
+				break;
+
+			// <!ELEMENT BR - O EMPTY>
+			case Tbr:
+				addlinebrk(ps, atabval(tok, Aclear, clear_tab, NCLEARTAB, 0));
+				break;
+
+			// <!ELEMENT CAPTION - - (%text;)*>
+			case Tcaption:
+				if(curtab == nil) {
+					if(warn)
+						fprint(2, "warning: <CAPTION> outside <TABLE>\n");
+					continue;
+				}
+				if(curtab->caption != nil) {
+					if(warn)
+						fprint(2, "warning: more than one <CAPTION> in <TABLE>\n");
+					continue;
+				}
+				ps = newpstate(ps);
+				curtab->caption_place = atabval(tok, Aalign, align_tab, NALIGNTAB, ALtop);
+				break;
+
+			case Tcaption+RBRA:
+				nextps = ps->next;
+				if(curtab == nil || nextps == nil) {
+					if(warn)
+						fprint(2, "warning: unexpected </CAPTION>\n");
+					continue;
+				}
+				curtab->caption = ps->items->next;
+				free(ps);
+				ps = nextps;
+				break;
+
+			case Tcenter:
+			case Tdiv:
+				if(tag == Tcenter)
+					al = ALcenter;
+				else
+					al = atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust);
+				pushjust(ps, al);
+				break;
+
+			case Tcenter+RBRA:
+			case Tdiv+RBRA:
+				popjust(ps);
+				break;
+
+			// <!ELEMENT DD - O  %flow >
+			case Tdd:
+				if(ps->hangstk.n == 0) {
+					if(warn)
+						fprint(2, "warning: <DD> not inside <DL\n");
+					continue;
+				}
+				h = top(&ps->hangstk, 0);
+				if(h != 0)
+					changehang(ps, -10*LISTTAB);
+				else
+					addbrk(ps, 0, 0);
+				push(&ps->hangstk, 0);
+				break;
+
+			//<!ELEMENT (DIR|MENU) - - (LI)+ -(%block) >
+			//<!ELEMENT (OL|UL) - - (LI)+>
+			case Tdir:
+			case Tmenu:
+			case Tol:
+			case Tul:
+				changeindent(ps, LISTTAB);
+				push(&ps->listtypestk, listtyval(tok, (tag==Tol)? LT1 : LTdisc));
+				push(&ps->listcntstk, aintval(tok, Astart, 1));
+				break;
+
+			case Tdir+RBRA:
+			case Tmenu+RBRA:
+			case Tol+RBRA:
+			case Tul+RBRA:
+				if(ps->listtypestk.n == 0) {
+					if(warn)
+						fprint(2, "warning: %T ended no list\n", tok);
+					continue;
+				}
+				addbrk(ps, 0, 0);
+				pop(&ps->listtypestk);
+				pop(&ps->listcntstk);
+				changeindent(ps, -LISTTAB);
+				break;
+
+			// <!ELEMENT DL - - (DT|DD)+ >
+			case Tdl:
+				changeindent(ps, LISTTAB);
+				push(&ps->hangstk, 0);
+				break;
+
+			case Tdl+RBRA:
+				if(ps->hangstk.n == 0) {
+					if(warn)
+						fprint(2, "warning: unexpected </DL>\n");
+					continue;
+				}
+				changeindent(ps, -LISTTAB);
+				if(top(&ps->hangstk, 0) != 0)
+					changehang(ps, -10*LISTTAB);
+				pop(&ps->hangstk);
+				break;
+
+			// <!ELEMENT DT - O (%text)* >
+			case Tdt:
+				if(ps->hangstk.n == 0) {
+					if(warn)
+						fprint(2, "warning: <DT> not inside <DL>\n");
+					continue;
+				}
+				h = top(&ps->hangstk, 0);
+				pop(&ps->hangstk);
+				if(h != 0)
+					changehang(ps, -10*LISTTAB);
+				changehang(ps, 10*LISTTAB);
+				push(&ps->hangstk, 1);
+				break;
+
+			// <!ELEMENT FONT - - (%text)*>
+			case Tfont:
+				sz = top(&ps->fntsizestk, Normal);
+				if(_tokaval(tok, Asize, &nsz, 0)) {
+					if(_prefix(L(Lplus), nsz))
+						sz = Normal + _Strtol(nsz+1, nil, 10) + ps->adjsize;
+					else if(_prefix(L(Lminus), nsz))
+						sz = Normal - _Strtol(nsz+1, nil, 10) + ps->adjsize;
+					else if(nsz != nil)
+						sz = Normal + (_Strtol(nsz, nil, 10) - 3);
+				}
+				ps->curfg = push(&ps->fgstk, acolorval(tok, Acolor, ps->curfg));
+				pushfontsize(ps, sz);
+				break;
+
+			case Tfont+RBRA:
+				if(ps->fgstk.n == 0) {
+					if(warn)
+						fprint(2, "warning: unexpected </FONT>\n");
+					continue;
+				}
+				ps->curfg = popretnewtop(&ps->fgstk, di->text);
+				popfontsize(ps);
+				break;
+
+			// <!ELEMENT FORM - - %body.content -(FORM) >
+			case Tform:
+				if(is->curform != nil) {
+					if(warn)
+						fprint(2, "warning: <FORM> nested inside another\n");
+					continue;
+				}
+				action = aurlval(tok, Aaction, di->base, di->base);
+				s = aval(tok, Aid);
+				name = astrval(tok, Aname, s);
+				if(s)
+					free(s);
+				target = atargval(tok, di->target);
+				method = atabval(tok, Amethod, method_tab, NMETHODTAB, HGet);
+				if(warn && _tokaval(tok, Aenctype, &enctype, 0) &&
+						_Strcmp(enctype, L(Lappl_form)))
+					fprint(2, "form enctype %S not handled\n", enctype);
+				frm = newform(++is->nforms, name, action, target, method, di->forms);
+				di->forms = frm;
+				is->curform = frm;
+				break;
+
+			case Tform+RBRA:
+				if(is->curform == nil) {
+					if(warn)
+						fprint(2, "warning: unexpected </FORM>\n");
+					continue;
+				}
+				// put fields back in input order
+				is->curform->fields = (Formfield*)_revlist((List*)is->curform->fields);
+				is->curform = nil;
+				break;
+
+			// <!ELEMENT FRAME - O EMPTY>
+			case Tframe:
+				ks = is->kidstk;
+				if(ks == nil) {
+					if(warn)
+						fprint(2, "warning: <FRAME> not in <FRAMESET>\n");
+					continue;
+				}
+				ks->kidinfos = kd = newkidinfo(0, ks->kidinfos);
+				kd->src = aurlval(tok, Asrc, nil, di->base);
+				kd->name = aval(tok, Aname);
+				if(kd->name == nil) {
+					s = _ltoStr(++is->nframes);
+					kd->name = _Strdup2(L(Lfr), s);
+					free(s);
+				}
+				kd->marginw = auintval(tok, Amarginwidth, 0);
+				kd->marginh = auintval(tok, Amarginheight, 0);
+				kd->framebd = auintval(tok, Aframeborder, 1);
+				kd->flags = atabval(tok, Ascrolling, fscroll_tab, NFSCROLLTAB, kd->flags);
+				norsz = aflagval(tok, Anoresize);
+				if(norsz)
+					kd->flags |= FRnoresize;
+				break;
+
+			// <!ELEMENT FRAMESET - - (FRAME|FRAMESET)+>
+			case Tframeset:
+				ks = newkidinfo(1, nil);
+				pks = is->kidstk;
+				if(pks == nil)
+					di->kidinfo = ks;
+				else  {
+					ks->next = pks->kidinfos;
+					pks->kidinfos = ks;
+				}
+				ks->nextframeset = pks;
+				is->kidstk = ks;
+				setdimarray(tok, Arows, &ks->rows, &ks->nrows);
+				if(ks->nrows == 0) {
+					ks->rows = (Dimen*)emalloc(sizeof(Dimen));
+					ks->nrows = 1;
+					ks->rows[0] = makedimen(Dpercent, 100);
+				}
+				setdimarray(tok, Acols, &ks->cols, &ks->ncols);
+				if(ks->ncols == 0) {
+					ks->cols = (Dimen*)emalloc(sizeof(Dimen));
+					ks->ncols = 1;
+					ks->cols[0] = makedimen(Dpercent, 100);
+				}
+				break;
+
+			case Tframeset+RBRA:
+				if(is->kidstk == nil) {
+					if(warn)
+						fprint(2, "warning: unexpected </FRAMESET>\n");
+					continue;
+				}
+				ks = is->kidstk;
+				// put kids back in original order
+				// and add blank frames to fill out cells
+				n = ks->nrows*ks->ncols;
+				nblank = n - _listlen((List*)ks->kidinfos);
+				while(nblank-- > 0)
+					ks->kidinfos = newkidinfo(0, ks->kidinfos);
+				ks->kidinfos = (Kidinfo*)_revlist((List*)ks->kidinfos);
+				is->kidstk = is->kidstk->nextframeset;
+				if(is->kidstk == nil) {
+					// end input
+					ans = nil;
+					goto return_ans;
+				}
+				break;
+
+			// <!ELEMENT H1 - - (%text;)*>, etc.
+			case Th1:
+			case Th2:
+			case Th3:
+			case Th4:
+			case Th5:
+			case Th6:
+				bramt = 1;
+				if(ps->items == ps->lastit)
+					bramt = 0;
+				addbrk(ps, bramt, IFcleft|IFcright);
+				sz = Verylarge - (tag - Th1);
+				if(sz < Tiny)
+					sz = Tiny;
+				pushfontsize(ps, sz);
+				sty = top(&ps->fntstylestk, FntR);
+				if(tag == Th1)
+					sty = FntB;
+				pushfontstyle(ps, sty);
+				pushjust(ps, atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust));
+				ps->skipwhite = 1;
+				break;
+
+			case Th1+RBRA:
+			case Th2+RBRA:
+			case Th3+RBRA:
+			case Th4+RBRA:
+			case Th5+RBRA:
+			case Th6+RBRA:
+				addbrk(ps, 1, IFcleft|IFcright);
+				popfontsize(ps);
+				popfontstyle(ps);
+				popjust(ps);
+				break;
+
+			case Thead:
+				// HTML spec says ignore regular markup in head,
+				// but Netscape and IE don't
+				// ps.skipping = 1;
+				break;
+
+			case Thead+RBRA:
+				ps->skipping = 0;
+				break;
+
+			// <!ELEMENT HR - O EMPTY>
+			case Thr:
+				al = atabval(tok, Aalign, align_tab, NALIGNTAB, ALcenter);
+				sz = auintval(tok, Asize, HRSZ);
+				wd = adimen(tok, Awidth);
+				if(dimenkind(wd) == Dnone)
+					wd = makedimen(Dpercent, 100);
+				nosh = aflagval(tok, Anoshade);
+				additem(ps, newirule(al, sz, nosh, wd), tok);
+				addbrk(ps, 0, 0);
+				break;
+
+			case Ti:
+			case Tcite:
+			case Tdfn:
+			case Tem:
+			case Tvar:
+			case Taddress:
+				pushfontstyle(ps, FntI);
+				break;
+
+			// <!ELEMENT IMG - O EMPTY>
+			case Timg:
+				map = nil;
+				oldcuranchor = ps->curanchor;
+				if(_tokaval(tok, Ausemap, &usemap, 0)) {
+					if(!_prefix(L(Lhash), usemap)) {
+						if(warn)
+							fprint(2, "warning: can't handle non-local map %S\n", usemap);
+					}
+					else {
+						map = getmap(di, usemap+1);
+						if(ps->curanchor == 0) {
+							di->anchors = newanchor(++is->nanchors, nil, nil, di->target, di->anchors);
+							ps->curanchor = is->nanchors;
+						}
+					}
+				}
+				align = atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom);
+				dfltbd = 0;
+				if(ps->curanchor != 0)
+					dfltbd = 2;
+				src = aurlval(tok, Asrc, nil, di->base);
+				if(src == nil) {
+					if(warn)
+						fprint(2, "warning: <img> has no src attribute\n");
+					ps->curanchor = oldcuranchor;
+					continue;
+				}
+				img = newiimage(src,
+						aval(tok, Aalt),
+						align,
+						auintval(tok, Awidth, 0),
+						auintval(tok, Aheight, 0),
+						auintval(tok, Ahspace, IMGHSPACE),
+						auintval(tok, Avspace, IMGVSPACE),
+						auintval(tok, Aborder, dfltbd),
+						aflagval(tok, Aismap),
+						map);
+				if(align == ALleft || align == ALright) {
+					additem(ps, newifloat(img, align), tok);
+					// if no hspace specified, use FLTIMGHSPACE
+					if(!_tokaval(tok, Ahspace, &val, 0))
+						((Iimage*)img)->hspace = FLTIMGHSPACE;
+				}
+				else {
+					ps->skipwhite = 0;
+					additem(ps, img, tok);
+				}
+				if(!ps->skipping) {
+					((Iimage*)img)->nextimage = di->images;
+					di->images = (Iimage*)img;
+				}
+				ps->curanchor = oldcuranchor;
+				break;
+
+			// <!ELEMENT INPUT - O EMPTY>
+			case Tinput:
+				ps->skipwhite = 0;
+				if(is->curform == nil) {
+					if(warn)
+						fprint(2, "<INPUT> not inside <FORM>\n");
+					continue;
+				}
+				is->curform->fields = field = newformfield(
+						atabval(tok, Atype, input_tab, NINPUTTAB, Ftext),
+						++is->curform->nfields,
+						is->curform,
+						aval(tok, Aname),
+						aval(tok, Avalue),
+						auintval(tok, Asize, 0),
+						auintval(tok, Amaxlength, 1000),
+						is->curform->fields);
+				if(aflagval(tok, Achecked))
+					field->flags = FFchecked;
+
+				switch(field->ftype) {
+				case Ftext:
+				case Fpassword:
+				case Ffile:
+					if(field->size == 0)
+						field->size = 20;
+					break;
+
+				case Fcheckbox:
+					if(field->name == nil) {
+						if(warn)
+							fprint(2, "warning: checkbox form field missing name\n");
+						continue;
+					}
+					if(field->value == nil)
+						field->value = _Strdup(L(Lone));
+					break;
+
+				case Fradio:
+					if(field->name == nil || field->value == nil) {
+						if(warn)
+							fprint(2, "warning: radio form field missing name or value\n");
+						continue;
+					}
+					break;
+
+				case Fsubmit:
+					if(field->value == nil)
+						field->value = _Strdup(L(Lsubmit));
+					if(field->name == nil)
+						field->name = _Strdup(L(Lnoname));
+					break;
+
+				case Fimage:
+					src = aurlval(tok, Asrc, nil, di->base);
+					if(src == nil) {
+						if(warn)
+							fprint(2, "warning: image form field missing src\n");
+						continue;
+					}
+					// width and height attrs aren't specified in HTML 3.2,
+					// but some people provide them and they help avoid
+					// a relayout
+					field->image = newiimage(src,
+						astrval(tok, Aalt, L(Lsubmit)),
+						atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom),
+						auintval(tok, Awidth, 0), auintval(tok, Aheight, 0),
+						0, 0, 0, 0, nil);
+					ii = (Iimage*)field->image;
+					ii->nextimage = di->images;
+					di->images = ii;
+					break;
+
+				case Freset:
+					if(field->value == nil)
+						field->value = _Strdup(L(Lreset));
+					break;
+
+				case Fbutton:
+					if(field->value == nil)
+						field->value = _Strdup(L(Lspace));
+					break;
+				}
+				ffit = newiformfield(field);
+				additem(ps, ffit, tok);
+				if(ffit->genattr != nil)
+					field->events = ffit->genattr->events;
+				break;
+
+			// <!ELEMENT ISINDEX - O EMPTY>
+			case Tisindex:
+				ps->skipwhite = 0;
+				prompt = astrval(tok, Aprompt, L(Lindex));
+				target = atargval(tok, di->target);
+				additem(ps, textit(ps, prompt), tok);
+				frm = newform(++is->nforms,
+						nil,
+						di->base,
+						target,
+						HGet,
+						di->forms);
+				di->forms = frm;
+				ff = newformfield(Ftext,
+						1,
+						frm,
+						_Strdup(L(Lisindex)),
+						nil,
+						50,
+						1000,
+						nil);
+				frm->fields = ff;
+				frm->nfields = 1;
+				additem(ps, newiformfield(ff), tok);
+				addbrk(ps, 1, 0);
+				break;
+
+			// <!ELEMENT LI - O %flow>
+			case Tli:
+				if(ps->listtypestk.n == 0) {
+					if(warn)
+						fprint(2, "<LI> not in list\n");
+					continue;
+				}
+				ty = top(&ps->listtypestk, 0);
+				ty2 = listtyval(tok, ty);
+				if(ty != ty2) {
+					ty = ty2;
+					push(&ps->listtypestk, ty2);
+				}
+				v = aintval(tok, Avalue, top(&ps->listcntstk, 1));
+				if(ty == LTdisc || ty == LTsquare || ty == LTcircle)
+					hang = 10*LISTTAB - 3;
+				else
+					hang = 10*LISTTAB - 1;
+				changehang(ps, hang);
+				addtext(ps, listmark(ty, v));
+				push(&ps->listcntstk, v + 1);
+				changehang(ps, -hang);
+				ps->skipwhite = 1;
+				break;
+
+			// <!ELEMENT MAP - - (AREA)+>
+			case Tmap:
+				if(_tokaval(tok, Aname, &name, 0))
+					is->curmap = getmap(di, name);
+				break;
+
+			case Tmap+RBRA:
+				map = is->curmap;
+				if(map == nil) {
+					if(warn)
+						fprint(2, "warning: unexpected </MAP>\n");
+					continue;
+				}
+				map->areas = (Area*)_revlist((List*)map->areas);
+				break;
+
+			case Tmeta:
+				if(ps->skipping)
+					continue;
+				if(_tokaval(tok, Ahttp_equiv, &equiv, 0)) {
+					val = aval(tok, Acontent);
+					n = _Strlen(equiv);
+					if(!_Strncmpci(equiv, n, L(Lrefresh)))
+						di->refresh = val;
+					else if(!_Strncmpci(equiv, n, L(Lcontent))) {
+						n = _Strlen(val);
+						if(!_Strncmpci(val, n, L(Ljavascript))
+						   || !_Strncmpci(val, n, L(Ljscript1))
+						   || !_Strncmpci(val, n, L(Ljscript)))
+							di->scripttype = TextJavascript;
+						else {
+							if(warn)
+								fprint(2, "unimplemented script type %S\n", val);
+							di->scripttype = UnknownType;
+						}
+					}
+				}
+				break;
+
+			// Nobr is NOT in HTML 4.0, but it is ubiquitous on the web
+			case Tnobr:
+				ps->skipwhite = 0;
+				ps->curstate &= ~IFwrap;
+				break;
+
+			case Tnobr+RBRA:
+				ps->curstate |= IFwrap;
+				break;
+
+			// We do frames, so skip stuff in noframes
+			case Tnoframes:
+				ps->skipping = 1;
+				break;
+
+			case Tnoframes+RBRA:
+				ps->skipping = 0;
+				break;
+
+			// We do scripts (if enabled), so skip stuff in noscripts
+			case Tnoscript:
+				if(doscripts)
+					ps->skipping = 1;
+				break;
+
+			case Tnoscript+RBRA:
+				if(doscripts)
+					ps->skipping = 0;
+				break;
+
+			// <!ELEMENT OPTION - O (#PCDATA)>
+			case Toption:
+				if(is->curform == nil || is->curform->fields == nil) {
+					if(warn)
+						fprint(2, "warning: <OPTION> not in <SELECT>\n");
+					continue;
+				}
+				field = is->curform->fields;
+				if(field->ftype != Fselect) {
+					if(warn)
+						fprint(2, "warning: <OPTION> not in <SELECT>\n");
+					continue;
+				}
+				val = aval(tok, Avalue);
+				option = newoption(aflagval(tok, Aselected), val, nil, field->options);
+				field->options = option;
+				option->display =  getpcdata(toks, tokslen, &toki);
+				if(val == nil)
+					option->value = _Strdup(option->display);
+				break;
+
+			// <!ELEMENT P - O (%text)* >
+			case Tp:
+				pushjust(ps, atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust));
+				ps->inpar = 1;
+				ps->skipwhite = 1;
+				break;
+
+			case Tp+RBRA:
+				break;
+
+			// <!ELEMENT PARAM - O EMPTY>
+			// Do something when we do applets...
+			case Tparam:
+				break;
+
+			// <!ELEMENT PRE - - (%text)* -(IMG|BIG|SMALL|SUB|SUP|FONT) >
+			case Tpre:
+				ps->curstate &= ~IFwrap;
+				ps->literal = 1;
+				ps->skipwhite = 0;
+				pushfontstyle(ps, FntT);
+				break;
+
+			case Tpre+RBRA:
+				ps->curstate |= IFwrap;
+				if(ps->literal) {
+					popfontstyle(ps);
+					ps->literal = 0;
+				}
+				break;
+
+			// <!ELEMENT SCRIPT - - CDATA>
+			case Tscript:
+				if(doscripts) {
+					if(!di->hasscripts) {
+						if(di->scripttype == TextJavascript) {
+							// TODO: initialize script if nec.
+							// initjscript(di);
+							di->hasscripts = 1;
+						}
+					}
+				}
+				if(!di->hasscripts) {
+					if(warn)
+						fprint(2, "warning: <SCRIPT> ignored\n");
+					ps->skipping = 1;
+				}
+				else {
+					scriptsrc = aurlval(tok, Asrc, nil, di->base);
+					script = nil;
+					if(scriptsrc != nil) {
+						if(warn)
+							fprint(2, "warning: non-local <SCRIPT> ignored\n");
+						free(scriptsrc);
+					}
+					else {
+						script = getpcdata(toks, tokslen, &toki);
+					}
+					if(script != nil) {
+						if(warn)
+							fprint(2, "script ignored\n");
+						free(script);
+					}
+				}
+				break;
+
+			case Tscript+RBRA:
+				ps->skipping = 0;
+				break;
+
+			// <!ELEMENT SELECT - - (OPTION+)>
+			case Tselect:
+				if(is->curform == nil) {
+					if(warn)
+						fprint(2, "<SELECT> not inside <FORM>\n");
+					continue;
+				}
+				field = newformfield(Fselect,
+					++is->curform->nfields,
+					is->curform,
+					aval(tok, Aname),
+					nil,
+					auintval(tok, Asize, 0),
+					0,
+					is->curform->fields);
+				is->curform->fields = field;
+				if(aflagval(tok, Amultiple))
+					field->flags = FFmultiple;
+				ffit = newiformfield(field);
+				additem(ps, ffit, tok);
+				if(ffit->genattr != nil)
+					field->events = ffit->genattr->events;
+				// throw away stuff until next tag (should be <OPTION>)
+				s = getpcdata(toks, tokslen, &toki);
+				if(s != nil)
+					free(s);
+				break;
+
+			case Tselect+RBRA:
+				if(is->curform == nil || is->curform->fields == nil) {
+					if(warn)
+						fprint(2, "warning: unexpected </SELECT>\n");
+					continue;
+				}
+				field = is->curform->fields;
+				if(field->ftype != Fselect)
+					continue;
+				// put options back in input order
+				field->options = (Option*)_revlist((List*)field->options);
+				break;
+
+			// <!ELEMENT (STRIKE|U) - - (%text)*>
+			case Tstrike:
+			case Tu:
+				ps->curul = push(&ps->ulstk, (tag==Tstrike)? ULmid : ULunder);
+				break;
+
+			case Tstrike+RBRA:
+			case Tu+RBRA:
+				if(ps->ulstk.n == 0) {
+					if(warn)
+						fprint(2, "warning: unexpected %T\n", tok);
+					continue;
+				}
+				ps->curul = popretnewtop(&ps->ulstk, ULnone);
+				break;
+
+			// <!ELEMENT STYLE - - CDATA>
+			case Tstyle:
+				if(warn)
+					fprint(2, "warning: unimplemented <STYLE>\n");
+				ps->skipping = 1;
+				break;
+
+			case Tstyle+RBRA:
+				ps->skipping = 0;
+				break;
+
+			// <!ELEMENT (SUB|SUP) - - (%text)*>
+			case Tsub:
+			case Tsup:
+				if(tag == Tsub)
+					ps->curvoff += SUBOFF;
+				else
+					ps->curvoff -= SUPOFF;
+				push(&ps->voffstk, ps->curvoff);
+				sz = top(&ps->fntsizestk, Normal);
+				pushfontsize(ps, sz - 1);
+				break;
+
+			case Tsub+RBRA:
+			case Tsup+RBRA:
+				if(ps->voffstk.n == 0) {
+					if(warn)
+						fprint(2, "warning: unexpected %T\n", tok);
+					continue;
+				}
+				ps->curvoff = popretnewtop(&ps->voffstk, 0);
+				popfontsize(ps);
+				break;
+
+			// <!ELEMENT TABLE - - (CAPTION?, TR+)>
+			case Ttable:
+				ps->skipwhite = 0;
+				tab = newtable(++is->ntables,
+						aalign(tok),
+						adimen(tok, Awidth),
+						aflagval(tok, Aborder), 
+						auintval(tok, Acellspacing, TABSP),
+						auintval(tok, Acellpadding, TABPAD),
+						makebackground(nil, acolorval(tok, Abgcolor, ps->curbg.color)),
+						tok,
+						is->tabstk);
+				is->tabstk = tab;
+				curtab = tab;
+				break;
+
+			case Ttable+RBRA:
+				if(curtab == nil) {
+					if(warn)
+						fprint(2, "warning: unexpected </TABLE>\n");
+					continue;
+				}
+				isempty = (curtab->cells == nil);
+				if(isempty) {
+					if(warn)
+						fprint(2, "warning: <TABLE> has no cells\n");
+				}
+				else {
+					ps = finishcell(curtab, ps);
+					if(curtab->rows != nil)
+						curtab->rows->flags = 0;
+					finish_table(curtab);
+				}
+				ps->skipping = 0;
+				if(!isempty) {
+					tabitem = newitable(curtab);
+					al = curtab->align.halign;
+					switch(al) {
+					case ALleft:
+					case ALright:
+						additem(ps, newifloat(tabitem, al), tok);
+						break;
+					default:
+						if(al == ALcenter)
+							pushjust(ps, ALcenter);
+						addbrk(ps, 0, 0);
+						if(ps->inpar) {
+							popjust(ps);
+							ps->inpar = 0;
+						}
+						additem(ps, tabitem, curtab->tabletok);
+						if(al == ALcenter)
+							popjust(ps);
+						break;
+					}
+				}
+				if(is->tabstk == nil) {
+					if(warn)
+						fprint(2, "warning: table stack is wrong\n");
+				}
+				else
+					is->tabstk = is->tabstk->next;
+				curtab->next = di->tables;
+				di->tables = curtab;
+				curtab = is->tabstk;
+				if(!isempty)
+					addbrk(ps, 0, 0);
+				break;
+
+			// <!ELEMENT (TH|TD) - O %body.content>
+			// Cells for a row are accumulated in reverse order.
+			// We push ps on a stack, and use a new one to accumulate
+			// the contents of the cell.
+			case Ttd:
+			case Tth:
+				if(curtab == nil) {
+					if(warn)
+						fprint(2, "%T outside <TABLE>\n", tok);
+					continue;
+				}
+				if(ps->inpar) {
+					popjust(ps);
+					ps->inpar = 0;
+				}
+				ps = finishcell(curtab, ps);
+				tr = nil;
+				if(curtab->rows != nil)
+					tr = curtab->rows;
+				if(tr == nil || !tr->flags) {
+					if(warn)
+						fprint(2, "%T outside row\n", tok);
+					tr = newtablerow(makealign(ALnone, ALnone),
+							makebackground(nil, curtab->background.color),
+							TFparsing,
+							curtab->rows);
+					curtab->rows = tr;
+				}
+				ps = cell_pstate(ps, tag == Tth);
+				flags = TFparsing;
+				if(aflagval(tok, Anowrap)) {
+					flags |= TFnowrap;
+					ps->curstate &= ~IFwrap;
+				}
+				if(tag == Tth)
+					flags |= TFisth;
+				c = newtablecell(curtab->cells==nil? 1 : curtab->cells->cellid+1,
+						auintval(tok, Arowspan, 1),
+						auintval(tok, Acolspan, 1), 
+						aalign(tok), 
+						adimen(tok, Awidth),
+						auintval(tok, Aheight, 0),
+						makebackground(nil, acolorval(tok, Abgcolor, tr->background.color)),
+						flags,
+						curtab->cells);
+				curtab->cells = c;
+				ps->curbg = c->background;
+				if(c->align.halign == ALnone) {
+					if(tr->align.halign != ALnone)
+						c->align.halign = tr->align.halign;
+					else if(tag == Tth)
+						c->align.halign = ALcenter;
+					else
+						c->align.halign = ALleft;
+				}
+				if(c->align.valign == ALnone) {
+					if(tr->align.valign != ALnone)
+						c->align.valign = tr->align.valign;
+					else
+						c->align.valign = ALmiddle;
+				}
+				c->nextinrow = tr->cells;
+				tr->cells = c;
+				break;
+
+			case Ttd+RBRA:
+			case Tth+RBRA:
+				if(curtab == nil || curtab->cells == nil) {
+					if(warn)
+						fprint(2, "unexpected %T\n", tok);
+					continue;
+				}
+				ps = finishcell(curtab, ps);
+				break;
+
+			// <!ELEMENT TEXTAREA - - (#PCDATA)>
+			case Ttextarea:
+				if(is->curform == nil) {
+					if(warn)
+						fprint(2, "<TEXTAREA> not inside <FORM>\n");
+					continue;
+				}
+				field = newformfield(Ftextarea,
+					++is->curform->nfields,
+					is->curform,
+					aval(tok, Aname),
+					nil,
+					0,
+					0,
+					is->curform->fields);
+				is->curform->fields = field;
+				field->rows = auintval(tok, Arows, 3);
+				field->cols = auintval(tok, Acols, 50);
+				field->value = getpcdata(toks, tokslen, &toki);
+				if(warn && toki < tokslen - 1 && toks[toki + 1].tag != Ttextarea + RBRA)
+					fprint(2, "warning: <TEXTAREA> data ended by %T\n", &toks[toki + 1]);
+				ffit = newiformfield(field);
+				additem(ps, ffit, tok);
+				if(ffit->genattr != nil)
+					field->events = ffit->genattr->events;
+				break;
+
+			// <!ELEMENT TITLE - - (#PCDATA)* -(%head.misc)>
+			case Ttitle:
+				di->doctitle = getpcdata(toks, tokslen, &toki);
+				if(warn && toki < tokslen - 1 && toks[toki + 1].tag != Ttitle + RBRA)
+					fprint(2, "warning: <TITLE> data ended by %T\n", &toks[toki + 1]);
+				break;
+
+			// <!ELEMENT TR - O (TH|TD)+>
+			// rows are accumulated in reverse order in curtab->rows
+			case Ttr:
+				if(curtab == nil) {
+					if(warn)
+						fprint(2, "warning: <TR> outside <TABLE>\n");
+					continue;
+				}
+				if(ps->inpar) {
+					popjust(ps);
+					ps->inpar = 0;
+				}
+				ps = finishcell(curtab, ps);
+				if(curtab->rows != nil)
+					curtab->rows->flags = 0;
+				curtab->rows = newtablerow(aalign(tok),
+					makebackground(nil, acolorval(tok, Abgcolor, curtab->background.color)),
+					TFparsing,
+					curtab->rows);
+				break;
+
+			case Ttr+RBRA:
+				if(curtab == nil || curtab->rows == nil) {
+					if(warn)
+						fprint(2, "warning: unexpected </TR>\n");
+					continue;
+				}
+				ps = finishcell(curtab, ps);
+				tr = curtab->rows;
+				if(tr->cells == nil) {
+					if(warn)
+						fprint(2, "warning: empty row\n");
+					curtab->rows = tr->next;
+					tr->next = nil;
+				}
+				else
+					tr->flags = 0;
+				break;
+
+			// <!ELEMENT (TT|CODE|KBD|SAMP) - - (%text)*>
+			case Ttt:
+			case Tcode:
+			case Tkbd:
+			case Tsamp:
+				pushfontstyle(ps, FntT);
+				break;
+
+			// Tags that have empty action
+			case Tabbr:
+			case Tabbr+RBRA:
+			case Tacronym:
+			case Tacronym+RBRA:
+			case Tarea+RBRA:
+			case Tbase+RBRA:
+			case Tbasefont+RBRA:
+			case Tbr+RBRA:
+			case Tdd+RBRA:
+			case Tdt+RBRA:
+			case Tframe+RBRA:
+			case Thr+RBRA:
+			case Thtml:
+			case Thtml+RBRA:
+			case Timg+RBRA:
+			case Tinput+RBRA:
+			case Tisindex+RBRA:
+			case Tli+RBRA:
+			case Tlink:
+			case Tlink+RBRA:
+			case Tmeta+RBRA:
+			case Toption+RBRA:
+			case Tparam+RBRA:
+			case Ttextarea+RBRA:
+			case Ttitle+RBRA:
+				break;
+
+
+			// Tags not implemented
+			case Tbdo:
+			case Tbdo+RBRA:
+			case Tbutton:
+			case Tbutton+RBRA:
+			case Tdel:
+			case Tdel+RBRA:
+			case Tfieldset:
+			case Tfieldset+RBRA:
+			case Tiframe:
+			case Tiframe+RBRA:
+			case Tins:
+			case Tins+RBRA:
+			case Tlabel:
+			case Tlabel+RBRA:
+			case Tlegend:
+			case Tlegend+RBRA:
+			case Tobject:
+			case Tobject+RBRA:
+			case Toptgroup:
+			case Toptgroup+RBRA:
+			case Tspan:
+			case Tspan+RBRA:
+				if(warn) {
+					if(tag > RBRA)
+						tag -= RBRA;
+					fprint(2, "warning: unimplemented HTML tag: %S\n", tagnames[tag]);
+				}
+				break;
+
+			default:
+				if(warn)
+					fprint(2, "warning: unknown HTML tag: %S\n", tok->text);
+				break;
+			}
+	}
+	// some pages omit trailing </table>
+	while(curtab != nil) {
+		if(warn)
+			fprint(2, "warning: <TABLE> not closed\n");
+		if(curtab->cells != nil) {
+			ps = finishcell(curtab, ps);
+			if(curtab->cells == nil) {
+				if(warn)
+					fprint(2, "warning: empty table\n");
+			}
+			else {
+				if(curtab->rows != nil)
+					curtab->rows->flags = 0;
+				finish_table(curtab);
+				ps->skipping = 0;
+				additem(ps, newitable(curtab), curtab->tabletok);
+				addbrk(ps, 0, 0);
+			}
+		}
+		if(is->tabstk != nil)
+			is->tabstk = is->tabstk->next;
+		curtab->next = di->tables;
+		di->tables = curtab;
+		curtab = is->tabstk;
+	}
+	outerps = lastps(ps);
+	ans = outerps->items->next;
+	// note: ans may be nil and di->kids not nil, if there's a frameset!
+	outerps->items = newispacer(ISPnull);
+	outerps->lastit = outerps->items;
+	is->psstk = ps;
+	if(ans != nil && di->hasscripts) {
+		// TODO evalscript(nil);
+		;
+	}
+
+return_ans:
+	if(dbgbuild) {
+		assert(validitems(ans));
+		if(ans == nil)
+			fprint(2, "getitems returning nil\n");
+		else
+			printitems(ans, "getitems returning:");
+	}
+	return ans;
+}
+
+// Gather the maximal run of consecutive Data tokens that begins at
+// toks[*ptoki + 1] and join their text into one string.
+// (The lexer is expected to guarantee that the run is followed either by
+// a non-data token or by eof.)
+// Returns the whitespace-trimmed concatenation in newly allocated storage,
+// or nil when the tokens in the run contribute no characters at all.
+// On return *ptoki is the index of the last token consumed; it is left
+// unchanged when the token after *ptoki is not a Data token.
+static Rune*
+getpcdata(Token* toks, int tokslen, int* ptoki)
+{
+	Rune*	buf;
+	Rune*	w;
+	Rune*	untrimmed;
+	Rune*	tstart;
+	int	first;
+	int	end;
+	int	i;
+	int	total;
+	int	tlen;
+
+	first = (*ptoki) + 1;
+
+	// pass 1: find where the Data run stops and total up its length
+	total = 0;
+	for(end = first; end < tokslen && toks[end].tag == Data; end++)
+		total += _Strlen(toks[end].text);
+
+	buf = nil;
+	if(total > 0) {
+		// pass 2: append each token's text in order, then terminate
+		// (assumes _newstr(n) leaves room for the terminator -- confirm)
+		buf = _newstr(total);
+		w = buf;
+		for(i = first; i < end; i++)
+			w = _Stradd(w, toks[i].text, _Strlen(toks[i].text));
+		*w = 0;
+		// replace the buffer with a trimmed copy only if trimming
+		// actually changed the length
+		_trimwhite(buf, total, &tstart, &tlen);
+		if(tlen != total) {
+			untrimmed = buf;
+			buf = _Strndup(tstart, tlen);
+			free(untrimmed);
+		}
+	}
+	*ptoki = end - 1;
+	return buf;
+}
+
+// Close out the cell at the head of curtab->cells if it is still flagged
+// TFparsing: give it the items collected in the top Pstate of psstk,
+// clear the flag, free that Pstate and return the one beneath it.
+// In every other case (no cells yet, head cell already finished, or no
+// Pstate beneath the top one) psstk is returned unchanged.
+static Pstate*
+finishcell(Table* curtab, Pstate* psstk)
+{
+	Tablecell*	cell;
+	Pstate*	below;
+
+	cell = curtab->cells;
+	if(cell == nil || !(cell->flags&TFparsing))
+		return psstk;
+
+	below = psstk->next;
+	if(below == nil) {
+		// a cell being parsed should have an enclosing Pstate under it
+		if(warn)
+			fprint(2, "warning: parse state stack is wrong\n");
+		return psstk;
+	}
+
+	// the first entry of items is skipped (newpstate starts every
+	// list with a null spacer); the cell's content begins after it
+	cell->content = psstk->items->next;
+	cell->flags &= ~TFparsing;
+	freepstate(psstk);
+	return below;
+}
+
+// Build the Pstate used while parsing one table cell and link it in
+// front of oldps (the result's next is oldps).
+// The new state starts with skipwhite set and inherits from oldps its
+// current anchor, font, foreground, background and size adjustment, plus
+// copies of the font-style, font-size and foreground stacks.
+// For a header cell (ishead non-zero) the font is switched to FntB while
+// keeping curfont%NumSize, which looks like the size part of a
+// style*NumSize + size encoding.
+static Pstate*
+cell_pstate(Pstate* oldps, int ishead)
+{
+	Pstate*	cellps;
+
+	cellps = newpstate(oldps);
+	cellps->skipwhite = 1;
+
+	// scalar state carried over from the enclosing context
+	cellps->curanchor = oldps->curanchor;
+	cellps->curfont = oldps->curfont;
+	cellps->curfg = oldps->curfg;
+	cellps->curbg = oldps->curbg;
+	cellps->adjsize = oldps->adjsize;
+
+	// stacks are duplicated rather than shared
+	copystack(&cellps->fntstylestk, &oldps->fntstylestk);
+	copystack(&cellps->fntsizestk, &oldps->fntsizestk);
+	copystack(&cellps->fgstk, &oldps->fgstk);
+
+	if(ishead)
+		cellps->curfont = FntB*NumSize + cellps->curfont%NumSize;
+	return cellps;
+}
+
+// Allocate a Pstate holding the default starting state and chain it
+// in front of link (which may be nil).  The item list starts out with
+// a single null spacer that serves as both head and last item.
+static Pstate*
+newpstate(Pstate* link)
+{
+	Pstate*	ps;
+	Item*	head;
+
+	ps = (Pstate*)emalloc(sizeof(Pstate));
+	ps->next = link;
+
+	// default rendering attributes
+	ps->curfont = DefFnt;
+	ps->curfg = Black;
+	ps->curbg.image = nil;
+	ps->curbg.color = White;
+	ps->curul = ULnone;
+	ps->curjust = ALleft;
+	ps->curstate = IFwrap;
+
+	// item chain: placeholder head, nothing before it
+	head = newispacer(ISPnull);
+	ps->items = head;
+	ps->lastit = head;
+	ps->prelastit = nil;
+	return ps;
+}
+
+// Walk the next pointers of psl and return the final element.
+// psl must not be nil.
+static Pstate*
+lastps(Pstate* psl)
+{
+	Pstate*	ps;
+
+	assert(psl != nil);
+	for(ps = psl; ps->next != nil; ps = ps->next)
+		;
+	return ps;
+}
+
+// Add it to end of ps item chain, adding in current state from ps.
+// Also, if tok is not nil, scan it for generic attributes and assign
+// the genattr field of the item accordingly.
+//
+// If ps->skipping is set the item is not linked in at all (only a
+// warning is printed when warn is set).
+// After linking, the one-shot break/clear bits are removed from
+// ps->curstate so they apply to this item only.
+static void
+additem(Pstate* ps, Item* it, Token* tok)
+{
+	int	aid;
+	int	any;
+	Rune*	i;
+	Rune*	c;
+	Rune*	s;
+	Rune*	t;
+	Attr*	a;
+	SEvent*	e;
+
+	if(ps->skipping) {
+		if(warn)
+			fprint(2, "warning: skipping item: %I\n", it);
+		return;
+	}
+	// stamp the item with the current anchor and layout state
+	it->anchorid = ps->curanchor;
+	it->state |= ps->curstate;
+	if(tok != nil) {
+		any = 0;
+		i = nil;
+		c = nil;
+		s = nil;
+		t = nil;
+		e = nil;
+		for(a = tok->attr; a != nil; a = a->next) {
+			aid = a->attid;
+			// only attributes flagged in attrinfo are handled here
+			if(!attrinfo[aid])
+				continue;
+			switch(aid) {
+			case Aid:
+				i = a->value;
+				break;
+
+			case Aclass:
+				c = a->value;
+				break;
+
+			case Astyle:
+				s = a->value;
+				break;
+
+			case Atitle:
+				t = a->value;
+				break;
+
+			default:
+				// anything else flagged in attrinfo must be a script event attribute
+				assert(aid >= Aonblur && aid <= Aonunload);
+				e = newscriptevent(scriptev[a->attid], a->value, e);
+				break;
+			}
+			// the value pointer has been taken over above; clear it in the
+			// attribute (presumably so the token does not free it later)
+			a->value = nil;
+			any = 1;
+		}
+		if(any)
+			it->genattr = newgenattr(i, c, s, t, e);
+	}
+	// break and clear requests are consumed by the item just added
+	ps->curstate &= ~(IFbrk|IFbrksp|IFnobrk|IFcleft|IFcright);
+	ps->prelastit = ps->lastit;
+	ps->lastit->next = it;
+	ps->lastit = it;
+}
+
+// Build a text item for s (which must not be nil) from the current
+// font, foreground color, vertical offset (biased by Voffbias) and
+// underline setting of ps.
+static Item*
+textit(Pstate* ps, Rune* s)
+{
+	int	voff;
+
+	assert(s != nil);
+	voff = ps->curvoff + Voffbias;
+	return newitext(s, ps->curfont, ps->curfg, voff, ps->curul);
+}
+
+// Add text item or items for s, paying attention to
+// current font, foreground, baseline offset, underline state,
+// and literal mode.  Unless we're in literal mode, compress
+// whitespace to single blank, and, if curstate has a break,
+// trim any leading whitespace.  Whether in literal mode or not,
+// turn nonbreaking spaces into spacer items with IFnobrk set.
+//
+// In literal mode, break up s at newlines and add breaks instead.
+// Also replace tabs with the appropriate number of spaces.
+// In nonliteral mode, break up the items every 100 or so characters
+// just to make the layout algorithm not go quadratic.
+//
+// addtext assumes ownership of s: on every path s is either handed
+// to a text item or freed.
+static void
+addtext(Pstate* ps, Rune* s)
+{
+	int	n;
+	int	i;
+	int	j;
+	int	k;
+	int	col;
+	int	c;
+	int	nsp;
+	Item*	it;
+	Rune*	ss;
+	Rune*	p;
+	Rune	buf[SMALLBUFSIZE];
+
+	assert(s != nil);
+	n = runestrlen(s);
+	i = 0;
+	j = 0;
+	if(ps->literal) {
+		// s[j:i] is the pending text not yet turned into an item;
+		// col tracks the output column for tab expansion
+		col = 0;
+		while(i < n) {
+			if(s[i] == '\n') {
+				if(i > j) {
+					// trim trailing blanks from line
+					for(k = i; k > j; k--)
+						if(s[k - 1] != ' ')
+							break;
+					if(k > j)
+						additem(ps, textit(ps, _Strndup(s+j, k-j)), nil);
+				}
+				addlinebrk(ps, 0);
+				j = i + 1;
+				col = 0;
+			}
+			else {
+				if(s[i] == '\t') {
+					col += i - j;
+					nsp = 8 - (col%8);
+					// make ss = s[j:i] + nsp spaces
+					ss = _newstr(i-j+nsp);
+					p = _Stradd(ss, s+j, i-j);
+					p = _Stradd(p, L(Ltab2space), nsp);
+					*p = 0;
+					additem(ps, textit(ps, ss), nil);
+					col += nsp;
+					j = i + 1;
+				}
+				else if(s[i] == NBSP) {
+					if(i > j)
+						additem(ps, textit(ps, _Strndup(s+j, i-j)), nil);
+					addnbsp(ps);
+					col += (i - j) + 1;
+					j = i + 1;
+				}
+			}
+			i++;
+		}
+		if(i > j) {
+			if(j == 0 && i == n) {
+				// just transfer s over
+				additem(ps, textit(ps, s), nil);
+			}
+			else {
+				additem(ps, textit(ps, _Strndup(s+j, i-j)), nil);
+				free(s);
+			}
+		}
+		else {
+			// nothing pending (s was empty, or ended in a newline, tab
+			// or nbsp): we still own s, so release it
+			free(s);
+		}
+	}
+	else {	// not literal mode
+		// at a break or at the very start of the item list,
+		// leading whitespace is dropped
+		if((ps->curstate&IFbrk) || ps->lastit == ps->items)
+			while(i < n) {
+				c = s[i];
+				if(c >= 256 || !isspace(c))
+					break;
+				i++;
+			}
+		// buf[0:p-buf] holds compressed text; s[j:i] is a run of
+		// ordinary characters not yet copied into buf
+		p = buf;
+		for(j = i; i < n; i++) {
+			assert(p+i-j < buf+SMALLBUFSIZE-1);
+			c = s[i];
+			if(c == NBSP) {
+				if(i > j)
+					p = _Stradd(p, s+j, i-j);
+				if(p > buf)
+					additem(ps, textit(ps, _Strndup(buf, p-buf)), nil);
+				p = buf;
+				addnbsp(ps);
+				j = i + 1;
+				continue;
+			}
+			if(c < 256 && isspace(c)) {
+				if(i > j)
+					p = _Stradd(p, s+j, i-j);
+				*p++ = ' ';
+				// swallow the rest of this whitespace run
+				while(i < n - 1) {
+					c = s[i + 1];
+					if(c >= 256 || !isspace(c))
+						break;
+					i++;
+				}
+				j = i + 1;
+			}
+			if(i - j >= 100) {
+				p = _Stradd(p, s+j, i+1-j);
+				j = i + 1;
+			}
+			if(p-buf >= 100) {
+				additem(ps, textit(ps, _Strndup(buf, p-buf)), nil);
+				p = buf;
+			}
+		}
+		if(i > j && j < n) {
+			assert(p+i-j < buf+SMALLBUFSIZE-1);
+			p = _Stradd(p, s+j, i-j);
+		}
+		// don't add a space if previous item ended in a space
+		if(p-buf == 1 && buf[0] == ' ' && ps->lastit != nil) {
+			it = ps->lastit;
+			if(it->tag == Itexttag) {
+				ss = ((Itext*)it)->s;
+				k = _Strlen(ss);
+				// the last character is ss[k-1]; ss[k] is the terminator
+				if(k > 0 && ss[k - 1] == ' ')
+					p = buf;
+			}
+		}
+		if(p > buf)
+			additem(ps, textit(ps, _Strndup(buf, p-buf)), nil);
+		free(s);
+	}
+}
+
+// Add a break to ps->curstate, with extra space if sp is true.
+// If there was a previous break, combine this one's parameters
+// with that to make the amt be the max of the two and the clr
+// be the most general. (amt will be 0 or 1)
+// Also, if the immediately preceding item was a text item,
+// trim any whitespace from the end of it, if not in literal mode.
+// Finally, if this is at the very beginning of the item list
+// (the only thing there is a null spacer), then don't add the space.
+static void
+addbrk(Pstate* ps, int sp, int clr)
+{
+	int	state;
+	Rune*	l;
+	int		nl;
+	Rune*	r;
+	int		nr;
+	Itext*	t;
+	Rune*	s;
+
+	state = ps->curstate;
+	// merge requested clear bits with any already pending
+	clr = clr|(state&(IFcleft|IFcright));
+	// extra space only if something other than the head spacer is present
+	if(sp && !(ps->lastit == ps->items))
+		sp = IFbrksp;
+	else
+		sp = 0;
+	ps->curstate = IFbrk|sp|(state&~(IFcleft|IFcright))|clr;
+	if(ps->lastit != ps->items) {
+		if(!ps->literal && ps->lastit->tag == Itexttag) {
+			t = (Itext*)ps->lastit;
+			// l[0:nl] is used below as the text with trailing whitespace removed
+			_splitr(t->s, _Strlen(t->s), notwhitespace, &l, &nl, &r, &nr);
+			// try to avoid making empty items
+			// but not crucial if the occasional one gets through
+			if(nl == 0 && ps->prelastit != nil) {
+				// unlink the all-whitespace item from the chain.
+				// NOTE(review): the unlinked item is not freed here — confirm
+				// whether that is intentional.
+				ps->lastit = ps->prelastit;
+				ps->lastit->next = nil;
+				ps->prelastit = nil;
+			}
+			else {
+				s = t->s;
+				if(nl == 0) {
+					// need a non-nil pointer to empty string
+					// (_Strdup(L(Lempty)) returns nil)
+					t->s = emalloc(sizeof(Rune));
+					t->s[0] = 0;
+				}
+				else
+					t->s = _Strndup(l, nl);
+				if(s)
+					free(s);
+			}
+		}
+	}
+}
+
+// Add the break for a <br>, or for a newline in preformatted text.
+// When the previous item is a spacer or a float, a vertical-line
+// spacer (ISPvline) is inserted first so that the line still takes up
+// a font's worth of height; this is what makes repeated <br>s produce
+// blank lines.  No spacer is inserted after any other kind of item.
+// The spacer is marked IFnobrk unless the previous item was itself an
+// ISPvline spacer, in which case it is marked IFbrk.
+static void
+addlinebrk(Pstate* ps, int clr)
+{
+	int	savedbrk;
+	int	spacerstate;
+	int	needspacer;
+	Item*	last;
+
+	// remember pending break bits; they are restored after the spacer
+	savedbrk = ps->curstate&(IFbrk|IFbrksp);
+	spacerstate = IFnobrk;
+	needspacer = 0;
+	last = ps->lastit;
+	if(last != nil) {
+		if(last->tag == Ifloattag)
+			needspacer = 1;
+		else if(last->tag == Ispacertag) {
+			needspacer = 1;
+			if(((Ispacer*)last)->spkind == ISPvline)
+				spacerstate = IFbrk;
+		}
+	}
+	if(needspacer) {
+		ps->curstate = (ps->curstate&~(IFbrk|IFbrksp))|spacerstate;
+		additem(ps, newispacer(ISPvline), nil);
+		ps->curstate = (ps->curstate&~(IFbrk|IFbrksp))|savedbrk;
+	}
+	addbrk(ps, 0, clr);
+}
+
+// Add a horizontal spacer item standing for a nonbreaking space.
+static void
+addnbsp(Pstate* ps)
+{
+	// A pending break still happens: there the nbsp is just
+	// undiscardable space.  Otherwise forbid a break before the spacer.
+	if(!(ps->curstate&IFbrk))
+		ps->curstate |= IFnobrk;
+	additem(ps, newispacer(ISPhspace), nil);
+	// and never allow a break before whatever follows the spacer
+	ps->curstate |= IFnobrk;
+}
+
+// Adjust the hang field of ps->curstate by delta, clamping at zero.
+// The hang is measured in 1/10ths of a tab and says how far the
+// current contiguous group of hanging items is shifted left of its
+// normal (indented) position.
+static void
+changehang(Pstate* ps, int delta)
+{
+	int	hang;
+
+	hang = (ps->curstate&IFhangmask) + delta;
+	if(hang < 0) {
+		if(warn)
+			fprint(2, "warning: hang went negative\n");
+		hang = 0;
+	}
+	ps->curstate &= ~IFhangmask;
+	ps->curstate |= hang;
+}
+
+// Adjust the indent field of ps->curstate by delta, clamping at zero.
+static void
+changeindent(Pstate* ps, int delta)
+{
+	int	indent;
+
+	indent = (ps->curstate&IFindentmask) >> IFindentshift;
+	indent += delta;
+	if(indent < 0) {
+		if(warn)
+			fprint(2, "warning: indent went negative\n");
+		indent = 0;
+	}
+	ps->curstate &= ~IFindentmask;
+	ps->curstate |= indent << IFindentshift;
+}
+
+// Push val onto stk and return val.
+// A full stack (Nestmax entries) is left unchanged; a warning is
+// printed when warn is set, and val is still returned.
+static int
+push(Stack* stk, int val)
+{
+	if(stk->n != Nestmax) {
+		stk->slots[stk->n] = val;
+		stk->n++;
+	}
+	else if(warn)
+		fprint(2, "warning: build stack overflow\n");
+	return val;
+}
+
+// Discard the top entry of stk; popping an empty stack does nothing.
+static void
+pop(Stack* stk)
+{
+	if(stk->n <= 0)
+		return;
+	stk->n--;
+}
+
+// Return the top entry of stk without removing it,
+// or dflt when the stack is empty.
+static int
+top(Stack* stk, int dflt)
+{
+	return (stk->n == 0)? dflt : stk->slots[stk->n-1];
+}
+
+// Remove the top entry (if any) and return the entry that is now on
+// top; dflt is returned when the stack was, or has become, empty.
+static int
+popretnewtop(Stack* stk, int dflt)
+{
+	if(stk->n != 0)
+		stk->n--;
+	if(stk->n != 0)
+		return stk->slots[stk->n-1];
+	return dflt;
+}
+
+// Make tostk a copy of fromstk: same depth, same entries.
+static void
+copystack(Stack* tostk, Stack* fromstk)
+{
+	int depth;
+
+	depth = fromstk->n;
+	memmove(tostk->slots, fromstk->slots, depth*sizeof(int));
+	tostk->n = depth;
+}
+
+// Pop the font style stack and recompute ps->curfont from the new tops.
+static void
+popfontstyle(Pstate* ps)
+{
+	pop(&ps->fntstylestk);
+	setcurfont(ps);
+}
+
+// Push sty on the font style stack and recompute ps->curfont.
+static void
+pushfontstyle(Pstate* ps, int sty)
+{
+	push(&ps->fntstylestk, sty);
+	setcurfont(ps);
+}
+
+// Pop the font size stack and recompute ps->curfont from the new tops.
+static void
+popfontsize(Pstate* ps)
+{
+	pop(&ps->fntsizestk);
+	setcurfont(ps);
+}
+
+// Push sz on the font size stack and recompute ps->curfont.
+// (setcurfont clamps the size to the range Tiny..Verylarge.)
+static void
+pushfontsize(Pstate* ps, int sz)
+{
+	push(&ps->fntsizestk, sz);
+	setcurfont(ps);
+}
+
+// Recompute ps->curfont from the tops of the style and size stacks
+// (defaults FntR and Normal).  The size is clamped to Tiny..Verylarge
+// and the font number is style*NumSize + size.
+static void
+setcurfont(Pstate* ps)
+{
+	int	style;
+	int	size;
+
+	style = top(&ps->fntstylestk, FntR);
+	size = top(&ps->fntsizestk, Normal);
+	if(size < Tiny)
+		size = Tiny;
+	else if(size > Verylarge)
+		size = Verylarge;
+	ps->curfont = style*NumSize + size;
+}
+
+// Pop the justification stack and update ps->curjust / curstate to match.
+static void
+popjust(Pstate* ps)
+{
+	pop(&ps->juststk);
+	setcurjust(ps);
+}
+
+// Push justification j and update ps->curjust / curstate to match.
+static void
+pushjust(Pstate* ps, int j)
+{
+	push(&ps->juststk, j);
+	setcurjust(ps);
+}
+
+// Bring ps->curjust and the IFrjust/IFcjust bits of ps->curstate in
+// line with the top of the justification stack (default ALleft).
+// Nothing is touched when the justification has not changed.
+static void
+setcurjust(Pstate* ps)
+{
+	int	just;
+	int	bits;
+
+	just = top(&ps->juststk, ALleft);
+	if(just == ps->curjust)
+		return;
+
+	ps->curjust = just;
+	bits = 0;
+	if(just == ALcenter)
+		bits = IFcjust;
+	else if(just == ALright)
+		bits = IFrjust;
+	ps->curstate = (ps->curstate & ~(IFrjust|IFcjust)) | bits;
+}
+
+// Do final rearrangement after table parsing is finished
+// and assign cells to grid points.
+//
+// On entry t->rows and t->cells are linked lists in reverse source
+// order.  On exit:
+//	t->rows is an array of t->nrow rows in source order, each with its
+//		cells list (nextinrow) in source order;
+//	t->cells is in source order, each cell trimmed by trim_cell;
+//	t->cols is an array of t->ncol columns;
+//	t->grid[r][c] points at the cell covering row r, column c
+//		(nil where no cell covers it), and each cell's row/col
+//		is its upper left grid point.
+static void
+finish_table(Table* t)
+{
+	int	ncol;
+	int	nrow;
+	int	r;
+	Tablerow*	rl;
+	Tablecell*	cl;
+	int*	rowspancnt;
+	Tablecell**	rowspancell;
+	int	ri;
+	int	ci;
+	Tablecell*	c;
+	Tablecell*	cnext;
+	Tablerow*	row;
+	Tablerow*	rownext;
+	int	rcols;
+	int	newncol;
+	int	k;
+	int	j;
+	int	cspan;
+	int	rspan;
+	int	i;
+
+	rl = t->rows;
+	t->nrow = nrow = _listlen((List*)rl);
+	t->rows = (Tablerow*)emalloc(nrow * sizeof(Tablerow));
+	ncol = 0;
+	// the list is in reverse order, so fill the array from the back
+	r = nrow - 1;
+	for(row = rl; row != nil; row = rownext) {
+		// copy the data from the allocated Tablerow into the array slot
+		// NOTE(review): the original list node is not freed here — confirm
+		// whether anything else still refers to it.
+		t->rows[r] = *row;
+		rownext = row->next;
+		row = &t->rows[r];
+		r--;
+		rcols = 0;
+		c = row->cells;
+
+		// If rowspan is > 1 but this is the last row,
+		// reset the rowspan
+		if(c != nil && c->rowspan > 1 && r == nrow-2)
+				c->rowspan = 1;
+
+		// reverse row->cells list (along nextinrow pointers)
+		row->cells = nil;
+		while(c != nil) {
+			cnext = c->nextinrow;
+			c->nextinrow = row->cells;
+			row->cells = c;
+			rcols += c->colspan;
+			c = cnext;
+		}
+		if(rcols > ncol)
+			ncol = rcols;
+	}
+	t->ncol = ncol;
+	t->cols = (Tablecol*)emalloc(ncol * sizeof(Tablecol));
+
+	// Reverse cells just so they are drawn in source order.
+	// Also, trim their contents so they don't end in whitespace.
+	t->cells = (Tablecell*)_revlist((List*)t->cells);
+	for(c = t->cells; c != nil; c= c->next)
+		trim_cell(c);
+	t->grid = (Tablecell***)emalloc(nrow * sizeof(Tablecell**));
+	for(i = 0; i < nrow; i++)
+		t->grid[i] = (Tablecell**)emalloc(ncol * sizeof(Tablecell*));
+
+	// The following arrays keep track of cells that are spanning
+	// multiple rows;  rowspancnt[i] is the number of rows left
+	// to be spanned in column i.
+	// When done, cell's (row,col) is upper left grid point.
+	rowspancnt = (int*)emalloc(ncol * sizeof(int));
+	rowspancell = (Tablecell**)emalloc(ncol * sizeof(Tablecell*));
+	for(ri = 0; ri < nrow; ri++) {
+		row = &t->rows[ri];
+		cl = row->cells;
+		ci = 0;
+		while(ci < ncol || cl != nil) {
+			if(ci < ncol && rowspancnt[ci] > 0) {
+				// column still occupied by a cell spanning down from above
+				t->grid[ri][ci] = rowspancell[ci];
+				rowspancnt[ci]--;
+				ci++;
+			}
+			else {
+				if(cl == nil) {
+					ci++;
+					continue;
+				}
+				c = cl;
+				cl = cl->nextinrow;
+				cspan = c->colspan;
+				rspan = c->rowspan;
+				if(ci + cspan > ncol) {
+					// because of row spanning, we calculated
+					// ncol incorrectly; adjust it
+					newncol = ci + cspan;
+					t->cols = (Tablecol*)erealloc(t->cols, newncol * sizeof(Tablecol));
+					rowspancnt = (int*)erealloc(rowspancnt, newncol * sizeof(int));
+					rowspancell = (Tablecell**)erealloc(rowspancell, newncol * sizeof(Tablecell*));
+					// clear only the k columns just added
+					k = newncol-ncol;
+					memset(t->cols+ncol, 0, k*sizeof(Tablecol));
+					memset(rowspancnt+ncol, 0, k*sizeof(int));
+					memset(rowspancell+ncol, 0, k*sizeof(Tablecell*));
+					for(j = 0; j < nrow; j++) {
+						t->grid[j] = (Tablecell**)erealloc(t->grid[j], newncol * sizeof(Tablecell*));
+						// keep the entries already assigned in columns 0..ncol-1
+						memset(t->grid[j]+ncol, 0, k*sizeof(Tablecell*));
+					}
+					t->ncol = ncol = newncol;
+				}
+				c->row = ri;
+				c->col = ci;
+				for(i = 0; i < cspan; i++) {
+					t->grid[ri][ci] = c;
+					if(rspan > 1) {
+						rowspancnt[ci] = rspan - 1;
+						rowspancell[ci] = c;
+					}
+					ci++;
+				}
+			}
+		}
+	}
+	// the span bookkeeping is only needed while building the grid
+	free(rowspancnt);
+	free(rowspancell);
+}
+
+// Remove tail of cell content until it isn't whitespace.
+// Repeatedly looks at the last item of c->content:
+//	- a text item that is all whitespace (or empty) is unlinked and
+//	  freed, and the new last item is examined in turn;
+//	- a text item with trailing whitespace has it trimmed off;
+//	- anything else, or any item marked IFnobrk, stops the trimming.
+static void
+trim_cell(Tablecell* c)
+{
+	int	dropping;
+	Rune*	s;
+	Rune*	x;
+	Rune*	y;
+	int		nx;
+	int		ny;
+	Item*	p;
+	Itext*	q;
+	Item*	pprev;
+
+	dropping = 1;
+	while(c->content != nil && dropping) {
+		// find the last item and its predecessor
+		p = c->content;
+		pprev = nil;
+		while(p->next != nil) {
+			pprev = p;
+			p = p->next;
+		}
+		dropping = 0;
+		if(!(p->state&IFnobrk) && p->tag == Itexttag) {
+			q = (Itext*)p;
+			s = q->s;
+			// x[0:nx] is the text up to the last non-whitespace rune,
+			// y[0:ny] the trailing whitespace
+			_splitr(s, _Strlen(s), notwhitespace, &x, &nx, &y, &ny);
+			if(nx == 0) {
+				// nothing but whitespace: remove the whole item
+				dropping = 1;
+			}
+			else if(ny != 0) {
+				q->s = _Strndup(x, nx);
+				free(s);
+			}
+		}
+		if(dropping) {
+			if(pprev == nil)
+				c->content = nil;
+			else
+				pprev->next = nil;
+			freeitem(p);
+		}
+	}
+}
+
+// Build the marker string for item number n (1-based) of a list of
+// type ty:
+//	LTdisc, LTsquare, LTcircle: a single bullet-like rune
+//	LT1:	decimal number followed by '.'
+//	LTa, LTA:	a..z, then aa..zz (lower or upper case), followed by '.'
+//	LTi, LTI:	roman numeral from the roman table (lower or upper), followed by '.'
+// Out-of-range n is clamped to the nearest supported value.
+// Returns nil for an unknown ty.
+// Caller must free answer (eventually).
+static Rune*
+listmark(uchar ty, int n)
+{
+	Rune*	s;
+	Rune*	t;
+	Rune*	num;
+	int	n2;
+	int	i;
+	int	base;
+
+	s = nil;
+	switch(ty) {
+	case LTdisc:
+	case LTsquare:
+	case LTcircle:
+		s = _newstr(1);
+		s[0] = (ty == LTdisc)? 0x2022		// bullet
+			: ((ty == LTsquare)? 0x220e	// filled square
+			    : 0x2218);				// degree
+		s[1] = 0;
+		break;
+
+	case LT1:
+		// NOTE(review): assumes _ltoStr returns a freshly allocated
+		// string (as libhtml's utility does) — it is freed once copied.
+		num = _ltoStr(n);
+		n2 = _Strlen(num);
+		s = _newstr(n2+1);
+		t = _Stradd(s, num, n2);
+		*t++ = '.';
+		*t = 0;
+		free(num);
+		break;
+
+	case LTa:
+	case LTA:
+		base = (ty == LTa)? 'a' : 'A';
+		n--;		// make n zero-based: 0 -> a, 25 -> z, 26 -> aa
+		i = 0;
+		if(n < 0)
+			n = 0;
+		s = _newstr((n <= 25)? 2 : 3);
+		if(n > 25) {
+			// leading letter: 1 -> a ... 26 -> z (clamped)
+			n2 = n/26;
+			n %= 26;
+			if(n2 > 26)
+				n2 = 26;
+			s[i++] = base + n2 - 1;
+		}
+		s[i++] = base + n;
+		s[i++] = '.';
+		s[i] = 0;
+		break;
+
+	case LTi:
+	case LTI:
+		if(n > NROMAN) {
+			if(warn)
+				fprint(2, "warning: unimplemented roman number > %d\n", NROMAN);
+			n = NROMAN;
+		}
+		// roman[] is indexed by n-1, so n must be at least 1
+		if(n < 1)
+			n = 1;
+		t = roman[n - 1];
+		n2 = _Strlen(t);
+		s = _newstr(n2+1);
+		for(i = 0; i < n2; i++)
+			s[i] = (ty == LTi)? tolower(t[i]) : t[i];
+		s[i++] = '.';
+		s[i] = 0;
+		break;
+	}
+	return s;
+}
+
+// Look up the map called name in di->maps.  When no such map exists,
+// create an empty one (with its own copy of name), put it at the head
+// of di->maps and return that.
+// The di->maps list keeps ownership of the returned map.
+static Map*
+getmap(Docinfo* di, Rune* name)
+{
+	Map*	m;
+
+	m = di->maps;
+	while(m != nil) {
+		if(_Strcmp(name, m->name) == 0)
+			return m;
+		m = m->next;
+	}
+
+	// not found: make a new, empty map
+	m = (Map*)emalloc(sizeof(Map));
+	m->name = _Strdup(name);
+	m->areas = nil;
+	m->next = di->maps;
+	di->maps = m;
+	return m;
+}
+
+// Allocate an Area with the given shape, href and target, chained in
+// front of link.  The Area takes over ownership of href.
+static Area*
+newarea(int shape, Rune* href, int target, Area* link)
+{
+	Area* area;
+
+	area = (Area*)emalloc(sizeof(Area));
+	area->next = link;
+	area->shape = shape;
+	area->target = target;
+	area->href = href;
+	return area;
+}
+
+// Return string value associated with attid in tok, nil if none.
+// Caller must free the result (eventually).
+// NOTE(review): relies on _tokaval storing nil in ans when the
+// attribute is absent — its return value is not checked here.
+static Rune*
+aval(Token* tok, int attid)
+{
+	Rune*	ans;
+
+	_tokaval(tok, attid, &ans, 1);	// transfers string ownership from token to ans
+	return ans;
+}
+
+// String value of attribute attid in tok; when tok has no such
+// attribute, a copy of dflt is returned instead.
+// Caller must free the result (eventually).
+static Rune*
+astrval(Token* tok, int attid, Rune* dflt)
+{
+	Rune*	val;
+
+	if(!_tokaval(tok, attid, &val, 1))
+		return _Strdup(dflt);
+	// string ownership has moved from the token to val
+	return val;
+}
+
+// Integer value of attribute attid in tok.
+// dflt is returned when the attribute is missing or has no value.
+static int
+aintval(Token* tok, int attid, int dflt)
+{
+	Rune*	val;
+
+	if(_tokaval(tok, attid, &val, 0) && val != nil)
+		return toint(val);
+	return dflt;
+}
+
+// Like aintval, but negative attribute values are turned into 0.
+// (dflt itself is returned as given.)
+static int
+auintval(Token* tok, int attid, int dflt)
+{
+	Rune* val;
+	int num;
+
+	if(_tokaval(tok, attid, &val, 0) && val != nil) {
+		num = toint(val);
+		if(num < 0)
+			num = 0;
+		return num;
+	}
+	return dflt;
+}
+
+// Convert s to an int (base 10).  When warn is set, complain if the
+// number is followed by anything other than whitespace.
+static int
+toint(Rune* s)
+{
+	int val;
+	Rune* rest;
+
+	val = _Strtol(s, &rest, 10);
+	if(warn && *rest != 0 && _Strclass(rest, notwhitespace) != nil)
+		fprint(2, "warning: expected integer, got %S\n", s);
+	return val;
+}
+
+// Value of attribute attid in tok, translated to an int through the
+// string table tab (ntab entries).  dflt is returned when the
+// attribute is absent or its value is not in the table; the latter
+// also prints a warning when warn is set.
+static int
+atabval(Token* tok, int attid, StringInt* tab, int ntab, int dflt)
+{
+	Rune*	name;
+	int	val;
+
+	if(!_tokaval(tok, attid, &name, 0))
+		return dflt;
+
+	val = dflt;
+	if(_lookup(tab, ntab, name, _Strlen(name), &val))
+		return val;
+
+	if(warn)
+		fprint(2, "warning: name not found in table lookup: %S\n", name);
+	return dflt;
+}
+
+// Color value of attribute attid in tok (converted by color()),
+// or dflt when tok has no such attribute.
+static int
+acolorval(Token* tok, int attid, int dflt)
+{
+	Rune*	spec;
+
+	if(!_tokaval(tok, attid, &spec, 0))
+		return dflt;
+	return color(spec, dflt);
+}
+
+// Target frame id named by the Atarget attribute of tok
+// (via targetid()), or dflt when the attribute is absent.
+static int
+atargval(Token* tok, int dflt)
+{
+	Rune*	name;
+
+	if(!_tokaval(tok, Atarget, &name, 0))
+		return dflt;
+	return targetid(name);
+}
+
+// List type named by the Atype attribute of tok, or dflt when the
+// attribute is absent or not recognized.
+// Special for list types, where "i" and "I" are different,
+// but "square" and "SQUARE" are the same:
+// single-character values are matched exactly, longer values are
+// matched case-insensitively against circle, disc and square.
+// Unrecognized values produce a warning when warn is set.
+static int
+listtyval(Token* tok, int dflt)
+{
+	Rune*	aval;
+	int	ans;
+	int	n;
+
+	ans = dflt;
+	if(_tokaval(tok, Atype, &aval, 0)) {
+		n = _Strlen(aval);
+		if(n == 1) {
+			switch(aval[0]) {
+			case '1':
+				ans = LT1;
+				break;
+			case 'A':
+				ans = LTA;
+				break;
+			case 'I':
+				ans = LTI;
+				break;
+			case 'a':
+				ans = LTa;
+				break;
+			case 'i':
+				ans = LTi;
+				break;
+			default:
+				// aval[0] is a Rune, so print it with %C
+				if(warn)
+					fprint(2, "warning: unknown list element type %C\n", aval[0]);
+				break;
+			}
+		}
+		else {
+			if(!_Strncmpci(aval, n, L(Lcircle)))
+				ans = LTcircle;
+			else if(!_Strncmpci(aval, n, L(Ldisc)))
+				ans = LTdisc;
+			else if(!_Strncmpci(aval, n, L(Lsquare)))
+				ans = LTsquare;
+			else {
+				if(warn)
+					fprint(2, "warning: unknown list element type %S\n", aval);
+			}
+		}
+	}
+	return ans;
+}
+
+// URL value of attribute attid in tok, with all whitespace removed.
+// A copy of dflt is returned when the attribute is absent, has no
+// value, or nothing is left after removing whitespace.
+// FOR NOW the url is left relative: base is not used.
+// Caller must free the result (eventually).
+static Rune*
+aurlval(Token* tok, int attid, Rune* dflt, Rune* base)
+{
+	Rune*	raw;
+	Rune*	cleaned;
+
+	USED(base);
+	if(_tokaval(tok, attid, &raw, 0) && raw != nil) {
+		cleaned = removeallwhite(raw);
+		if(cleaned != nil)
+			return cleaned;
+	}
+	return _Strdup(dflt);
+}
+
+// Return a newly allocated copy of s with every whitespace character
+// removed, including whitespace in the middle of the string.
+// This repairs some sloppily written URL strings.
+// Only code points below 256 are tested with isspace.
+static Rune*
+removeallwhite(Rune* s)
+{
+	int	len;
+	int	keep;
+	int	src;
+	int	dst;
+	int	c;
+	Rune*	ans;
+
+	len = _Strlen(s);
+
+	// count the characters that survive
+	keep = 0;
+	for(src = 0; src < len; src++) {
+		c = s[src];
+		if(c >= 256 || !isspace(c))
+			keep++;
+	}
+
+	// nothing to strip: a plain duplicate will do
+	if(keep >= len)
+		return _Strdup(s);
+
+	ans = _newstr(keep);
+	dst = 0;
+	for(src = 0; src < len; src++) {
+		c = s[src];
+		if(c >= 256 || !isspace(c))
+			ans[dst++] = c;
+	}
+	ans[dst] = 0;
+	return ans;
+}
+
+// Value of a flag-like attribute: 0 when attid is absent from tok,
+// 1 when it is present without a value, otherwise the value
+// converted to an int.
+static int
+aflagval(Token* tok, int attid)
+{
+	Rune*	sval;
+
+	if(!_tokaval(tok, attid, &sval, 0))
+		return 0;
+	if(sval == nil)
+		return 1;
+	return toint(sval);
+}
+
+// Return an Align value holding the given horizontal and vertical alignments.
+static Align
+makealign(int halign, int valign)
+{
+	Align	al;
+
+	al.halign = halign;
+	al.valign = valign;
+	return al;
+}
+
+// Build an Align from the Aalign (horizontal) and Avalign (vertical)
+// attributes of tok; each one defaults to ALnone.
+static Align
+aalign(Token* tok)
+{
+	Align	al;
+
+	al.halign = atabval(tok, Aalign, align_tab, NALIGNTAB, ALnone);
+	al.valign = atabval(tok, Avalign, align_tab, NALIGNTAB, ALnone);
+	return al;
+}
+
+// Dimen described by attribute attid of tok;
+// a Dnone dimension when the attribute is absent.
+static Dimen
+adimen(Token* tok, int attid)
+{
+	Rune*	spec;
+
+	if(!_tokaval(tok, attid, &spec, 0))
+		return makedimen(Dnone, 0);
+	return parsedim(spec, _Strlen(spec));
+}
+
+// Parse s[0:n] as num[.[num]][unit][%|*]
+// Returns a Dimen made from (kind, spec):
+//	no leading digits and the text is exactly "*": Drelative, 1
+//	no leading digits otherwise: Dnone, 0
+//	leading digits: Dpixels by default, Dpercent if a '%' follows,
+//		Drelative if a '*' follows.
+// While parsing, spec is kept in thousandths so that up to three
+// fraction digits and the unit conversions can be applied with
+// integer arithmetic; it is divided by 1000 at the end.
+static Dimen
+parsedim(Rune* s, int ns)
+{
+	int	kind;
+	int	spec;
+	Rune*	l;
+	int	nl;
+	Rune*	r;
+	int	nr;
+	int	mul;
+	int	i;
+	Rune*	f;
+	int	nf;
+	int	Tkdpi;
+	Rune*	units;
+
+	kind = Dnone;
+	spec = 0;
+	// l[0:nl] is treated as the leading digits, r[0:nr] as the rest
+	_splitl(s, ns, L(Lnot0to9), &l, &nl, &r, &nr);
+	if(nl != 0) {
+		spec = 1000*_Strtol(l, nil, 10);
+		if(nr > 0 && r[0] == '.') {
+			// f[0:nf] is treated as the fraction digits after the '.'
+			_splitl(r+1, nr-1, L(Lnot0to9), &f, &nf, &r, &nr);
+			if(nf != 0) {
+				// weights 100, 10, 1; mul reaches 0 after the third
+				// digit, so later digits contribute nothing
+				mul = 100;
+				for(i = 0; i < nf; i++) {
+					spec = spec + mul*(f[i]-'0');
+					mul = mul/10;
+				}
+			}
+		}
+		kind = Dpixels;
+		if(nr != 0) {
+			if(nr >= 2) {
+				// the next two characters are consumed as a unit name,
+				// whether or not they turn out to be a known unit
+				Tkdpi = 100;
+				units = r;
+				r = r+2;
+				nr -= 2;
+				if(!_Strncmpci(units, 2, L(Lpt)))
+					spec = (spec*Tkdpi)/72;
+				else if(!_Strncmpci(units, 2, L(Lpi)))
+					spec = (spec*12*Tkdpi)/72;
+				else if(!_Strncmpci(units, 2, L(Lin)))
+					spec = spec*Tkdpi;
+				else if(!_Strncmpci(units, 2, L(Lcm)))
+					spec = (spec*100*Tkdpi)/254;
+				else if(!_Strncmpci(units, 2, L(Lmm)))
+					spec = (spec*10*Tkdpi)/254;
+				else if(!_Strncmpci(units, 2, L(Lem)))
+					spec = spec*15;
+				else {
+					if(warn)
+						fprint(2, "warning: unknown units %C%Cs\n", units[0], units[1]);
+				}
+			}
+			if(nr >= 1) {
+				if(r[0] == '%')
+					kind = Dpercent;
+				else if(r[0] == '*')
+					kind = Drelative;
+			}
+		}
+		// back from thousandths to whole units
+		spec = spec/1000;
+	}
+	else if(nr == 1 && r[0] == '*') {
+		spec = 1;
+		kind = Drelative;
+	}
+	return makedimen(kind, spec);
+}
+
+// Parse attribute attid of tok as a list of dimensions separated by
+// the characters in L(Lcommaspace) (at most SMALLBUFSIZE pieces).
+// On success *pans is a newly allocated array and *panslen its
+// length; when the attribute is absent or yields no pieces they are
+// set to nil and 0.
+static void
+setdimarray(Token* tok, int attid, Dimen** pans, int* panslen)
+{
+	Rune*	s;
+	Dimen*	dims;
+	int	k;
+	int	npieces;
+	Rune* piece[SMALLBUFSIZE];
+	int	piecelen[SMALLBUFSIZE];
+
+	*pans = nil;
+	*panslen = 0;
+	if(!_tokaval(tok, attid, &s, 0))
+		return;
+
+	npieces = _splitall(s, _Strlen(s), L(Lcommaspace), piece, piecelen, SMALLBUFSIZE);
+	if(npieces <= 0)
+		return;
+
+	dims = (Dimen*)emalloc(npieces * sizeof(Dimen));
+	for(k = 0; k < npieces; k++)
+		dims[k] = parsedim(piece[k], piecelen[k]);
+	*pans = dims;
+	*panslen = npieces;
+}
+
+// Return a Background value with the given image url and color.
+// The url pointer is stored as is, not copied.
+static Background
+makebackground(Rune* imageurl, int color)
+{
+	Background bg;
+
+	bg.image = imageurl;
+	bg.color = color;
+	return bg;
+}
+
+// Allocate a text item (tag Itexttag) for string s, which must not be nil.
+// The pointer s is stored in the item, not copied (freeitem frees it),
+// along with font fnt, foreground fg, vertical offset voff and underline ul.
+static Item*
+newitext(Rune* s, int fnt, int fg, int voff, int ul)
+{
+	Itext* t;
+
+	assert(s != nil);
+	t = (Itext*)emalloc(sizeof(Itext));
+	t->item.tag = Itexttag;
+	t->s = s;
+	t->fnt = fnt;
+	t->fg = fg;
+	t->voff = voff;
+	t->ul = ul;
+	return (Item*)t;
+}
+
+// Allocate a rule item (tag Iruletag) with the given alignment,
+// size, noshade flag and width specification.
+static Item*
+newirule(int align, int size, int noshade, Dimen wspec)
+{
+	Irule* r;
+
+	r = (Irule*)emalloc(sizeof(Irule));
+	r->item.tag = Iruletag;
+	r->align = align;
+	r->size = size;
+	r->noshade = noshade;
+	r->wspec = wspec;
+	return (Item*)r;
+}
+
+// Allocate an image item (tag Iimagetag).
+// src and altrep are stored as given (freeitem frees them); map is
+// only referenced, it is owned elsewhere.  When ismap is set the item
+// state starts out as IFsmap, otherwise 0.  ctlid starts at -1.
+static Item*
+newiimage(Rune* src, Rune* altrep, int align, int width, int height,
+		int hspace, int vspace, int border, int ismap, Map* map)
+{
+	Iimage* img;
+
+	img = (Iimage*)emalloc(sizeof(Iimage));
+	img->item.tag = Iimagetag;
+	img->item.state = ismap? IFsmap : 0;
+
+	// source and replacement text
+	img->imsrc = src;
+	img->altrep = altrep;
+
+	// geometry
+	img->align = align;
+	img->imwidth = width;
+	img->imheight = height;
+	img->hspace = hspace;
+	img->vspace = vspace;
+	img->border = border;
+
+	img->map = map;
+	img->ctlid = -1;
+	return (Item*)img;
+}
+
+// Allocate a form field item (tag Iformfieldtag) referring to ff.
+// freeitem frees ff along with the item.
+static Item*
+newiformfield(Formfield* ff)
+{
+	Iformfield* f;
+
+	f = (Iformfield*)emalloc(sizeof(Iformfield));
+	f->item.tag = Iformfieldtag;
+	f->formfield = ff;
+	return (Item*)f;
+}
+
+// Allocate a table item (tag Itabletag) referring to tab.
+// freeitem frees tab along with the item.
+static Item*
+newitable(Table* tab)
+{
+	Itable* t;
+
+	t = (Itable*)emalloc(sizeof(Itable));
+	t->item.tag = Itabletag;
+	t->table = tab;
+	return (Item*)t;
+}
+
+// Allocate a float item (tag Ifloattag) wrapping item it, floated
+// on the given side.  The float's own state starts out as IFwrap.
+// freeitem frees the wrapped item along with the float.
+static Item*
+newifloat(Item* it, int side)
+{
+	Ifloat* f;
+
+	f = (Ifloat*)emalloc(sizeof(Ifloat));
+	f->_item.tag = Ifloattag;
+	f->_item.state = IFwrap;
+	f->item = it;
+	f->side = side;
+	return (Item*)f;
+}
+
+// Allocate a spacer item (tag Ispacertag) of kind spkind
+// (ISPnull, ISPvline and ISPhspace are the kinds used in this file).
+static Item*
+newispacer(int spkind)
+{
+	Ispacer* s;
+
+	s = (Ispacer*)emalloc(sizeof(Ispacer));
+	s->item.tag = Ispacertag;
+	s->spkind = spkind;
+	return (Item*)s;
+}
+
+// Free one item (caller must deal with next pointer).
+// Frees whatever the item owns according to its tag (text string,
+// image strings, form field, table, or the item wrapped by a float),
+// then its generic attributes, then the item itself.
+// A nil item is ignored.
+static void
+freeitem(Item* it)
+{
+	Iimage* ii;
+	Genattr* ga;
+
+	if(it == nil)
+		return;
+
+	switch(it->tag) {
+	case Itexttag:
+		free(((Itext*)it)->s);
+		break;
+	case Iimagetag:
+		// the image's map is owned elsewhere and is not freed here
+		ii = (Iimage*)it;
+		free(ii->imsrc);
+		free(ii->altrep);
+		break;
+	case Iformfieldtag:
+		freeformfield(((Iformfield*)it)->formfield);
+		break;
+	case Itabletag:
+		freetable(((Itable*)it)->table);
+		break;
+	case Ifloattag:
+		freeitem(((Ifloat*)it)->item);
+		break;
+	}
+	ga = it->genattr;
+	if(ga != nil) {
+		free(ga->id);
+		free(ga->class);
+		free(ga->style);
+		free(ga->title);
+		freescriptevents(ga->events);
+		// the Genattr record itself belongs to the item too
+		free(ga);
+	}
+	free(it);
+}
+
+// Free every item on the list that starts at ithead and is linked
+// through the next pointers.
+void
+freeitems(Item* ithead)
+{
+	Item* it;
+	Item* following;
+
+	for(it = ithead; it != nil; it = following) {
+		// grab the link before the item goes away
+		following = it->next;
+		freeitem(it);
+	}
+}
+
+// Free a form field: its name and value strings, every option on its
+// options list (value and display strings plus the Option node), and
+// the Formfield itself.  A nil ff is ignored.
+static void
+freeformfield(Formfield* ff)
+{
+	Option* o;
+	Option* onext;
+
+	if(ff == nil)
+		return;
+
+	free(ff->name);
+	free(ff->value);
+	for(o = ff->options; o != nil; o = onext) {
+		onext = o->next;
+		free(o->value);
+		free(o->display);
+		free(o);
+	}
+	free(ff);
+}
+
+// Free a table: every cell (content items and the cell node), the
+// grid, the row and column arrays, the caption items, and the Table
+// itself.  A nil t is ignored.
+static void
+freetable(Table* t)
+{
+	int i;
+	Tablecell* c;
+	Tablecell* cnext;
+
+	if(t == nil)
+		return;
+
+	// We'll find all the unique cells via t->cells and next pointers.
+	// (Other pointers to cells in the table are duplicates of these)
+	for(c = t->cells; c != nil; c = cnext) {
+		cnext = c->next;
+		freeitems(c->content);
+		free(c);
+	}
+	if(t->grid != nil) {
+		for(i = 0; i < t->nrow; i++)
+			free(t->grid[i]);
+		free(t->grid);
+	}
+	free(t->rows);
+	free(t->cols);
+	freeitems(t->caption);
+	free(t);
+}
+
+// Free a form: its name and action strings and the Form itself.
+// A nil f is ignored.
+static void
+freeform(Form* f)
+{
+	if(f == nil)
+		return;
+
+	free(f->name);
+	free(f->action);
+	// Form doesn't own its fields (Iformfield items do)
+	free(f);
+}
+
+// Free every form on the list headed by fhead.
+static void
+freeforms(Form* fhead)
+{
+	Form* f;
+	Form* following;
+
+	f = fhead;
+	while(f != nil) {
+		following = f->next;
+		freeform(f);
+		f = following;
+	}
+}
+
+// Free an anchor: its name and href strings and the Anchor itself.
+// A nil a is ignored.
+static void
+freeanchor(Anchor* a)
+{
+	if(a == nil)
+		return;
+
+	free(a->name);
+	free(a->href);
+	free(a);
+}
+
+// Free every anchor on the list headed by ahead.
+static void
+freeanchors(Anchor* ahead)
+{
+	Anchor* a;
+	Anchor* following;
+
+	a = ahead;
+	while(a != nil) {
+		following = a->next;
+		freeanchor(a);
+		a = following;
+	}
+}
+
+// Free a destination anchor: its name string and the DestAnchor itself.
+// A nil da is ignored.
+static void
+freedestanchor(DestAnchor* da)
+{
+	if(da == nil)
+		return;
+
+	free(da->name);
+	free(da);
+}
+
+// Free every destination anchor on the list headed by dahead.
+static void
+freedestanchors(DestAnchor* dahead)
+{
+	DestAnchor* da;
+	DestAnchor* following;
+
+	da = dahead;
+	while(da != nil) {
+		following = da->next;
+		freedestanchor(da);
+		da = following;
+	}
+}
+
+// Free an area: its href string, its coords array and the Area
+// itself.  A nil a is ignored.
+// The caller must pick up a->next before calling (freemap does).
+static void
+freearea(Area* a)
+{
+	if(a == nil)
+		return;
+	free(a->href);
+	free(a->coords);
+	free(a);
+}
+
+static void freekidinfos(Kidinfo* khead);
+
+// Free one Kidinfo.  A frameset entry frees its rows and cols arrays
+// and, recursively, its kidinfos list; any other entry frees its src
+// and name strings.  The Kidinfo itself is freed in both cases.
+// k must not be nil (it is dereferenced without a check).
+static void
+freekidinfo(Kidinfo* k)
+{
+	if(k->isframeset) {
+		free(k->rows);
+		free(k->cols);
+		freekidinfos(k->kidinfos);
+	}
+	else {
+		free(k->src);
+		free(k->name);
+	}
+	free(k);
+}
+
+// Free every Kidinfo on the list headed by khead.
+static void
+freekidinfos(Kidinfo* khead)
+{
+	Kidinfo* k;
+	Kidinfo* following;
+
+	k = khead;
+	while(k != nil) {
+		following = k->next;
+		freekidinfo(k);
+		k = following;
+	}
+}
+
+// Free a map: its name string, each area on its areas list (through
+// freearea) and the Map itself.  A nil m is ignored.
+static void
+freemap(Map* m)
+{
+	Area* area;
+	Area* following;
+
+	if(m == nil)
+		return;
+
+	free(m->name);
+	area = m->areas;
+	while(area != nil) {
+		following = area->next;
+		freearea(area);
+		area = following;
+	}
+	free(m);
+}
+
+// Free every map on the list headed by mhead.
+static void
+freemaps(Map* mhead)
+{
+	Map* m;
+	Map* following;
+
+	m = mhead;
+	while(m != nil) {
+		following = m->next;
+		freemap(m);
+		m = following;
+	}
+}
+
+// Free a Docinfo and what it owns: the src, base and refresh strings,
+// the background item, and the kidinfo, anchors, dests, forms and
+// maps lists.  A nil d is ignored.
+void
+freedocinfo(Docinfo* d)
+{
+	if(d == nil)
+		return;
+	free(d->src);
+	free(d->base);
+	freeitem((Item*)d->backgrounditem);
+	free(d->refresh);
+	freekidinfos(d->kidinfo);
+	freeanchors(d->anchors);
+	freedestanchors(d->dests);
+	freeforms(d->forms);
+	freemaps(d->maps);
+	// tables, images, and formfields are freed when
+	// the items pointing at them are freed
+	free(d);
+}
+
+// Free a single Pstate record (only the struct itself).
+// Currently, someone else owns all the memory
+// pointed to by things in a Pstate.
+static void
+freepstate(Pstate* p)
+{
+	free(p);
+}
+
+// Free every Pstate record on the stack headed by pshead.
+// Only the records themselves are freed, not what they point to.
+static void
+freepstatestack(Pstate* pshead)
+{
+	Pstate* p;
+	Pstate* below;
+
+	p = pshead;
+	while(p != nil) {
+		below = p->next;
+		free(p);
+		p = below;
+	}
+}
+
+// Format routine for an Item*: fetches the Item* from f->args, builds
+// a description of it in a local buffer and passes that to fmtstrcpy.
+// The text starts with flags decoded from it->state, continues with
+// fields specific to it->tag, and ends with the item's width, height,
+// ascent and anchorid followed by a newline.
+// NOTE(review): presumably installed for the %I verb used by printitems
+// and validitems; the fmtinstall call is not visible here -- confirm.
+static int
+Iconv(Fmt *f)
+{
+	Item*	it;
+	Itext*	t;
+	Irule*	r;
+	Iimage*	i;
+	Ifloat*	fl;
+	int	state;
+	Formfield*	ff;
+	Rune*	ty;
+	Tablecell*	c;
+	Table*	tab;
+	char*	p;
+	int	cl;
+	int	hang;
+	int	indent;
+	int	bi;
+	int	nbuf;
+	char	buf[BIGBUFSIZE];
+
+	it = va_arg(f->args, Item*);
+	bi = 0;
+	nbuf = sizeof(buf);
+	state = it->state;
+	// one byte less than the buffer: leaves room for buf[bi] = 0 at the end
+	nbuf = nbuf-1;
+	// break flag, with which side(s) to clear
+	if(state&IFbrk) {
+		cl = state&(IFcleft|IFcright);
+		p = "";
+		if(cl) {
+			if(cl == (IFcleft|IFcright))
+				p = " both";
+			else if(cl == IFcleft)
+				p = " left";
+			else
+				p = " right";
+		}
+		bi = snprint(buf, nbuf, "brk(%d%s)", (state&IFbrksp)? 1 : 0, p);
+	}
+	// remaining state flags; bi tracks how much of buf is used so far
+	if(state&IFnobrk)
+		bi += snprint(buf+bi, nbuf-bi, " nobrk");
+	if(!(state&IFwrap))
+		bi += snprint(buf+bi, nbuf-bi, " nowrap");
+	if(state&IFrjust)
+		bi += snprint(buf+bi, nbuf-bi, " rjust");
+	if(state&IFcjust)
+		bi += snprint(buf+bi, nbuf-bi, " cjust");
+	if(state&IFsmap)
+		bi += snprint(buf+bi, nbuf-bi, " smap");
+	indent = (state&IFindentmask) >> IFindentshift;
+	if(indent > 0)
+		bi += snprint(buf+bi, nbuf-bi, " indent=%d", indent);
+	hang = state&IFhangmask;
+	if(hang > 0)
+		bi += snprint(buf+bi, nbuf-bi, " hang=%d", hang);
+
+	// tag-specific part
+	switch(it->tag) {
+	case Itexttag:
+		t = (Itext*)it;
+		bi += snprint(buf+bi, nbuf-bi, " Text '%S', fnt=%d, fg=%x", t->s, t->fnt, t->fg);
+		break;
+
+	case Iruletag:
+		r = (Irule*)it;
+		bi += snprint(buf+bi, nbuf-bi, "Rule size=%d, al=%S, wspec=", r->size, stringalign(r->align));
+		bi += dimprint(buf+bi, nbuf-bi, r->wspec);
+		break;
+
+	case Iimagetag:
+		i = (Iimage*)it;
+		bi += snprint(buf+bi, nbuf-bi,
+			"Image src=%S, alt=%S, al=%S, w=%d, h=%d hsp=%d, vsp=%d, bd=%d, map=%S",
+			i->imsrc, i->altrep? i->altrep : L(Lempty), stringalign(i->align), i->imwidth, i->imheight,
+			i->hspace, i->vspace, i->border, i->map?i->map->name : L(Lempty));
+		break;
+
+	case Iformfieldtag:
+		ff = ((Iformfield*)it)->formfield;
+		// name of the field type: two special cases, otherwise looked up in input_tab
+		if(ff->ftype == Ftextarea)
+			ty = L(Ltextarea);
+		else if(ff->ftype == Fselect)
+			ty = L(Lselect);
+		else {
+			ty = _revlookup(input_tab, NINPUTTAB, ff->ftype);
+			if(ty == nil)
+				ty = L(Lnone);
+		}
+		// NOTE(review): ff->form is dereferenced without a nil check,
+		// while validformfield accepts a nil form -- confirm it is always set here
+		bi += snprint(buf+bi, nbuf-bi, "Formfield %S, fieldid=%d, formid=%d, name=%S, value=%S",
+			ty, ff->fieldid, ff->form->formid, ff->name?  ff->name : L(Lempty),
+			ff->value? ff->value : L(Lempty));
+		break;
+
+	case Itabletag:
+		tab = ((Itable*)it)->table;
+		bi += snprint(buf+bi, nbuf-bi, "Table tableid=%d, width=", tab->tableid);
+		bi += dimprint(buf+bi, nbuf-bi, tab->width);
+		bi += snprint(buf+bi, nbuf-bi, ", nrow=%d, ncol=%d, ncell=%d, totw=%d, toth=%d\n",
+			tab->nrow, tab->ncol, tab->ncell, tab->totw, tab->toth);
+		// one short entry per cell; cell contents are not printed
+		for(c = tab->cells; c != nil; c = c->next)
+			bi += snprint(buf+bi, nbuf-bi, "Cell %d.%d, at (%d,%d) ",
+					tab->tableid, c->cellid, c->row, c->col);
+		bi += snprint(buf+bi, nbuf-bi, "End of Table %d", tab->tableid);
+		break;
+
+	case Ifloattag:
+		fl = (Ifloat*)it;
+		// the floated item is formatted through %I as well
+		bi += snprint(buf+bi, nbuf-bi, "Float, x=%d y=%d, side=%S, it=%I",
+			fl->x, fl->y, stringalign(fl->side), fl->item);
+		bi += snprint(buf+bi, nbuf-bi, "\n\t");
+		break;
+
+	case Ispacertag:
+		p = "";
+		switch(((Ispacer*)it)->spkind) {
+		case ISPnull:
+			p = "null";
+			break;
+		case ISPvline:
+			p = "vline";
+			break;
+		case ISPhspace:
+			p = "hspace";
+			break;
+		}
+		bi += snprint(buf+bi, nbuf-bi, "Spacer %s ", p);
+		break;
+	}
+	// fields common to every item
+	bi += snprint(buf+bi, nbuf-bi, " w=%d, h=%d, a=%d, anchor=%d\n",
+			it->width, it->height, it->ascent, it->anchorid);
+	buf[bi] = 0;
+	return fmtstrcpy(f, buf);
+}
+
+// Name of alignment value a as a Rune string, looked up in align_tab;
+// L(Lnone) is returned when the table has no entry for a.
+static Rune*
+stringalign(int a)
+{
+	Rune*	name;
+
+	name = _revlookup(align_tab, NALIGNTAB, a);
+	return name != nil ? name : L(Lnone);
+}
+
+// Put at most nbuf chars of representation of d into buf,
+// and return number of characters put.
+// The representation is the decimal spec followed by '%' for a
+// Dpercent dimension or '*' for a Drelative one.  The suffix character
+// is stored over the terminator written by snprint, so the result is
+// not guaranteed to be NUL-terminated; callers terminate it themselves.
+// Nothing is written and 0 is returned when nbuf is not positive;
+// without that guard a negative snprint result would make the suffix
+// land before the start of buf.
+static int
+dimprint(char* buf, int nbuf, Dimen d)
+{
+	int	n;
+	int	k;
+
+	if(nbuf <= 0)
+		return 0;
+	n = snprint(buf, nbuf, "%d", dimenspec(d));
+	if(n < 0)
+		n = 0;
+	k = dimenkind(d);
+	// append the kind suffix only while it still fits inside buf[0:nbuf]
+	if(k == Dpercent && n < nbuf)
+		buf[n++] = '%';
+	if(k == Drelative && n < nbuf)
+		buf[n++] = '*';
+	return n;
+}
+
+// Debugging aid: write msg and a newline to file descriptor 2, then
+// every item on the list, each formatted with %I.
+void
+printitems(Item* items, char* msg)
+{
+	Item*	cur;
+
+	fprint(2, "%s\n", msg);
+	for(cur = items; cur != nil; cur = cur->next)
+		fprint(2, "%I", cur);
+}
+
+// Allocate a Genattr with emalloc and store the five arguments in it.
+// The pointers are stored as given; nothing is copied.
+static Genattr*
+newgenattr(Rune* id, Rune* class, Rune* style, Rune* title, SEvent* events)
+{
+	Genattr* attr;
+
+	attr = (Genattr*)emalloc(sizeof(Genattr));
+	attr->events = events;
+	attr->title = title;
+	attr->style = style;
+	attr->class = class;
+	attr->id = id;
+	return attr;
+}
+
+// Allocate a Formfield with emalloc, fill it from the arguments and
+// chain it in front of link.  ctlid starts out as -1.  name and value
+// are stored as given; nothing is copied.
+static Formfield*
+newformfield(int ftype, int fieldid, Form* form, Rune* name,
+		Rune* value, int size, int maxlength, Formfield* link)
+{
+	Formfield* field;
+
+	field = (Formfield*)emalloc(sizeof(Formfield));
+	field->next = link;
+	field->ctlid = -1;
+	field->form = form;
+	field->ftype = ftype;
+	field->fieldid = fieldid;
+	field->name = name;
+	field->value = value;
+	field->size = size;
+	field->maxlength = maxlength;
+	return field;
+}
+
+// Allocate an Option with emalloc and chain it in front of link.
+// Ownership of value and display passes to the new Option.
+static Option*
+newoption(int selected, Rune* value, Rune* display, Option* link)
+{
+	Option *opt;
+
+	opt = (Option*)emalloc(sizeof(Option));
+	opt->next = link;
+	opt->display = display;
+	opt->value = value;
+	opt->selected = selected;
+	return opt;
+}
+
+// Allocate a Form with emalloc, fill it from the arguments and chain
+// it in front of link.  The new form has no fields yet.
+static Form*
+newform(int formid, Rune* name, Rune* action, int target, int method, Form* link)
+{
+	Form* form;
+
+	form = (Form*)emalloc(sizeof(Form));
+	form->next = link;
+	form->fields = nil;
+	form->nfields = 0;
+	form->formid = formid;
+	form->name = name;
+	form->action = action;
+	form->target = target;
+	form->method = method;
+	return form;
+}
+
+// Allocate a Table with emalloc, fill it from the arguments and chain
+// it in front of link.  The caption defaults to ALbottom placement
+// with no caption layout.
+// tabletok is left nil.  The code used to store tok there and then
+// overwrite it with nil on the very next line; the dead store has been
+// dropped and the resulting value kept.
+// NOTE(review): tok is therefore unused -- confirm that the table is
+// not meant to keep a pointer to its token.
+static Table*
+newtable(int tableid, Align align, Dimen width, int border,
+	int cellspacing, int cellpadding, Background bg, Token* tok, Table* link)
+{
+	Table* t;
+
+	t = (Table*)emalloc(sizeof(Table));
+	t->tableid = tableid;
+	t->align = align;
+	t->width = width;
+	t->border = border;
+	t->cellspacing = cellspacing;
+	t->cellpadding = cellpadding;
+	t->background = bg;
+	t->caption_place = ALbottom;
+	t->caption_lay = nil;
+	t->tabletok = nil;
+	t->next = link;
+	return t;
+}
+
+// Allocate a Tablerow with emalloc, set its alignment, background and
+// flags, and chain it in front of link.
+static Tablerow*
+newtablerow(Align align, Background bg, int flags, Tablerow* link)
+{
+	Tablerow* row;
+
+	row = (Tablerow*)emalloc(sizeof(Tablerow));
+	row->next = link;
+	row->flags = flags;
+	row->background = bg;
+	row->align = align;
+	return row;
+}
+
+// Allocate a Tablecell with emalloc, fill it from the arguments and
+// chain it in front of link.  The cell starts without a layout.
+static Tablecell*
+newtablecell(int cellid, int rowspan, int colspan, Align align, Dimen wspec, int hspec,
+		Background bg, int flags, Tablecell* link)
+{
+	Tablecell* cell;
+
+	cell = (Tablecell*)emalloc(sizeof(Tablecell));
+	cell->next = link;
+	cell->lay = nil;
+	cell->cellid = cellid;
+	cell->rowspan = rowspan;
+	cell->colspan = colspan;
+	cell->wspec = wspec;
+	cell->hspec = hspec;
+	cell->align = align;
+	cell->background = bg;
+	cell->flags = flags;
+	return cell;
+}
+
+// Allocate an Anchor with emalloc, fill it from the arguments and
+// chain it in front of link.  name and href are stored as given.
+static Anchor*
+newanchor(int index, Rune* name, Rune* href, int target, Anchor* link)
+{
+	Anchor* anchor;
+
+	anchor = (Anchor*)emalloc(sizeof(Anchor));
+	anchor->next = link;
+	anchor->target = target;
+	anchor->href = href;
+	anchor->name = name;
+	anchor->index = index;
+	return anchor;
+}
+
+// Allocate a DestAnchor with emalloc, fill it from the arguments and
+// chain it in front of link.
+static DestAnchor*
+newdestanchor(int index, Rune* name, Item* item, DestAnchor* link)
+{
+	DestAnchor* dest;
+
+	dest = (DestAnchor*)emalloc(sizeof(DestAnchor));
+	dest->next = link;
+	dest->item = item;
+	dest->name = name;
+	dest->index = index;
+	return dest;
+}
+
+// Allocate an SEvent with emalloc holding type and script, chained in
+// front of link.  script is stored as given.
+static SEvent*
+newscriptevent(int type, Rune* script, SEvent* link)
+{
+	SEvent* ev;
+
+	ev = (SEvent*)emalloc(sizeof(SEvent));
+	ev->next = link;
+	ev->script = script;
+	ev->type = type;
+	return ev;
+}
+
+// Free every SEvent on the list starting at ehead, together with the
+// script string each one holds.
+static void
+freescriptevents(SEvent* ehead)
+{
+	SEvent* cur;
+	SEvent* rest;
+
+	for(cur = ehead; cur != nil; cur = rest) {
+		// read the link before cur is released
+		rest = cur->next;
+		free(cur->script);
+		free(cur);
+	}
+}
+
+// Build a Dimen whose kindspec is kind|spec.  A spec that has any of
+// the Dkindmask bits set does not fit: it is replaced by 0, with a
+// warning on file descriptor 2 when warn is set.
+static Dimen
+makedimen(int kind, int spec)
+{
+	Dimen result;
+	int fits;
+
+	fits = (spec&Dkindmask) == 0;
+	if(!fits) {
+		if(warn)
+			fprint(2, "warning: dimension spec too big: %d\n", spec);
+		spec = 0;
+	}
+	result.kindspec = kind|spec;
+	return result;
+}
+
+// Kind part of d: the Dkindmask bits of its kindspec.
+int
+dimenkind(Dimen d)
+{
+	int kind;
+
+	kind = d.kindspec&Dkindmask;
+	return kind;
+}
+
+// Spec part of d: the Dspecmask bits of its kindspec.
+int
+dimenspec(Dimen d)
+{
+	int spec;
+
+	spec = d.kindspec&Dspecmask;
+	return spec;
+}
+
+// Allocate a Kidinfo with emalloc and chain it in front of link.
+// When it is not a frameset, the scroll flags, both margins and the
+// frame border get their initial values as well.
+static Kidinfo*
+newkidinfo(int isframeset, Kidinfo* link)
+{
+	Kidinfo*	kid;
+
+	kid = (Kidinfo*)emalloc(sizeof(Kidinfo));
+	kid->next = link;
+	kid->isframeset = isframeset;
+	if(isframeset)
+		return kid;
+
+	// settings applied only when this is not a frameset
+	kid->framebd = 1;
+	kid->marginh = FRKIDMARGIN;
+	kid->marginw = FRKIDMARGIN;
+	kid->flags = FRhscrollauto|FRvscrollauto;
+	return kid;
+}
+
+// Allocate a Docinfo with emalloc and give it the values set by
+// resetdocinfo.
+static Docinfo*
+newdocinfo(void)
+{
+	Docinfo*	info;
+
+	info = (Docinfo*)emalloc(sizeof(Docinfo));
+	resetdocinfo(info);
+	return info;
+}
+
+// Put d into its initial state: every byte zeroed, then the fields
+// whose starting value is not zero filled in.  Nothing that d pointed
+// to beforehand is freed here.
+static void
+resetdocinfo(Docinfo* d)
+{
+	memset(d, 0, sizeof(Docinfo));
+
+	d->frameid = -1;
+	d->scripttype = TextJavascript;
+	d->chset = ISO_8859_1;
+	d->target = FTself;
+
+	// colors
+	d->alink = Blue;
+	d->vlink = Blue;
+	d->link = Blue;
+	d->text = Black;
+	d->background = makebackground(nil, White);
+}
+
+// The targetmap array records the name <-> targetid mapping.
+// Start it off with room for ten entries (obtained from emalloc and
+// zeroed) and fill in the four predefined targets.  Nothing in this
+// function frees the array.
+static void
+targetmapinit(void)
+{
+	StringInt* slot;
+
+	targetmapsize = 10;
+	targetmap = (StringInt*)emalloc(targetmapsize*sizeof(StringInt));
+	memset(targetmap, 0, targetmapsize*sizeof(StringInt));
+
+	slot = targetmap;
+	slot->key = _Strdup(L(L_top));
+	slot->val = FTtop;
+	slot++;
+	slot->key = _Strdup(L(L_self));
+	slot->val = FTself;
+	slot++;
+	slot->key = _Strdup(L(L_parent));
+	slot->val = FTparent;
+	slot++;
+	slot->key = _Strdup(L(L_blank));
+	slot->val = FTblank;
+
+	ntargets = 4;
+}
+
+// Return the target id for name s.  An empty name gives FTself and a
+// name already in targetmap gives its recorded value.  Any other name
+// is copied into the next free slot (growing the array by ten entries
+// when it is full) and that slot's index becomes its id.
+int
+targetid(Rune* s)
+{
+	int idx;
+	int len;
+	Rune* copy;
+
+	len = _Strlen(s);
+	if(len == 0)
+		return FTself;
+
+	idx = 0;
+	while(idx < ntargets) {
+		if(_Strcmp(s, targetmap[idx].key) == 0)
+			return targetmap[idx].val;
+		idx++;
+	}
+
+	// not found: idx is now the first unused slot
+	if(idx >= targetmapsize) {
+		targetmapsize += 10;
+		targetmap = (StringInt*)erealloc(targetmap, targetmapsize*sizeof(StringInt));
+	}
+	// len+1 Runes so that the terminator is copied too
+	copy = (Rune*)emalloc((len+1)*sizeof(Rune));
+	memmove(copy, s, (len+1)*sizeof(Rune));
+	targetmap[idx].key = copy;
+	targetmap[idx].val = idx;
+	ntargets++;
+	return idx;
+}
+
+// Return the name recorded in targetmap for targid, or L(Lquestion)
+// when no entry has that value.  The returned string is the one held
+// in the table, not a copy.
+Rune*
+targetname(int targid)
+{
+	int idx;
+
+	idx = 0;
+	while(idx < ntargets) {
+		if(targetmap[idx].val == targid)
+			return targetmap[idx].key;
+		idx++;
+	}
+	return L(Lquestion);
+}
+
+// Convert an HTML color spec to an RGB value.
+// s is expected to be a valid HTML color, or "".
+// A name found in color_tab gives the table's value.  Otherwise s,
+// minus one leading '#', is read as a base-16 number and that number
+// is the answer provided the whole string was consumed.
+// dflt is returned when s is nil or cannot be converted.
+static int
+color(Rune* s, int dflt)
+{
+	int rgb;
+	Rune* end;
+
+	if(s != nil) {
+		if(_lookup(color_tab, NCOLORS, s, _Strlen(s), &rgb))
+			return rgb;
+		if(s[0] == '#')
+			s++;
+		rgb = _Strtol(s, &end, 16);
+		if(*end == 0)
+			return rgb;
+	}
+	return dflt;
+}
+
+// Debugging
+
+// Bound used by the valid* checks: sizes measured in pixels must be below it.
+#define HUGEPIX 10000
+
+// A "shallow" validitem, that doesn't follow next links
+// or descend into tables.
+// Returns nonzero when the fields common to all items are within
+// range and the fields specific to i->tag pass their own checks.
+// Pointer fields are probed with validptr, which dereferences them.
+static int
+validitem(Item* i)
+{
+	int ok;
+	Itext* ti;
+	Irule* ri;
+	Iimage* ii;
+	Ifloat* fi;
+	int a;
+
+	// checks on the fields every item has
+	ok = (i->tag >= Itexttag && i->tag <= Ispacertag) &&
+		(i->next == nil || validptr(i->next)) &&
+		(i->width >= 0 && i->width < HUGEPIX) &&
+		(i->height >= 0 && i->height < HUGEPIX) &&
+		(i->ascent > -HUGEPIX && i->ascent < HUGEPIX) &&
+		(i->anchorid >= 0) &&
+		(i->genattr == nil || validptr(i->genattr));
+	// also, could check state for ridiculous combinations
+	// also, could check anchorid for within-doc-range
+	if(ok)
+		switch(i->tag) {
+		case Itexttag:
+			ti = (Itext*)i;
+			ok = validStr(ti->s) &&
+				(ti->fnt >= 0 && ti->fnt < NumStyle*NumSize) &&
+				(ti->ul == ULnone || ti->ul == ULunder || ti->ul == ULmid);
+			break;
+		case Iruletag:
+			ri = (Irule*)i;
+			ok = (validvalign(ri->align) || validhalign(ri->align)) &&
+				(ri->size >=0 && ri->size < HUGEPIX);
+			break;
+		case Iimagetag:
+			ii = (Iimage*)i;
+			ok = (ii->imsrc == nil || validptr(ii->imsrc)) &&
+				(ii->item.width >= 0 && ii->item.width < HUGEPIX) &&
+				(ii->item.height >= 0 && ii->item.height < HUGEPIX) &&
+				(ii->imwidth >= 0 && ii->imwidth < HUGEPIX) &&
+				(ii->imheight >= 0 && ii->imheight < HUGEPIX) &&
+				(ii->altrep == nil || validStr(ii->altrep)) &&
+				(ii->map == nil || validptr(ii->map)) &&
+				(validvalign(ii->align) || validhalign(ii->align)) &&
+				(ii->nextimage == nil || validptr(ii->nextimage));
+			break;
+		case Iformfieldtag:
+			ok = validformfield(((Iformfield*)i)->formfield);
+			break;
+		case Itabletag:
+			// only the item pointer is probed; validitems checks the table itself
+			ok = validptr((Itable*)i);
+			break;
+		case Ifloattag:
+			fi = (Ifloat*)i;
+			// a float must sit on the left or right and wrap an image or a table
+			ok = (fi->side == ALleft || fi->side == ALright) &&
+				validitem(fi->item) &&
+				(fi->item->tag == Iimagetag || fi->item->tag == Itabletag);
+			break;
+		case Ispacertag:
+			a = ((Ispacer*)i)->spkind;
+			ok = a==ISPnull || a==ISPvline || a==ISPhspace || a==ISPgeneral;
+			break;
+		default:
+			ok = 0;
+		}
+	return ok;
+}
+
+// "deep" validation: walks the whole list of items, checking each one
+// with validitem and also checking the table behind a table item or
+// behind a floated table.  Stops at the first bad item, reports it on
+// file descriptor 2 and returns 0; returns 1 otherwise.
+// nil is ok for argument.
+int
+validitems(Item* i)
+{
+	int ok;
+	Item* floated;
+
+	for(ok = 1; i != nil && ok; i = i->next) {
+		ok = validitem(i);
+		if(ok && i->tag == Itabletag)
+			ok = validtable(((Itable*)i)->table);
+		else if(ok && i->tag == Ifloattag) {
+			floated = ((Ifloat*)i)->item;
+			if(floated->tag == Itabletag)
+				ok = validtable(((Itable*)floated)->table);
+		}
+		if(!ok)
+			fprint(2, "invalid item: %I\n", i);
+	}
+	return ok;
+}
+
+// Check one Formfield: its type and id must be in range, and each
+// pointer field must be nil or pass its probe.  Returns 1 when every
+// check passes, 0 at the first one that fails.
+static int
+validformfield(Formfield* f)
+{
+	if(f->next != nil && !validptr(f->next))
+		return 0;
+	if(f->ftype < 0 || f->ftype > Ftextarea)
+		return 0;
+	if(f->fieldid < 0)
+		return 0;
+	if(f->form != nil && !validptr(f->form))
+		return 0;
+	if(f->name != nil && !validStr(f->name))
+		return 0;
+	if(f->value != nil && !validStr(f->value))
+		return 0;
+	if(f->options != nil && !validptr(f->options))
+		return 0;
+	if(f->image != nil && !validitem(f->image))
+		return 0;
+	if(f->events != nil && !validptr(f->events))
+		return 0;
+	// when all built, should have f->fieldid < f->form->nfields,
+	// but this may be called during build...
+	return 1;
+}
+
+// "deep" validation of a table -- checks cell contents too.
+// The table's own fields are checked first; the rows, columns, cells
+// and grid are checked only once both nrow and ncol are positive.
+static int
+validtable(Table* t)
+{
+	Tablecell* cell;
+	int r, k;
+	int ok;
+
+	ok = (t->next == nil || validptr(t->next)) &&
+		t->nrow >= 0 &&
+		t->ncol >= 0 &&
+		t->ncell >= 0 &&
+		validalign(t->align) &&
+		validdimen(t->width);
+	if(ok)
+		ok = (t->border >= 0 && t->border < HUGEPIX) &&
+			(t->cellspacing >= 0 && t->cellspacing < HUGEPIX) &&
+			(t->cellpadding >= 0 && t->cellpadding < HUGEPIX) &&
+			validitems(t->caption) &&
+			(t->caption_place == ALtop || t->caption_place == ALbottom);
+	if(ok)
+		ok = (t->totw >= 0 && t->totw < HUGEPIX) &&
+			(t->toth >= 0 && t->toth < HUGEPIX) &&
+			(t->tabletok == nil || validptr(t->tabletok));
+
+	// during parsing, t->rows has list;
+	// only when parsing is done is t->nrow set > 0
+	if(!ok || t->nrow <= 0 || t->ncol <= 0)
+		return ok;
+
+	// table is "finished"
+	for(r = 0; ok && r < t->nrow; r++)
+		ok = validtablerow(&t->rows[r]);
+	for(k = 0; ok && k < t->ncol; k++)
+		ok = validtablecol(&t->cols[k]);
+	for(cell = t->cells; ok && cell != nil; cell = cell->next)
+		ok = validtablecell(cell);
+	for(r = 0; ok && r < t->nrow; r++)
+		for(k = 0; ok && k < t->ncol; k++)
+			ok = validptr(t->grid[r][k]);
+	return ok;
+}
+
+// True when a is ALnone or one of the vertical alignment values
+// (ALtop, ALmiddle, ALbottom, ALbaseline).
+static int
+validvalign(int a)
+{
+	if(a == ALnone || a == ALtop)
+		return 1;
+	if(a == ALmiddle || a == ALbottom)
+		return 1;
+	return a == ALbaseline;
+}
+
+// True when a is ALnone or one of the horizontal alignment values
+// (ALleft, ALcenter, ALright, ALjustify, ALchar).
+static int
+validhalign(int a)
+{
+	if(a == ALnone || a == ALleft)
+		return 1;
+	if(a == ALcenter || a == ALright)
+		return 1;
+	return a == ALjustify || a == ALchar;
+}
+
+// True when both halves of a are acceptable: halign according to
+// validhalign and valign according to validvalign.
+static int
+validalign(Align a)
+{
+	if(!validhalign(a.halign))
+		return 0;
+	return validvalign(a.valign) != 0;
+}
+
+// Check a Dimen by kind: Dnone must carry a zero spec, Dpixels a spec
+// below HUGEPIX, Dpercent and Drelative accept any spec.  Any other
+// kind is invalid.
+static int
+validdimen(Dimen d)
+{
+	int spec;
+
+	spec = d.kindspec&Dspecmask;
+	switch(d.kindspec&Dkindmask) {
+	case Dnone:
+		return spec == 0;
+	case Dpixels:
+		return spec < HUGEPIX;
+	case Dpercent:
+	case Drelative:
+		return 1;
+	}
+	return 0;
+}
+
+// Check one Tablerow: cells is nil or probes ok, height and ascent
+// are within the HUGEPIX bounds, and the alignment is valid.
+static int
+validtablerow(Tablerow* r)
+{
+	if(r->cells != nil && !validptr(r->cells))
+		return 0;
+	if(r->height < 0 || r->height >= HUGEPIX)
+		return 0;
+	if(r->ascent <= -HUGEPIX || r->ascent >= HUGEPIX)
+		return 0;
+	return validalign(r->align) != 0;
+}
+
+// Check one Tablecol: width within [0, HUGEPIX) and a valid alignment.
+static int
+validtablecol(Tablecol* c)
+{
+	if(c->width < 0 || c->width >= HUGEPIX)
+		return 0;
+	return validalign(c->align) != 0;
+}
+
+// Check one Tablecell: pointer fields are nil or probe ok, spans and
+// position are non-negative, alignment and width spec are valid.
+// When all of that holds and the cell has content, the content list
+// is validated deeply with validitems.
+static int
+validtablecell(Tablecell* c)
+{
+	if(c->next != nil && !validptr(c->next))
+		return 0;
+	if(c->nextinrow != nil && !validptr(c->nextinrow))
+		return 0;
+	if(c->content != nil && !validptr(c->content))
+		return 0;
+	if(c->lay != nil && !validptr(c->lay))
+		return 0;
+	if(c->rowspan < 0 || c->colspan < 0)
+		return 0;
+	if(!validalign(c->align) || !validdimen(c->wspec))
+		return 0;
+	if(c->row < 0 || c->col < 0)
+		return 0;
+	if(c->content == nil)
+		return 1;
+	return validitems(c->content);
+}
+
+// Probe a pointer by reading one byte through it; always returns 1.
+// TODO: a better job of this.
+// For now the read itself is the whole check: an invalid pointer is
+// expected to fault right here.
+static int
+validptr(void* p)
+{
+	static char sink;
+	char* probe;
+
+	probe = (char*)p;
+	sink = *probe;
+	return 1;
+}
+
+// A Rune string is acceptable when it is not nil and probes ok.
+static int
+validStr(Rune* s)
+{
+	if(s == nil)
+		return 0;
+	return validptr(s);
+}

diff --git a/src/libhtml/impl.h b/src/libhtml/impl.h
new file mode 100644
index 0000000..f8c79ea
--- /dev/null
+++ b/src/libhtml/impl.h

@@ -0,0 +1,163 @@
+
+// UTILS
+typedef struct List List;
+typedef struct Strlist Strlist;
+
+// List of integers (and also generic list with next pointer at beginning)
+struct List
+{
+	List*	next;	// following element
+	int	val;	// the integer held by this element
+};
+
+// List of Rune strings; next comes first, the same layout as List
+struct Strlist
+{
+	Strlist*	next;	// following element
+	Rune*	val;	// the string held by this element
+};
+
+extern int		_inclass(Rune c, Rune* cl);
+extern int		_listlen(List* l);
+extern Rune*	_ltoStr(int n);
+extern List*	_newlist(int val, List* rest);
+extern Rune*	_newstr(int n);
+extern int		_prefix(Rune* pre, Rune* s);
+extern List*	_revlist(List* l);
+extern void	_splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2);
+extern void	_splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2);
+extern int		_splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen);
+extern Rune*	_Stradd(Rune*s1, Rune* s2, int n);
+extern Rune*	_Strclass(Rune* s, Rune* cl);
+extern int		_Strcmp(Rune* s1, Rune* s2);
+extern Rune*	_Strdup(Rune* s);
+extern Rune*	_Strdup2(Rune* s, Rune* t);
+extern int		_Streqn(Rune* s1, int n1, Rune* s2);
+extern int		_Strlen(Rune* s);
+extern Rune*	_Strnclass(Rune* s, Rune* cl, int n);
+extern int		_Strncmpci(Rune* s1, int n1, Rune* s2);
+extern Rune*	_Strndup(Rune* s, int n);
+extern Rune*	_Strnrclass(Rune* s, Rune* cl, int n);
+extern Rune*	_Strrclass(Rune* s, Rune* cl);
+extern Rune*	_Strsubstr(Rune* s, int start, int stop);
+extern long	_Strtol(Rune* s, Rune** eptr, int base);
+extern void	_trimwhite(Rune* s, int n, Rune** pans, int* panslen);
+
+extern Rune	notwhitespace[];
+extern Rune	whitespace[];
+
+// STRINTTAB
+typedef struct StringInt StringInt;
+
+// Element of String-Int table (used for keyword lookup)
+struct StringInt
+{
+	Rune*	key;	// the string
+	int	val;	// the integer paired with key
+};
+
+extern int			_lookup(StringInt* t, int n, Rune* key, int keylen, int* pans);
+extern StringInt*	_makestrinttab(Rune** a, int n);
+extern Rune*		_revlookup(StringInt* t, int n, int val);
+
+// Colors, in html format, not Plan 9 format.  (RGB values in bottom 3 bytes)
+// Each value reads 0xRRGGBB.
+enum {
+	White = 0xFFFFFF,
+	Black = 0x000000,
+	Blue = 0x0000CC,
+};
+
+// LEX
+
+// HTML 4.0 tags (plus blink, nobr)
+// sorted in lexical order; used as array indices
+// Notfound and Comment come first, then one constant per tag name.
+// Numtags counts the entries above it.  RBRA equals Numtags and
+// Data equals Numtags+RBRA.
+// NOTE(review): RBRA is presumably the offset added to a tag value to
+// denote its end tag, which would place Data just past every start and
+// end tag value -- confirm against gettag.
+enum {
+	Notfound,
+	Comment,
+	Ta, Tabbr, Tacronym, Taddress, Tapplet, Tarea,
+	Tb, Tbase, Tbasefont, Tbdo, Tbig, Tblink,
+	Tblockquote, Tbody, Tbq, Tbr, Tbutton,
+	Tcaption, Tcenter, Tcite, Tcode, Tcol, Tcolgroup,
+	Tdd, Tdel, Tdfn, Tdir, Tdiv, Tdl, Tdt,
+	Tem,
+	Tfieldset, Tfont, Tform, Tframe, Tframeset,
+	Th1, Th2, Th3, Th4, Th5, Th6,
+	Thead, Thr, Thtml,
+	Ti, Tiframe, Timg, Tinput, Tins, Tisindex,
+	Tkbd,
+	Tlabel, Tlegend, Tli, Tlink,
+	Tmap, Tmenu, Tmeta,
+	Tnobr, Tnoframes, Tnoscript,
+	Tobject, Tol, Toptgroup, Toption,
+	Tp, Tparam, Tpre,
+	Tq,
+	Ts, Tsamp, Tscript, Tselect, Tsmall,
+	Tspan, Tstrike, Tstrong, Tstyle, Tsub, Tsup,
+	Ttable, Ttbody, Ttd, Ttextarea, Ttfoot,
+	Tth, Tthead, Ttitle, Ttr, Ttt,
+	Tu, Tul,
+	Tvar,
+	Numtags,
+	RBRA = Numtags,
+	Data = Numtags+RBRA
+};
+
+// HTML 4.0 tag attributes
+// Keep sorted in lexical order
+// One constant per attribute name; Numattrs, the last entry, counts them.
+// These are the values stored in Attr.attid.
+enum {
+	Aabbr, Aaccept_charset, Aaccess_key, Aaction,
+	Aalign, Aalink, Aalt, Aarchive, Aaxis,
+	Abackground, Abgcolor, Aborder,
+	Acellpadding, Acellspacing, Achar, Acharoff,
+	Acharset, Achecked, Acite, Aclass, Aclassid,
+	Aclear, Acode, Acodebase, Acodetype, Acolor,
+	Acols, Acolspan, Acompact, Acontent, Acoords,
+	Adata, Adatetime, Adeclare, Adefer, Adir, Adisabled,
+	Aenctype,
+	Aface, Afor, Aframe, Aframeborder,
+	Aheaders, Aheight, Ahref, Ahreflang, Ahspace, Ahttp_equiv,
+	Aid, Aismap,
+	Alabel, Alang, Alink, Alongdesc,
+	Amarginheight, Amarginwidth, Amaxlength,
+	Amedia, Amethod, Amultiple,
+	Aname, Anohref, Anoresize, Anoshade, Anowrap,
+	Aobject, Aonblur, Aonchange, Aonclick, Aondblclick,
+	Aonfocus, Aonkeypress, Aonkeyup, Aonload,
+	Aonmousedown, Aonmousemove, Aonmouseout,
+	Aonmouseover, Aonmouseup, Aonreset, Aonselect,
+	Aonsubmit, Aonunload,
+	Aprofile, Aprompt,
+	Areadonly, Arel, Arev, Arows, Arowspan, Arules,
+	Ascheme, Ascope, Ascrolling, Aselected, Ashape,
+	Asize, Aspan, Asrc, Astandby, Astart, Astyle, Asummary,
+	Atabindex, Atarget, Atext, Atitle, Atype,
+	Ausemap,
+	Avalign, Avalue, Avaluetype, Aversion, Avlink, Avspace,
+	Awidth,
+	Numattrs
+};
+
+// One attribute of a tag token
+struct Attr
+{
+	Attr*		next;		// in list of attrs for a token
+	int		attid;		// Aabbr, etc.
+	Rune*	value;		// the attribute's value string
+};
+
+// One lexical token: either a tag or a run of Data
+struct Token
+{
+	int		tag;		// Ta, etc
+	Rune*	text;		// text in Data, attribute text in tag
+	Attr*		attr;		// list of Attrs
+	int		starti;	// index into source buffer of token start
+};
+
+extern Rune**	tagnames;
+extern Rune**	attrnames;
+
+extern void	_freetokens(Token* tarray, int n);
+extern Token*	_gettoks(uchar* data, int datalen, int chset, int mtype, int* plen);
+extern int		_tokaval(Token* t, int attid, Rune** pans, int xfer);
+
+#pragma varargck	type "T"	Token*
+
+#include "runetab.h"

diff --git a/src/libhtml/lex.c b/src/libhtml/lex.c
new file mode 100644
index 0000000..99c5fc1
--- /dev/null
+++ b/src/libhtml/lex.c

@@ -0,0 +1,1384 @@
+#include <u.h>
+#include <libc.h>
+#include <draw.h>
+#include <ctype.h>
+#include <html.h>
+#include "impl.h"
+
+// The state of one lexing run: the input bytes, how far into them
+// the lexer has got, and how they are to be interpreted.
+typedef struct TokenSource TokenSource;
+struct TokenSource
+{
+	int			i;		// index of next byte to use
+	uchar*		data;		// all the data
+	int			edata;	// data[0:edata] is valid
+	int			chset;	// one of US_Ascii, etc.
+	int			mtype;	// TextHtml or TextPlain
+};
+
+// Negative values outside the range of real characters; callers of
+// getchar in this file treat any result below zero as the end of input.
+// NOTE(review): the names suggest end-of-file and end-of-buffer --
+// confirm how getchar distinguishes the two.
+enum {
+	EOF = -2,
+	EOB = -1
+};
+
+// True for a character below 256 that is a letter, a digit, '-' or '.'.
+// The argument is evaluated more than once.
+#define ISNAMCHAR(c)	((c)<256 && (isalpha(c) || isdigit(c) || (c) == '-' || (c) == '.'))
+
+// Element counts for the fixed-size local buffers in this file
+// (for example the Rune buf[BIGBUFSIZE] arrays in the data gatherers).
+#define SMALLBUFSIZE 240
+#define BIGBUFSIZE 2000
+
+// HTML 4.0 tag names.
+// Keep sorted, and in correspondence with the tag enum in impl.h.
+Rune **tagnames;
+char *_tagnames[] = {
+	" ",
+	"!",
+	"a", 
+	"abbr",
+	"acronym",
+	"address",
+	"applet", 
+	"area",
+	"b",
+	"base",
+	"basefont",
+	"bdo",
+	"big",
+	"blink",
+	"blockquote",
+	"body",
+	"bq",
+	"br",
+	"button",
+	"caption",
+	"center",
+	"cite",
+	"code",
+	"col",
+	"colgroup",
+	"dd",
+	"del",
+	"dfn",
+	"dir",
+	"div",
+	"dl",
+	"dt",
+	"em",
+	"fieldset",
+	"font",
+	"form",
+	"frame",
+	"frameset",
+	"h1",
+	"h2",
+	"h3",
+	"h4",
+	"h5",
+	"h6",
+	"head",
+	"hr",
+	"html",
+	"i",
+	"iframe",
+	"img",
+	"input",
+	"ins",
+	"isindex",
+	"kbd",
+	"label",
+	"legend",
+	"li",
+	"link",
+	"map",
+	"menu",
+	"meta",
+	"nobr",
+	"noframes",
+	"noscript",
+	"object",
+	"ol",
+	"optgroup",
+	"option",
+	"p",
+	"param",
+	"pre",
+	"q",
+	"s",
+	"samp",
+	"script",
+	"select",
+	"small",
+	"span",
+	"strike",
+	"strong",
+	"style",
+	"sub",
+	"sup",
+	"table",
+	"tbody",
+	"td",
+	"textarea",
+	"tfoot",
+	"th",
+	"thead",
+	"title",
+	"tr",
+	"tt",
+	"u",
+	"ul",
+	"var"
+};
+
+// HTML 4.0 attribute names.
+// Keep sorted, and in correspondence with the attribute enum in impl.h.
+Rune **attrnames;
+char* _attrnames[] = {
+	"abbr",
+	"accept-charset",
+	"access-key",
+	"action",
+	"align",
+	"alink",
+	"alt",
+	"archive",
+	"axis",
+	"background",
+	"bgcolor",
+	"border",
+	"cellpadding",
+	"cellspacing",
+	"char",
+	"charoff",
+	"charset",
+	"checked",
+	"cite",
+	"class",
+	"classid",
+	"clear",
+	"code",
+	"codebase",
+	"codetype",
+	"color",
+	"cols",
+	"colspan",
+	"compact",
+	"content",
+	"coords",
+	"data",
+	"datetime",
+	"declare",
+	"defer",
+	"dir",
+	"disabled",
+	"enctype",
+	"face",
+	"for",
+	"frame",
+	"frameborder",
+	"headers",
+	"height",
+	"href",
+	"hreflang",
+	"hspace",
+	"http-equiv",
+	"id",
+	"ismap",
+	"label",
+	"lang",
+	"link",
+	"longdesc",
+	"marginheight",
+	"marginwidth",
+	"maxlength",
+	"media",
+	"method",
+	"multiple",
+	"name",
+	"nohref",
+	"noresize",
+	"noshade",
+	"nowrap",
+	"object",
+	"onblur",
+	"onchange",
+	"onclick",
+	"ondblclick",
+	"onfocus",
+	"onkeypress",
+	"onkeyup",
+	"onload",
+	"onmousedown",
+	"onmousemove",
+	"onmouseout",
+	"onmouseover",
+	"onmouseup",
+	"onreset",
+	"onselect",
+	"onsubmit",
+	"onunload",
+	"profile",
+	"prompt",
+	"readonly",
+	"rel",
+	"rev",
+	"rows",
+	"rowspan",
+	"rules",
+	"scheme",
+	"scope",
+	"scrolling",
+	"selected",
+	"shape",
+	"size",
+	"span",
+	"src",
+	"standby",
+	"start",
+	"style",
+	"summary",
+	"tabindex",
+	"target",
+	"text",
+	"title",
+	"type",
+	"usemap",
+	"valign",
+	"value",
+	"valuetype",
+	"version",
+	"vlink",
+	"vspace",
+	"width"
+};
+
+
+// Character entity to unicode character number map.
+// Keep sorted by name.
+StringInt *chartab;
+AsciiInt _chartab[142] = {
+	{"AElig", 198},
+	{"Aacute", 193},
+	{"Acirc", 194},
+	{"Agrave", 192},
+	{"Aring", 197},
+	{"Atilde", 195},
+	{"Auml", 196},
+	{"Ccedil", 199},
+	{"ETH", 208},
+	{"Eacute", 201},
+	{"Ecirc", 202},
+	{"Egrave", 200},
+	{"Euml", 203},
+	{"Iacute", 205},
+	{"Icirc", 206},
+	{"Igrave", 204},
+	{"Iuml", 207},
+	{"Ntilde", 209},
+	{"Oacute", 211},
+	{"Ocirc", 212},
+	{"Ograve", 210},
+	{"Oslash", 216},
+	{"Otilde", 213},
+	{"Ouml", 214},
+	{"THORN", 222},
+	{"Uacute", 218},
+	{"Ucirc", 219},
+	{"Ugrave", 217},
+	{"Uuml", 220},
+	{"Yacute", 221},
+	{"aacute", 225},
+	{"acirc", 226},
+	{"acute", 180},
+	{"aelig", 230},
+	{"agrave", 224},
+	{"alpha", 945},
+	{"amp", 38},
+	{"aring", 229},
+	{"atilde", 227},
+	{"auml", 228},
+	{"beta", 946},
+	{"brvbar", 166},
+	{"ccedil", 231},
+	{"cdots", 8943},
+	{"cedil", 184},
+	{"cent", 162},
+	{"chi", 967},
+	{"copy", 169},
+	{"curren", 164},
+	{"ddots", 8945},
+	{"deg", 176},
+	{"delta", 948},
+	{"divide", 247},
+	{"eacute", 233},
+	{"ecirc", 234},
+	{"egrave", 232},
+	{"emdash", 8212},
+	{"emsp", 8195},
+	{"endash", 8211},
+	{"ensp", 8194},
+	{"epsilon", 949},
+	{"eta", 951},
+	{"eth", 240},
+	{"euml", 235},
+	{"frac12", 189},
+	{"frac14", 188},
+	{"frac34", 190},
+	{"gamma", 947},
+	{"gt", 62},
+	{"iacute", 237},
+	{"icirc", 238},
+	{"iexcl", 161},
+	{"igrave", 236},
+	{"iota", 953},
+	{"iquest", 191},
+	{"iuml", 239},
+	{"kappa", 954},
+	{"lambda", 955},
+	{"laquo", 171},
+	{"ldots", 8230},
+	{"lt", 60},
+	{"macr", 175},
+	{"micro", 181},
+	{"middot", 183},
+	{"mu", 956},
+	{"nbsp", 160},
+	{"not", 172},
+	{"ntilde", 241},
+	{"nu", 957},
+	{"oacute", 243},
+	{"ocirc", 244},
+	{"ograve", 242},
+	{"omega", 969},
+	{"omicron", 959},
+	{"ordf", 170},
+	{"ordm", 186},
+	{"oslash", 248},
+	{"otilde", 245},
+	{"ouml", 246},
+	{"para", 182},
+	{"phi", 966},
+	{"pi", 960},
+	{"plusmn", 177},
+	{"pound", 163},
+	{"psi", 968},
+	{"quad", 8193},
+	{"quot", 34},
+	{"raquo", 187},
+	{"reg", 174},
+	{"rho", 961},
+	{"sect", 167},
+	{"shy", 173},
+	{"sigma", 963},
+	{"sp", 8194},
+	{"sup1", 185},
+	{"sup2", 178},
+	{"sup3", 179},
+	{"szlig", 223},
+	{"tau", 964},
+	{"theta", 952},
+	{"thinsp", 8201},
+	{"thorn", 254},
+	{"times", 215},
+	{"trade", 8482},
+	{"uacute", 250},
+	{"ucirc", 251},
+	{"ugrave", 249},
+	{"uml", 168},
+	{"upsilon", 965},
+	{"uuml", 252},
+	{"varepsilon", 8712},
+	{"varphi", 981},
+	{"varpi", 982},
+	{"varrho", 1009},
+	{"vdots", 8942},
+	{"vsigma", 962},
+	{"vtheta", 977},
+	{"xi", 958},
+	{"yacute", 253},
+	{"yen", 165},
+	{"yuml", 255},
+	{"zeta", 950}
+};
+// Number of entries in the character entity table.  It is computed
+// from the _chartab array: chartab is only a pointer, so sizeof
+// applied to it gives the size of the pointer, not of the table.
+#define NCHARTAB (sizeof(_chartab)/sizeof(_chartab[0]))
+
+// Characters Winstart..Winend are those that Windows
+// uses interpolated into the Latin1 set.
+// They aren't supposed to appear in HTML, but they do....
+// Both bounds are inclusive, which makes 33 codes in all.
+enum {
+	Winstart = 127,
+	Winend = 159
+};
+
+// Replacement character numbers, 33 entries: one per code in
+// Winstart..Winend.
+// NOTE(review): presumably indexed by c - Winstart -- confirm at the use site.
+static int	winchars[]= { 8226,	// 8226 is a bullet
+	8226, 8226, 8218, 402, 8222, 8230, 8224, 8225,
+	710, 8240, 352, 8249, 338, 8226, 8226, 8226,
+	8226, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+	732, 8482, 353, 8250, 339, 8226, 8226, 376};
+
+static StringInt*	tagtable;		// initialized from tagnames
+static StringInt*	attrtable;		// initialized from attrnames
+
+static void		lexinit();
+static int		getplaindata(TokenSource* ts, Token* a, int* pai);
+static int		getdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai);
+static int		getscriptdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai);
+static int		gettag(TokenSource* ts, int starti, Token* a, int* pai);
+static Rune*		buftostr(Rune* s, Rune* buf, int j);
+static int		comment(TokenSource* ts);
+static int		findstr(TokenSource* ts, Rune* s);
+static int		ampersand(TokenSource* ts);
+//static int		lowerc(int c);
+static int		getchar(TokenSource* ts);
+static void		ungetchar(TokenSource* ts, int c);
+static void		backup(TokenSource* ts, int savei);
+//static void		freeinsidetoken(Token* t);
+static void		freeattrs(Attr* ahead);
+static Attr*		newattr(int attid, Rune* value, Attr* link);
+static int		Tconv(Fmt* f);
+
+int	dbglex = 0;
+static int lexinited = 0;
+
+// One-time set-up for the lexer: convert the char* tables into their
+// Rune forms, build the tag and attribute lookup tables from them,
+// install Tconv as the 'T' format verb, and record in lexinited that
+// all this has been done.
+static void
+lexinit(void)
+{
+	// tags
+	tagnames = cvtstringtab(_tagnames, nelem(_tagnames));
+	tagtable = _makestrinttab(tagnames, Numtags);
+
+	// attributes
+	attrnames = cvtstringtab(_attrnames, nelem(_attrnames));
+	attrtable = _makestrinttab(attrnames, Numattrs);
+
+	// character entities
+	chartab = cvtstringinttab(_chartab, nelem(_chartab));
+
+	fmtinstall('T', Tconv);
+	lexinited = 1;
+}
+
+// Allocate a TokenSource with emalloc, positioned at the start of
+// data[0:edata].  data is stored as given, not copied.  chset must
+// be one of the four character sets named in the assert.
+static TokenSource*
+newtokensource(uchar* data, int edata, int chset, int mtype)
+{
+	TokenSource*	src;
+
+	assert(chset == US_Ascii || chset == ISO_8859_1 ||
+			chset == UTF_8 || chset == Unicode);
+	src = (TokenSource*)emalloc(sizeof(TokenSource));
+	src->mtype = mtype;
+	src->chset = chset;
+	src->data = data;
+	src->edata = edata;
+	src->i = 0;
+	return src;
+}
+
+// _gettoks allocates its token array this many Tokens at a time
+enum {
+	ToksChunk = 500
+};
+
+// Call this to get the tokens.
+//  The number of returned tokens is returned in *plen.
+// data[0:datalen] is lexed as HTML when mtype is TextHtml and as
+// plain text otherwise; chset says how the bytes are encoded.
+// The result is an array obtained from emalloc/erealloc, or nil when
+// no tokens were produced.  The TokenSource used while lexing is
+// private to this call and is freed before returning.
+Token*
+_gettoks(uchar* data, int datalen, int chset, int mtype, int* plen)
+{
+	TokenSource*	ts;
+	Token*		a;
+	int	alen;
+	int	ai;
+	int	starti;
+	int	c;
+	int	tag;
+
+	if(!lexinited)
+		lexinit();
+	ts = newtokensource(data, datalen, chset, mtype);
+	alen = ToksChunk;
+	a = (Token*)emalloc(alen * sizeof(Token));
+	ai = 0;
+	if(dbglex)
+		fprint(2, "_gettoks starts, ts.i=%d, ts.edata=%d\n", ts->i, ts->edata);
+	if(ts->mtype == TextHtml) {
+		for(;;) {
+			// One pass may store two tokens: the one from gettag
+			// and, after a <script> tag, the one from
+			// getscriptdata.  Keep two slots free so that the
+			// second store cannot run past the end of a.
+			if(alen - ai < 2) {
+				alen += ToksChunk;
+				a = (Token*)erealloc(a, alen*sizeof(Token));
+			}
+			starti = ts->i;
+			c = getchar(ts);
+			if(c < 0)
+				break;
+			if(c == '<') {
+				tag = gettag(ts, starti, a, &ai);
+				if(tag == Tscript) {
+					// special rules for getting Data after....
+					starti = ts->i;
+					c = getchar(ts);
+					tag = getscriptdata(ts, c, starti, a, &ai);
+				}
+			}
+			else
+				tag = getdata(ts, c, starti, a, &ai);
+			if(tag == -1)
+				break;
+			else if(dbglex > 1 && tag != Comment)
+				fprint(2, "lex: got token %T\n", &a[ai-1]);
+		}
+	}
+	else {
+		// plain text (non-html) tokens
+		for(;;) {
+			if(ai == alen) {
+				alen += ToksChunk;
+				a = (Token*)erealloc(a, alen*sizeof(Token));
+			}
+			tag = getplaindata(ts, a, &ai);
+			if(tag == -1)
+				break;
+			// getplaindata has already advanced ai, so the token
+			// just stored is the one at ai-1
+			if(dbglex > 1)
+				fprint(2, "lex: got token %T\n", &a[ai-1]);
+		}
+	}
+	if(dbglex)
+		fprint(2, "lex: returning %d tokens\n", ai);
+	*plen = ai;
+	free(ts);
+	if(ai == 0) {
+		// nothing to hand back: release the unused array as well
+		free(a);
+		return nil;
+	}
+	return a;
+}
+
+// For case where source isn't HTML.
+// Just make data tokens, one per line (or partial line,
+// at end of buffer), ignoring non-whitespace control
+// characters and dumping \r's.
+// If find non-empty token, fill in a[*pai], bump *pai, and return Data.
+// Otherwise return -1;
+// Characters are gathered in a local Rune buffer which is moved into
+// the growing string s whenever it is one short of full, so that
+// buftostr always has room to store the terminator.
+static int
+getplaindata(TokenSource* ts, Token* a, int* pai)
+{
+	Rune*	s;
+	int	j;
+	int	starti;
+	int	c;
+	Token*	tok;
+	Rune	buf[BIGBUFSIZE];
+
+	s = nil;
+	j = 0;
+	starti = ts->i;
+	for(c = getchar(ts); c >= 0; c = getchar(ts)) {
+		if(c < ' ') {
+			if(isspace(c)) {
+				if(c == '\r') {
+					// ignore it unless no following '\n',
+					// in which case treat it like '\n'
+					c = getchar(ts);
+					if(c != '\n') {
+						if(c >= 0)
+							ungetchar(ts, c);
+						c = '\n';
+					}
+				}
+			}
+			else
+				c = 0;	// control character: dropped below
+		}
+		if(c != 0) {
+			buf[j++] = c;
+			// The limit is the number of Runes in buf, not its
+			// size in bytes: sizeof(buf) is larger than the
+			// element count, so testing against it let j run
+			// past the end of buf before any flush happened.
+			if(j == nelem(buf)-1) {
+				s = buftostr(s, buf, j);
+				j = 0;
+			}
+		}
+		if(c == '\n')
+			break;
+	}
+	s = buftostr(s, buf, j);
+	if(s == nil)
+		return -1;
+	tok = &a[(*pai)++];
+	tok->tag = Data;
+	tok->text = s;
+	tok->attr = nil;
+	tok->starti = starti;
+	return Data;
+}
+
+// Return concatenation of s and buf[0:j].
+// buf[j] is overwritten with a terminator first, so buf must have
+// room for j+1 Runes.  With a nil s the result comes from _Strndup,
+// otherwise from _Strdup2.
+// NOTE(review): the previous s is not freed here; whether that leaks
+// depends on what _Strdup2 does with its first argument -- confirm.
+static Rune*
+buftostr(Rune* s, Rune* buf, int j)
+{
+	buf[j] = 0;
+	if(s != nil)
+		return _Strdup2(s, buf);
+	return _Strndup(buf, j);
+}
+
+// Gather data, beginning with the already-read character firstc, up to the next '<' or end-of-buffer.
+// Translate entity references (&amp;) by calling ampersand.
+// Ignore non-whitespace control characters and get rid of \r's.
+// If find non-empty token, fill in a[*pai] (recording starti), bump *pai, and return Data.
+// Otherwise return -1;
+static int
+getdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai)
+{
+	Rune*	s;
+	int	j;
+	int	c;
+	Token*	tok;
+	Rune	buf[BIGBUFSIZE];
+
+	s = nil;
+	j = 0;
+	c = firstc;
+	while(c >= 0) {
+		if(c == '&') {
+			c = ampersand(ts);
+			if(c < 0)
+				break;
+		}
+		else if(c < ' ') {
+			if(isspace(c)) {
+				if(c == '\r') {
+					// ignore it unless no following '\n',
+					// in which case treat it like '\n'
+					c = getchar(ts);
+					if(c != '\n') {
+						if(c >= 0)
+							ungetchar(ts, c);
+						c = '\n';
+					}
+				}
+			}
+			else {
+				if(warn)
+					fprint(2, "warning: non-whitespace control character %d ignored\n", c);
+				c = 0;	// 0 marks "skip this character" for the test below
+			}
+		}
+		else if(c == '<') {
+			ungetchar(ts, c);	// leave the '<' for the caller to lex as a tag
+			break;
+		}
+		if(c != 0) {
+			buf[j++] = c;
+			if(j == BIGBUFSIZE-1) {	// flush, keeping one slot for buftostr's terminator
+				s = buftostr(s, buf, j);
+				j = 0;
+			}
+		}
+		c = getchar(ts);
+	}
+	s = buftostr(s, buf, j);
+	if(s == nil)
+		return -1;
+	tok = &a[(*pai)++];
+	tok->tag = Data;
+	tok->text = s;
+	tok->attr = nil;
+	tok->starti = starti;
+	return Data;
+}
+
+// The rules for lexing scripts are different (ugh).
+// Gather up everything until see a </SCRIPT>; return Data with the text in a[*pai], or back up to starti and return -1.
+static int
+getscriptdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai)
+{
+	Rune*	s;
+	int	j;
+	int	tstarti;
+	int	savei;
+	int	c;
+	int	tag;
+	int	done;
+	Token*	tok;
+	Rune	buf[BIGBUFSIZE];
+
+	s = nil;
+	j = 0;
+	tstarti = starti;
+	c = firstc;
+	done = 0;
+	while(c >= 0) {
+		if(c == '<') {
+			// other browsers ignore stuff to end of line after <!
+			savei = ts->i;
+			c = getchar(ts);
+			if(c == '!') {
+				while(c >= 0 && c != '\n' && c != '\r')
+					c = getchar(ts);
+				if(c == '\r')
+					c = getchar(ts);
+				if(c == '\n')
+					c = getchar(ts);
+			}
+			else if(c >= 0) {
+				backup(ts, savei);	// re-read from just after the '<' so gettag can parse it
+				tag = gettag(ts, tstarti, a, pai);
+				if(tag == -1)
+					break;
+				if(tag != Comment)
+					(*pai)--;	// undo gettag's bump; the token was only a probe
+				backup(ts, tstarti);	// return to the position recorded before this character
+				if(tag == Tscript + RBRA) {
+					done = 1;
+					break;
+				}
+				// here tag was not </SCRIPT>, so take as regular data
+				c = getchar(ts);
+			}
+		}
+		if(c < 0)
+			break;
+		if(c != 0) {
+			buf[j++] = c;
+			if(j == BIGBUFSIZE-1) {	// flush, keeping one slot for buftostr's terminator
+				s = buftostr(s, buf, j);
+				j = 0;
+			}
+		}
+		tstarti = ts->i;	// start position of the character about to be read
+		c = getchar(ts);
+	}
+	if(done || ts->i == ts->edata) {	// saw </SCRIPT>, or consumed all available data
+		s = buftostr(s, buf, j);
+		tok = &a[(*pai)++];
+		tok->tag = Data;
+		tok->text = s;
+		tok->attr = nil;
+		tok->starti = starti;
+		return Data;
+	}
+	backup(ts, starti);
+	return -1;
+}
+
+// We've just seen a '<'.  Gather up stuff to closing '>' (if buffer
+// ends before then, back up to just after the '<' and return Data).
+// If it's a tag, look up the name, gather the attributes, and return
+// the token's tag (the tag id, plus RBRA for an end tag).
+// Else it's either just plain data or some kind of ignorable stuff:
+// return Data or Comment as appropriate.
+// If it's not a Comment, put it in a[*pai] and bump *pai.
+static int
+gettag(TokenSource* ts, int starti, Token* a, int* pai)
+{
+	int	rbra;
+	int	ans;
+	Attr*	al;
+	int	nexti;
+	int	c;
+	int	ti;
+	int	afnd;
+	int	attid;
+	int	quote;
+	Rune*	val;
+	int	nv;
+	int	i;
+	int	tag;
+	Token*	tok;
+	Rune	buf[BIGBUFSIZE];
+
+	rbra = 0;	// becomes RBRA when the name is preceded by '/'
+	nexti = ts->i;	// where to back up to if this turns out not to be a tag
+	tok = &a[*pai];
+	tok->tag = Notfound;
+	tok->text = nil;
+	tok->attr = nil;
+	tok->starti = starti;
+	c = getchar(ts);
+	if(c == '/') {
+		rbra = RBRA;
+		c = getchar(ts);
+	}
+	if(c < 0)
+		goto eob_done;
+	if(c >= 256 || !isalpha(c)) {
+		// not a tag
+		if(c == '!') {
+			ans = comment(ts);
+			if(ans != -1)
+				return ans;
+			goto eob_done;
+		}
+		else {
+			backup(ts, nexti);
+			tok->tag = Data;
+			tok->text = _Strdup(L(Llt));
+			(*pai)++;
+			return Data;
+		}
+	}
+	// c starts a tagname
+	buf[0] = c;
+	i = 1;
+	while(1) {
+		c = getchar(ts);
+		if(c < 0)
+			goto eob_done;
+		if(!ISNAMCHAR(c))
+			break;
+		// if name is bigger than buf it won't be found anyway...
+		if(i < BIGBUFSIZE)
+			buf[i++] = c;
+	}
+	if(_lookup(tagtable, Numtags, buf, i, &tag))
+		tok->tag = tag + rbra;
+	else
+		tok->text = _Strndup(buf, i);	// for warning print, in build
+
+	// attribute gathering loop
+	al = nil;
+	while(1) {
+		// look for "ws name" or "ws name ws = ws val"  (ws=whitespace)
+		// skip whitespace
+attrloop_continue:
+		while(c < 256 && isspace(c)) {
+			c = getchar(ts);
+			if(c < 0)
+				goto eob_done;
+		}
+		if(c == '>')
+			goto attrloop_done;
+		if(c == '<') {
+			if(warn)
+				fprint(2, "warning: unclosed tag\n");
+			ungetchar(ts, c);
+			goto attrloop_done;
+		}
+		if(c >= 256 || !isalpha(c)) {
+			if(warn)
+				fprint(2, "warning: expected attribute name\n");
+			// skip to next attribute name
+			while(1) {
+				c = getchar(ts);
+				if(c < 0)
+					goto eob_done;
+				if(c < 256 && isalpha(c))
+					goto attrloop_continue;
+				if(c == '<') {
+					if(warn)
+						fprint(2, "warning: unclosed tag\n");
+					ungetchar(ts, 60);	// 60 is the character code of '<', i.e. c
+					goto attrloop_done;
+				}
+				if(c == '>')
+					goto attrloop_done;
+			}
+		}
+		// gather attribute name
+		buf[0] = c;
+		i = 1;
+		while(1) {
+			c = getchar(ts);
+			if(c < 0)
+				goto eob_done;
+			if(!ISNAMCHAR(c))
+				break;
+			if(i < BIGBUFSIZE-1)	// leave room for the terminator written below
+				buf[i++] = c;
+		}
+		afnd = _lookup(attrtable, Numattrs, buf, i, &attid);
+		if(warn && !afnd) {
+			buf[i] = 0;
+			fprint(2, "warning: unknown attribute name %S\n", buf);
+		}
+		// skip whitespace
+		while(c < 256 && isspace(c)) {
+			c = getchar(ts);
+			if(c < 0)
+				goto eob_done;
+		}
+		if(c != '=') {
+			if(afnd)
+				al = newattr(attid, nil, al);	// attribute present with no value
+			goto attrloop_continue;
+		}
+		//# c is '=' here;  skip whitespace
+		while(1) {
+			c = getchar(ts);
+			if(c < 0)
+				goto eob_done;
+			if(c >= 256 || !isspace(c))
+				break;
+		}
+		quote = 0;	// 0 means unquoted; otherwise the opening quote character
+		if(c == '\'' || c == '"') {
+			quote = c;
+			c = getchar(ts);
+			if(c < 0)
+				goto eob_done;
+		}
+		val = nil;
+		nv = 0;
+		while(1) {
+valloop_continue:
+			if(c < 0)
+				goto eob_done;
+			if(c == '>') {
+				if(quote) {
+					// c might be part of string (though not good style)
+					// but if line ends before close quote, assume
+					// there was an unmatched quote
+					ti = ts->i;
+					while(1) {
+						c = getchar(ts);
+						if(c < 0)
+							goto eob_done;
+						if(c == quote) {
+							backup(ts, ti);
+							buf[nv++] = '>';
+							if(nv == BIGBUFSIZE-1) {
+								val = buftostr(val, buf, nv);
+								nv = 0;
+							}
+							c = getchar(ts);
+							goto valloop_continue;
+						}
+						if(c == '\n') {
+							if(warn)
+								fprint(2, "warning: apparent unmatched quote\n");
+							backup(ts, ti);
+							c = '>';
+							goto valloop_done;
+						}
+					}
+				}
+				else
+					goto valloop_done;
+			}
+			if(quote) {
+				if(c == quote) {
+					c = getchar(ts);
+					if(c < 0)
+						goto eob_done;
+					goto valloop_done;
+				}
+				if(c == '\r') {
+					c = getchar(ts);
+					goto valloop_continue;
+				}
+				if(c == '\t' || c == '\n')
+					c = ' ';
+			}
+			else {
+				if(c < 256 && isspace(c))
+					goto valloop_done;
+			}
+			if(c == '&') {
+				c = ampersand(ts);
+				if(c == -1)
+					goto eob_done;
+			}
+			buf[nv++] = c;
+			if(nv == BIGBUFSIZE-1) {
+				val = buftostr(val, buf, nv);
+				nv = 0;
+			}
+			c = getchar(ts);
+		}
+valloop_done:
+		if(afnd) {
+			val = buftostr(val, buf, nv);
+			al = newattr(attid, val, al);	// prepends, so al ends up in reverse source order
+		}
+	}
+
+attrloop_done:
+	tok->attr = al;
+	(*pai)++;
+	return tok->tag;
+
+eob_done:
+	if(warn)
+		fprint(2, "warning: incomplete tag at end of page\n");
+	backup(ts, nexti);
+	tok->tag = Data;
+	tok->text = _Strdup(L(Llt));
+	return Data;	// NOTE(review): *pai is not bumped on this path, unlike the Data return above - confirm this is intended
+}
+
+// We've just read a '<!' at position starti,
+// so this may be a comment or other ignored section, or it may
+// be just a literal string if there is no close before end of file
+// (other browsers do that).
+// The accepted practice seems to be (note: contrary to SGML spec!):
+// If see <!--, look for --> to close, or if none, > to close.
+// If see <!(not --), look for > to close.
+// If no close before end of file, leave original characters in as literal data.
+//
+// If we see ignorable stuff, return Comment.
+// Else return -1 (caller should back up and try again when more data arrives,
+// unless at end of file, in which case caller should just make '<' a data token).
+static int
+comment(TokenSource* ts)
+{
+	int	nexti;
+	int	havecomment;
+	int	c;
+
+	nexti = ts->i;	// position just after the '<!'
+	havecomment = 0;
+	c = getchar(ts);
+	if(c == '-') {
+		c = getchar(ts);
+		if(c == '-') {
+			if(findstr(ts, L(Larrow)))
+				havecomment = 1;
+			else
+				backup(ts, nexti);	// no "-->": rescan from the start for a plain ">"
+		}
+	}
+	if(!havecomment) {
+		if(c == '>')
+			havecomment = 1;
+		else if(c >= 0) {
+			if(findstr(ts, L(Lgt)))
+				havecomment = 1;
+		}
+	}
+	if(havecomment)
+		return Comment;
+	return -1;
+}
+
+// Scan forward through the token source for the rune string s.
+// On success return 1, leaving the source positioned just past s.
+// On running out of data return 0 (the caller is expected to back up).
+static int
+findstr(TokenSource* ts, Rune* s)
+{
+	int	first;
+	int	slen;
+	int	resume;
+	int	k;
+	int	ch;
+
+	first = s[0];
+	slen = runestrlen(s);
+	for(ch = getchar(ts); ch >= 0; ch = getchar(ts)) {
+		if(ch != first)
+			continue;
+		if(slen == 1)
+			return 1;
+		// candidate match: remember where to resume if it fails
+		resume = ts->i;
+		k = 1;
+		while(k < slen) {
+			ch = getchar(ts);
+			if(ch < 0)
+				return 0;
+			if(ch != s[k])
+				break;
+			k++;
+		}
+		if(k == slen)
+			return 1;
+		backup(ts, resume);
+	}
+	// ran out of data without a match
+	return 0;
+}
+
+// We've just read an '&'; look for an entity reference
+// name, and if found, return translated char.
+// A numeric reference (&#ddd) is read as decimal digits; values in
+// [Winstart, Winend] are remapped through winchars.
+// If there is a complete entity name but it isn't known,
+// try prefixes (gets around some buggy HTML out there).
+// If nothing is recognized, or the data ends before the reference
+// is complete, back up to just past the '&' and return '&'.
+static int
+ampersand(TokenSource* ts)
+{
+	int	savei;
+	int	c;
+	int	fnd;
+	int	ans;
+	int	v;
+	int	i;
+	int	k;
+	Rune	buf[SMALLBUFSIZE];
+
+	savei = ts->i;
+	c = getchar(ts);
+	fnd = 0;
+	ans = -1;
+	if(c == '#') {
+		c = getchar(ts);
+		v = 0;
+		while(c >= 0) {
+			if(!(c < 256 && isdigit(c)))
+				break;
+			// stop accumulating once past the Unicode range so that
+			// an over-long digit string cannot overflow v
+			if(v < 0x110000)
+				v = v*10 + c - '0';
+			c = getchar(ts);
+		}
+		if(c >= 0) {
+			// the terminator is consumed only if it is ';' or a line end
+			if(!(c == ';' || c == '\n' || c == '\r'))
+				ungetchar(ts, c);
+			c = v;
+			if(c >= Winstart && c <= Winend) {
+				c = winchars[c - Winstart];
+			}
+			ans = c;
+			fnd = 1;
+		}
+	}
+	else if(c < 256 && isalpha(c)) {
+		buf[0] = c;
+		k = 1;
+		while(1) {
+			c = getchar(ts);
+			if(c < 0)
+				break;
+			if(ISNAMCHAR(c)) {
+				if(k < SMALLBUFSIZE-1)
+					buf[k++] = c;
+			}
+			else {
+				if(!(c == ';' || c == '\n' || c == '\r'))
+					ungetchar(ts, c);
+				break;
+			}
+		}
+		if(c >= 0) {
+			fnd = _lookup(chartab, NCHARTAB, buf, k, &ans);
+			if(!fnd) {
+				// Try prefixes of s
+				if(c == ';' || c == '\n' || c == '\r')
+					ungetchar(ts, c);
+				i = k;
+				while(--k > 0) {
+					fnd = _lookup(chartab, NCHARTAB, buf, k, &ans);
+					if(fnd) {
+						// give back the characters beyond the matched prefix
+						while(i > k) {
+							i--;
+							ungetchar(ts, buf[i]);
+						}
+						break;
+					}
+				}
+			}
+		}
+	}
+	if(!fnd) {
+		backup(ts, savei);
+		ans = '&';
+	}
+	return ans;
+}
+
+// Get next char, obeying ts.chset.
+// Returns -1 if no complete character left before current end of data;
+// for the multi-byte character sets an incomplete trailing character
+// also marks all of the data as used.
+static int
+getchar(TokenSource* ts)
+{
+	uchar*	buf;
+	int	c;
+	int	n;
+	int	ok;
+	Rune	r;
+
+	if(ts->i >= ts->edata)
+		return -1;
+	buf = ts->data;
+	c = buf[ts->i];
+	switch(ts->chset) {
+	case ISO_8859_1:
+		// bytes in [Winstart, Winend] are remapped through winchars
+		if(c >= Winstart && c <= Winend)
+			c = winchars[c - Winstart];
+		ts->i++;
+		break;
+	case US_Ascii:
+		if(c > 127) {
+			if(warn)
+				fprint(2, "non-ascii char (%x) when US-ASCII specified\n", c);
+		}
+		ts->i++;
+		break;
+	case UTF_8:
+		ok = fullrune((char*)(buf+ts->i), ts->edata-ts->i);
+		if(ok) {
+			// decode only once a whole rune is known to be present,
+			// so chartorune never looks past the end of the data
+			n = chartorune(&r, (char*)(buf+ts->i));
+			if(warn && c == 0x80)
+				fprint(2, "warning: invalid utf-8 sequence (starts with %x)\n", ts->data[ts->i]);
+			ts->i += n;
+			c = r;
+		}
+		else {
+			// not enough bytes in buf to complete utf-8 char
+			ts->i = ts->edata;	// mark "all used"
+			c = -1;
+		}
+		break;
+	case Unicode:
+		if(ts->i < ts->edata - 1) {
+			//standards say most-significant byte first
+			c = (c << 8)|(buf[ts->i + 1]);
+			ts->i += 2;
+		}
+		else {
+			ts->i = ts->edata;	// mark "all used"
+			c = -1;
+		}
+		break;
+	}
+	return c;
+}
+
+// Undo the most recent getchar, assuming it returned c: step the
+// read index back by however many bytes c occupies in ts's character
+// set, so the next getchar yields c again.
+static void
+ungetchar(TokenSource* ts, int c)
+{
+	int	width;
+	Rune	rune;
+	char	utf[UTFmax];
+
+	if(ts->chset == Unicode) {
+		// two bytes per character
+		width = 2;
+	}
+	else if(ts->chset == UTF_8 && c >= 128) {
+		// multi-byte sequence: re-encode c to learn its length
+		rune = c;
+		width = runetochar(utf, &rune);
+	}
+	else {
+		width = 1;
+	}
+	ts->i = ts->i - width;
+}
+
+// Reposition ts so that its read index is savei again
+// (logging the move when lexer debugging is on).
+static void
+backup(TokenSource* ts, int savei)
+{
+	if(dbglex != 0)
+		fprint(2, "lex: backup; i=%d, savei=%d\n", ts->i, savei);
+	ts->i = savei;
+}
+
+
+// Search token t's attribute list for attribute attid.
+// Returns 1 if the attribute is present, 0 if it is not.
+// When pans is non-nil, *pans receives the attribute's value
+// on success, and nil on failure.
+// When xfer is true the string's ownership passes to the caller
+// and the Attr's value is cleared; otherwise the caller must
+// duplicate the value if it needs to keep it.
+// Passing pans==nil just tests for presence.
+int
+_tokaval(Token* t, int attid, Rune** pans, int xfer)
+{
+	Attr*	cur;
+
+	for(cur = t->attr; cur != nil; cur = cur->next) {
+		if(cur->attid != attid)
+			continue;
+		// first match wins
+		if(pans != nil)
+			*pans = cur->value;
+		if(xfer)
+			cur->value = nil;
+		return 1;
+	}
+	// attribute absent
+	if(pans != nil)
+		*pans = nil;
+	return 0;
+}
+
+static int
+Tconv(Fmt *f)	// Fmt conversion routine: formats a Token* argument (used with the %T verb in this file)
+{
+	Token*	t;
+	int	i;
+	int	tag;
+	char*	srbra;
+	Rune*	aname;
+	Rune*	tname;
+	Attr*	a;
+	char	buf[BIGBUFSIZE];
+
+	t = va_arg(f->args, Token*);
+	if(t == nil)
+		sprint(buf, "<null>");
+	else {
+		i = 0;	// number of bytes of buf filled so far
+		if(dbglex > 1)
+			i = snprint(buf, sizeof(buf), "[%d]", t->starti);
+		tag = t->tag;
+		if(tag == Data) {
+			i += snprint(buf+i, sizeof(buf)-i-1, "'%S'", t->text);
+		}
+		else {
+			srbra = "";
+			if(tag >= RBRA) {
+				tag -= RBRA;
+				srbra = "/";	// printed before the name, giving "</name"
+			}
+			tname = tagnames[tag];
+			if(tag == Notfound)
+				tname = L(Lquestion);
+			i += snprint(buf+i, sizeof(buf)-i-1, "<%s%S", srbra, tname);
+			for(a = t->attr; a != nil; a = a->next) {
+				aname = attrnames[a->attid];
+				i += snprint(buf+i, sizeof(buf)-i-1, " %S", aname);
+				if(a->value != nil)
+					i += snprint(buf+i, sizeof(buf)-i-1, "=%S", a->value);
+			}
+			i += snprint(buf+i, sizeof(buf)-i-1, ">");
+		}
+		buf[i] = 0;
+	}
+	return fmtstrcpy(f, buf);
+}
+
+// Allocate an Attr for (attid, value) and push it on the front of link.
+// Attrs own their constituent strings, but build may eventually
+// transfer some values to its items and nil them out in the Attr.
+static Attr*
+newattr(int attid, Rune* value, Attr* link)
+{
+	Attr*	node;
+
+	node = (Attr*)emalloc(sizeof(Attr));
+	node->next = link;
+	node->value = value;
+	node->attid = attid;
+	return node;
+}
+
+// Release every Attr on the next-linked list starting at ahead,
+// together with each Attr's value string.
+static void
+freeattrs(Attr* ahead)
+{
+	Attr*	cur;
+	Attr*	following;
+
+	// grab the link before the node itself is freed
+	for(cur = ahead; cur != nil; cur = following) {
+		following = cur->next;
+		free(cur->value);
+		free(cur);
+	}
+}
+
+// Free an array of Tokens along with the text and attributes they own.
+// Only the first n entries are assumed initialized, even if the
+// allocation has room for more.
+// Any strings or attributes whose ownership the caller took over
+// must already have been nil'd out in the Tokens.
+void
+_freetokens(Token* tarray, int n)
+{
+	int k;
+
+	if(tarray == nil)
+		return;
+	k = 0;
+	while(k < n) {
+		free(tarray[k].text);
+		freeattrs(tarray[k].attr);
+		k++;
+	}
+	free(tarray);
+}

diff --git a/src/libhtml/mkfile b/src/libhtml/mkfile
new file mode 100644
index 0000000..0952c45
--- /dev/null
+++ b/src/libhtml/mkfile

@@ -0,0 +1,22 @@
+<$SYS9/$systype/$objtype/mkfile
+
+LIB=$LIB9/libhtml.a
+
+OFILES=\
+	build.$O\
+	lex.$O\
+	strinttab.$O\
+	utils.$O\
+	runetab.$O\
+
+HFILES=\
+	$SYS9/sys/include/html.h\
+	impl.h\
+
+UPDATE=\
+	mkfile\
+	$HFILES\
+	${OFILES:%.$O=%.c}\
+	${LIB:$SYS9/$systype/$objtype/%=$SYS9/$systype/386/%}\
+
+<$SYS9/sys/src/cmd/mksyslib

diff --git a/src/libhtml/runetab.c b/src/libhtml/runetab.c
new file mode 100644
index 0000000..abd0a50
--- /dev/null
+++ b/src/libhtml/runetab.c

@@ -0,0 +1,83 @@
+#include <u.h>
+#include <libc.h>
+#include <draw.h>
+#include <html.h>
+#include "impl.h"
+
+Rune **runeconsttab;	// Rune versions of the strings below; built by runetabinit and read through the L(x) macro in runetab.h
+char *_runeconsttab[] = {	// entry order must match the L* enum in runetab.h, which supplies the indices
+	"        ",
+	" ",
+	"",
+	"#",
+	"+",
+	", ",
+	"-",
+	"-->",
+	"1",
+	"<",
+	">",
+	"?",
+	"Index search terms:",
+	"Reset",
+	"Submit",
+	"^0-9",
+	"_ISINDEX_",
+	"_blank",
+	"_fr",
+	"_no_name_submit_",
+	"_parent",
+	"_self",
+	"_top",
+	"application/x-www-form-urlencoded",
+	"circle",
+	"cm",
+	"content-script-type",
+	"disc",
+	"em",
+	"in",
+	"javascript",
+	"jscript",
+	"jscript1.1",
+	"mm",
+	"none",
+	"pi",
+	"pt",
+	"refresh",
+	"select",
+	"square",
+	"textarea",
+};
+
+// Convert the n NUL-terminated ASCII strings in tab into a newly
+// emalloc'd array of n Rune strings (each made by toStr).
+Rune**
+cvtstringtab(char **tab, int n)
+{
+	int i;
+	Rune **rtab;
+
+	rtab = emalloc(n*sizeof(rtab[0]));
+	for(i=0; i<n; i++)	// toStr takes uchar*, so convert the pointer explicitly
+		rtab[i] = toStr((uchar*)tab[i], strlen(tab[i]), US_Ascii);
+	return rtab;
+}
+
+// Convert the n AsciiInt entries in tab into a newly emalloc'd
+// StringInt table: each key becomes a Rune string (made by toStr),
+// each val is copied unchanged.
+StringInt*
+cvtstringinttab(AsciiInt *tab, int n)
+{
+	int i;
+	StringInt *stab;
+
+	stab = emalloc(n*sizeof(stab[0]));
+	for(i=0; i<n; i++){
+		// toStr takes uchar*, so convert the pointer explicitly
+		stab[i].key = toStr((uchar*)tab[i].key, strlen(tab[i].key), US_Ascii);
+		stab[i].val = tab[i].val;
+	}
+	return stab;
+}
+
+// Build runeconsttab, the Rune versions of the strings in _runeconsttab.
+void
+runetabinit(void)
+{
+	int nstr;
+
+	nstr = nelem(_runeconsttab);
+	runeconsttab = cvtstringtab(_runeconsttab, nstr);
+}

diff --git a/src/libhtml/runetab.h b/src/libhtml/runetab.h
new file mode 100644
index 0000000..edde98c
--- /dev/null
+++ b/src/libhtml/runetab.h

@@ -0,0 +1,59 @@
+typedef struct AsciiInt AsciiInt;
+
+struct AsciiInt {	/* char* keyed counterpart of StringInt; see cvtstringinttab */
+	char*	key;	/* string that cvtstringinttab converts to a Rune string */
+	int	val;	/* value copied unchanged into the StringInt */
+};
+
+enum {	/* indices into runeconsttab, used as L(x); order must match _runeconsttab in runetab.c */
+	Ltab2space,	/* eight spaces */
+	Lspace,
+	Lempty,
+	Lhash,
+	Lplus,
+	Lcommaspace,
+	Lminus,
+	Larrow,	/* the comment terminator, dash dash greater-than */
+	Lone,
+	Llt,
+	Lgt,
+	Lquestion,
+	Lindex,
+	Lreset,
+	Lsubmit,
+	Lnot0to9,
+	Lisindex,
+	L_blank,
+	Lfr,
+	Lnoname,
+	L_parent,
+	L_self,
+	L_top,
+	Lappl_form,
+	Lcircle,
+	Lcm,
+	Lcontent,
+	Ldisc,
+	Lem,
+	Lin,
+	Ljavascript,
+	Ljscript,
+	Ljscript1,
+	Lmm,
+	Lnone,
+	Lpi,
+	Lpt,
+	Lrefresh,
+	Lselect,
+	Lsquare,
+	Ltextarea,
+};
+
+#define L(x)	runeconsttab[(x)]
+
+extern	Rune	**runeconsttab;
+
+/* XXX: for unix port only */
+Rune		**cvtstringtab(char**, int);
+StringInt	*cvtstringinttab(AsciiInt*, int);
+void		runetabinit(void);

diff --git a/src/libhtml/strinttab.c b/src/libhtml/strinttab.c
new file mode 100644
index 0000000..7883c04
--- /dev/null
+++ b/src/libhtml/strinttab.c

@@ -0,0 +1,64 @@
+#include <u.h>
+#include <libc.h>
+#include <draw.h>
+#include <html.h>
+#include "impl.h"
+
+// Binary-search t[0:n] for key[0:keylen], comparing case-insensitively
+// with _Strncmpci.  Returns 1 and stores the entry's val in *pans if
+// the key is found; returns 0 (leaving *pans alone) if not.
+// Array t must be sorted in increasing lexicographic order of key.
+int
+_lookup(StringInt* t, int n, Rune* key, int keylen, int* pans)
+{
+	int	lo;
+	int	hi;
+	int	mid;
+	int	cmp;
+
+	for(lo = 0, hi = n-1; lo <= hi; ) {
+		mid = (lo + hi)/2;
+		cmp = _Strncmpci(key, keylen, t[mid].key);
+		if(cmp == 0) {
+			*pans = t[mid].val;
+			return 1;
+		}
+		if(cmp > 0)
+			lo = mid + 1;
+		else
+			hi = mid - 1;
+	}
+	return 0;
+}
+
+// Linear search of t[0:n] for an entry whose val equals val.
+// Returns the key of the first such entry, or nil if there is none.
+Rune*
+_revlookup(StringInt* t, int n, int val)
+{
+	int	k;
+
+	k = 0;
+	while(k < n) {
+		if(t[k].val == val)
+			return t[k].key;
+		k++;
+	}
+	return nil;
+}
+
+// Build an emalloc'd StringInt table from a[0:n] in which each string
+// maps to its own index.  The keys are the pointers from a, not copies.
+// Asserts that a is in non-decreasing runestrcmp order.
+StringInt*
+_makestrinttab(Rune** a, int n)
+{
+	StringInt*	tab;
+	int	i;
+
+	tab = (StringInt*)emalloc(n * sizeof(StringInt));
+	i = 0;
+	while(i < n) {
+		tab[i].val = i;
+		tab[i].key = a[i];
+		assert(i == 0 || runestrcmp(a[i], a[i - 1]) >= 0);
+		i++;
+	}
+	return tab;
+}

diff --git a/src/libhtml/utils.c b/src/libhtml/utils.c
new file mode 100644
index 0000000..db22bba
--- /dev/null
+++ b/src/libhtml/utils.c

@@ -0,0 +1,591 @@
+#include <u.h>
+#include <libc.h>
+#include <draw.h>
+#include <html.h>
+#include "impl.h"
+
+Rune whitespace[] = { ' ', '\t', '\n', '\r', '\0' };
+Rune notwhitespace[] = { '^', ' ', '\t', '\n', '\r' , '\0'};
+
+// All lists start out like the List structure, and List itself
+// can be used as a list of int.
+// Returns the number of cells on l (0 for nil).
+int
+_listlen(List* l)
+{
+	int count;
+
+	for(count = 0; l != nil; l = l->next)
+		count++;
+	return count;
+}
+
+// Cons: allocate a new cell holding val and link it in front of rest.
+List*
+_newlist(int val, List* rest)
+{
+	List* cell;
+
+	cell = (List*)emalloc(sizeof(List));
+	cell->next = rest;
+	cell->val = val;
+	return cell;
+}
+
+// Reverse list l in place by relinking its cells; returns the new head.
+List*
+_revlist(List* l)
+{
+	List* done;
+	List* cell;
+
+	// move cells one at a time from the front of l to the front of done
+	for(done = nil; l != nil; done = cell) {
+		cell = l;
+		l = cell->next;
+		cell->next = done;
+	}
+	return done;
+}
+
+// The next few routines take a "character class" as argument.
+//    e.g., "a-zA-Z", or "^ \t\n"
+// (ranges indicated by - except in first position;
+//  ^ is first position means "not in" the following class)
+
+// Splitl divides s[0:n] immediately before the first character of class cl.
+// The piece before the split goes in (*p1, *n1) and the piece from the
+// split onward in (*p2, *n2).
+// With no character of cl present, (*p1, *n1) is all of s and (*p2, *n2) is (nil, 0).
+// Note: answers contain pointers into original string.
+void
+_splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
+{
+	Rune* brk;
+	int nhead;
+
+	brk = _Strnclass(s, cl, n);
+	if(brk == nil)
+		nhead = n;
+	else
+		nhead = brk-s;
+	*p1 = s;
+	*n1 = nhead;
+	*p2 = brk;
+	*n2 = n-nhead;
+}
+
+// Splitr divides s[0:n] immediately after the last character of class cl.
+// The piece up to and including that character goes in (*p1, *n1) and
+// the remainder in (*p2, *n2).
+// With no character of cl present, (*p1, *n1) is (nil, 0) and (*p2, *n2) is all of s.
+// Note: answers contain pointers into original string.
+void
+_splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
+{
+	Rune* last;
+	int nhead;
+
+	last = _Strnrclass(s, cl, n);
+	if(last == nil) {
+		// no split point: everything lands in the second piece
+		*p1 = nil;
+		*n1 = 0;
+		*p2 = s;
+		*n2 = n;
+		return;
+	}
+	nhead = last+1-s;
+	*p1 = s;
+	*p2 = last+1;
+	*n1 = nhead;
+	*n2 = n-nhead;
+}
+
+// Splitall breaks s[0:n] into the parts separated by characters of class cl.
+// Every part has nonzero length.
+// At most alen parts are reported: pointers to their starts are stored in
+// strarr and their lengths in lenarr.
+// Returns the number of parts found.
+int
+_splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
+{
+	int nparts;
+	Rune* p;
+	Rune* q;
+	Rune* slast;
+
+	if(s == nil || n == 0)
+		return 0;
+	slast = s+n;
+	nparts = 0;
+	for(p = s; p < slast && nparts < alen; p = q) {
+		// step over any run of separators
+		for(; p < slast; p++)
+			if(!_inclass(*p, cl))
+				break;
+		if(p == slast)
+			break;
+		// the part extends to the next separator, or to the end
+		q = _Strnclass(p, cl, slast-p);
+		if(q == nil)
+			q = slast;
+		assert(q > p && q <= slast);
+		strarr[nparts] = p;
+		lenarr[nparts] = q-p;
+		nparts++;
+	}
+	return nparts;
+}
+
+// Find the part of s[0:n] that excludes leading and trailing whitespace,
+// and return that part in *pans (and its length in *panslen).
+// If s[0:n] is empty or contains nothing but whitespace, the answer
+// is (nil, 0).
+void
+_trimwhite(Rune* s, int n, Rune** pans, int* panslen)
+{
+	Rune* p;
+	Rune* q;
+
+	p = nil;
+	if(n > 0) {
+		p = _Strnclass(s, notwhitespace, n);
+		if(p != nil) {
+			q = _Strnrclass(s, notwhitespace, n);
+			assert(q != nil);
+			n = q+1-p;
+		}
+	}
+	// never pair a nil pointer with a nonzero length
+	if(p == nil)
+		n = 0;
+	*pans = p;
+	*panslen = n;
+}
+
+// _Strclass scans the null-terminated string s and returns a pointer to
+// its first element that belongs to class cl, or nil if there is none.
+Rune*
+_Strclass(Rune* s, Rune* cl)
+{
+	while(*s != 0) {
+		if(_inclass(*s, cl))
+			return s;
+		s++;
+	}
+	return nil;
+}
+
+// _Strnclass returns a pointer to the first element of s[0:n] that
+// belongs to class cl, or nil if there is none.  The scan also stops
+// at a 0 element.
+Rune*
+_Strnclass(Rune* s, Rune* cl, int n)
+{
+	Rune* cur;
+	int left;
+
+	cur = s;
+	for(left = n; left != 0 && *cur != 0; left--) {
+		if(_inclass(*cur, cl))
+			return cur;
+		cur++;
+	}
+	return nil;
+}
+
+// _Strrclass returns a pointer to the last element of the null-terminated
+// string s that belongs to class cl, or nil if there is none
+// (including when s is nil or empty).
+Rune*
+_Strrclass(Rune* s, Rune* cl)
+{
+	Rune* cur;
+
+	if(s == nil || *s == 0)
+		return nil;
+	// walk backwards from the final element
+	for(cur = s + runestrlen(s) - 1; cur >= s; cur--)
+		if(_inclass(*cur, cl))
+			return cur;
+	return nil;
+}
+
+// _Strnrclass returns a pointer to the last element of s[0:n] that
+// belongs to class cl, or nil if there is none
+// (including when s is nil, s starts with 0, or n is 0).
+Rune*
+_Strnrclass(Rune* s, Rune* cl, int n)
+{
+	Rune* cur;
+
+	if(s == nil || *s == 0 || n == 0)
+		return nil;
+	// walk backwards from s[n-1]
+	for(cur = s + n - 1; cur >= s; cur--)
+		if(_inclass(*cur, cl))
+			return cur;
+	return nil;
+}
+
+// Is c in the class cl?
+// An empty (or nil) class contains nothing.  A leading '^' inverts the
+// answer.  A '-' that is neither first nor last in the (remaining)
+// class denotes the range between its neighbours.
+int
+_inclass(Rune c, Rune* cl)
+{
+	int	len;
+	int	k;
+	int	neg;
+	int	hit;
+
+	len = _Strlen(cl);
+	if(len == 0)
+		return 0;
+	neg = 0;
+	if(cl[0] == '^') {
+		neg = 1;
+		cl++;
+		len--;
+	}
+	hit = 0;
+	for(k = 0; k < len && !hit; k++) {
+		if(cl[k] == '-' && k > 0 && k < len - 1) {
+			if(c >= cl[k - 1] && c <= cl[k + 1])
+				hit = 1;
+			else
+				k++;	// step over the range's upper bound
+		}
+		else if(c == cl[k])
+			hit = 1;
+	}
+	if(neg)
+		return !hit;
+	return hit;
+}
+
+// Is pre a prefix of s?  Returns 1 if so, 0 if not.
+// (A nil or empty pre is a prefix of anything.)
+int
+_prefix(Rune* pre, Rune* s)
+{
+	int	npre;
+	int	i;
+
+	npre = _Strlen(pre);
+	if(_Strlen(s) < npre)
+		return 0;
+	i = 0;
+	while(i < npre && pre[i] == s[i])
+		i++;
+	return i == npre;
+}
+
+// Number of runes in (null-terminated) s; a nil s counts as length 0.
+int
+_Strlen(Rune* s)
+{
+	return (s == nil) ? 0 : runestrlen(s);
+}
+
+// Compare s1 and s2 lexicographically, treating nil like the empty string.
+// When both are non-nil the result is whatever runestrcmp returns;
+// otherwise it is -1, 0 or 1 as s1 is less than, equal to, or greater than s2.
+int
+_Strcmp(Rune *s1, Rune *s2)
+{
+	if(s1 != nil && s2 != nil)
+		return runestrcmp(s1, s2);
+	if(s1 == nil)
+		return (s2 == nil || *s2 == 0) ? 0 : -1;
+	return (*s1 == 0) ? 0 : 1;
+}
+
+// Like Strcmp, but use exactly n1 chars of s1 (assume s1 has at least n1 chars).
+// Also, do a case-insensitive match, assuming s2
+// has no chars in [A-Z], only their lowercase versions.
+// (This routine is used for in-place keyword lookup, where s2 is in a keyword
+// list and s1 is some substring, possibly mixed-case, in a buffer.)
+// Returns -1, 0 or 1 as s1[0:n1] is less than, equal to, or greater than s2.
+int
+_Strncmpci(Rune *s1, int n1, Rune *s2)
+{
+	Rune c1, c2;
+
+	for(;;) {
+		if(n1-- == 0) {
+			// s1 is used up: equal only if s2 ends here too
+			if(*s2 == 0)
+				return 0;
+			return -1;
+		}
+		c1 = *s1++;
+		c2 = *s2++;
+		if(c1 >= 'A' && c1 <= 'Z')
+			c1 = c1 - 'A' + 'a';
+		if(c1 != c2) {
+			if(c1 > c2)
+				return 1;
+			return -1;
+		}
+		// both runes are 0: s2 has ended while s1 still had a rune,
+		// so stop rather than read past s2's terminator
+		if(c2 == 0)
+			return 1;
+	}
+}
+
+// Return an emalloc'd copy of the null-terminated string s.
+// A nil s gives nil (as does an empty s, via _Strndup).
+Rune*
+_Strdup(Rune* s)
+{
+	return (s == nil) ? nil : _Strndup(s, runestrlen(s));
+}
+
+// Return an emalloc'd, 0-terminated copy of the first n chars of s
+// (s is assumed to be at least that long).
+// Returns nil if n <= 0.
+Rune*
+_Strndup(Rune* s, int n)
+{
+	Rune* copy;
+
+	if(n <= 0)
+		return nil;
+	copy = _newstr(n);
+	copy[n] = 0;
+	memmove(copy, s, n*sizeof(Rune));
+	return copy;
+}
+// Allocate (with emalloc) space for a string of n Runes plus its
+// null terminator.  This routine does not fill the space in.
+Rune*
+_newstr(int n)
+{
+	Rune*	str;
+
+	str = (Rune*)emalloc((n+1)*sizeof(Rune));
+	return str;
+}
+
+// Return an emalloc'd string holding s followed by t.
+// Either may be nil (treated as empty); if both are empty the result is nil.
+Rune*
+_Strdup2(Rune* s, Rune* t)
+{
+	int ns, nt;
+	Rune* ans;
+
+	ns = _Strlen(s);
+	nt = _Strlen(t);
+	if(ns+nt == 0)
+		return nil;
+	ans = _newstr(ns+nt);
+	if(ns != 0)
+		memmove(ans, s, ns*sizeof(Rune));
+	if(nt != 0)
+		memmove(ans+ns, t, nt*sizeof(Rune));
+	ans[ns+nt] = 0;
+	return ans;
+}
+
+// Return the substring s[start:stop] as an emalloc'd string
+// (nil when start == stop, or when _Strndup returns nil).
+Rune*
+_Strsubstr(Rune* s, int start, int stop)
+{
+	if(start == stop)
+		return nil;
+	return _Strndup(s+start, stop-start);
+}
+
+// Copy n chars from s2 to s1 and return s1+n, the position just past the copy.
+Rune*
+_Stradd(Rune* s1, Rune* s2, int n)
+{
+	if(n != 0)
+		memmove(s1, s2, n*sizeof(Rune));
+	return s1+n;
+}
+
+// Like strtol, but converting from Rune* string.
+// Skips leading white space, accepts an optional sign, and for base 0
+// picks the base from the prefix (0x or 0X: 16, leading 0: 8, else 10).
+// If endptr is non-nil, *endptr is set to the first rune not consumed,
+// or to nptr if no digits were found.
+// On overflow returns LONG_MAX, or LONG_MIN if the number was negative.
+
+//#define LONG_MAX	2147483647L
+//#define LONG_MIN	-2147483648L
+
+long
+_Strtol(Rune* nptr, Rune** endptr, int base)
+{
+	Rune* p;
+	long n;
+	int c, ovfl, v, neg, ndig;
+
+	p = nptr;
+	neg = 0;
+	n = 0;
+	ndig = 0;
+	ovfl = 0;
+
+	/*
+	 * White space
+	 */
+	for(;;p++){
+		switch(*p){
+		case ' ':
+		case '\t':
+		case '\n':
+		case '\f':
+		case '\r':
+		case '\v':
+			continue;
+		}
+		break;
+	}
+
+	/*
+	 * Sign
+	 */
+	if(*p=='-' || *p=='+')
+		if(*p++ == '-')
+			neg = 1;
+
+	/*
+	 * Base
+	 */
+	if(base==0){
+		if(*p != '0')
+			base = 10;
+		else{
+			base = 8;
+			if(p[1]=='x' || p[1]=='X'){
+				p += 2;
+				base = 16;
+			}
+		}
+	}else if(base==16 && *p=='0'){
+		if(p[1]=='x' || p[1]=='X')
+			p += 2;
+	}else if(base<0 || 36<base)
+		goto Return;
+
+	/*
+	 * Non-empty sequence of digits
+	 */
+	for(;; p++,ndig++){
+		c = *p;
+		v = base;
+		if('0'<=c && c<='9')
+			v = c - '0';
+		else if('a'<=c && c<='z')
+			v = c - 'a' + 10;
+		else if('A'<=c && c<='Z')
+			v = c - 'A' + 10;
+		if(v >= base)
+			break;
+		/*
+		 * Test before multiplying: signed overflow is undefined,
+		 * so it cannot be detected reliably after the fact.
+		 */
+		if(n > (LONG_MAX - v)/base)
+			ovfl = 1;
+		else
+			n = n*base + v;
+	}
+
+    Return:
+	if(ndig == 0)
+		p = nptr;
+	if(endptr)
+		*endptr = p;
+	if(ovfl){
+		if(neg)
+			return LONG_MIN;
+		return LONG_MAX;
+	}
+	if(neg)
+		return -n;
+	return n;
+}
+
+// Convert buf[0:n], bytes whose character set is chset,
+// into an emalloc'd null-terminated Unicode string.
+// US_Ascii and ISO_8859_1 bytes are copied one-to-one into Runes.
+// For UTF_8, a sequence cut short by the end of buf is dropped
+// rather than decoded from bytes beyond buf[n-1].
+// Any other chset trips an assertion.
+Rune*
+toStr(uchar* buf, int n, int chset)
+{
+	int i;
+	int m;
+	Rune ch;
+	Rune* ans;
+
+	switch(chset) {
+	case US_Ascii:
+	case ISO_8859_1:
+		ans = (Rune*)emalloc((n+1)*sizeof(Rune));
+		for(i = 0; i < n; i++)
+			ans[i] = buf[i];
+		ans[n] = 0;
+		break;
+
+	case UTF_8:
+		// first pass: count the runes that lie wholly inside buf[0:n]
+		m = 0;
+		for(i = 0; i < n && fullrune((char*)(buf+i), n-i); ) {
+			i += chartorune(&ch, (char*)(buf+i));
+			m++;
+		}
+		ans = (Rune*)emalloc((m+1)*sizeof(Rune));
+		// second pass: decode the same runes into place
+		m = 0;
+		for(i = 0; i < n && fullrune((char*)(buf+i), n-i); ) {
+			i += chartorune(&ch, (char*)(buf+i));
+			ans[m++] = ch;
+		}
+		ans[m] = 0;
+		break;
+
+	default:
+		ans = nil;
+		assert(0);
+	}
+	return ans;
+}
+
+// Convert buf[0:n], Unicode characters,
+// into an emalloc'd null-terminated string in character set chset.
+// For US_Ascii and ISO_8859_1, characters above the set's limit
+// (127 and 255 respectively) are replaced by 0x80.
+// Any chset other than US_Ascii, ISO_8859_1 or UTF_8 trips an assertion.
+uchar*
+fromStr(Rune* buf, int n, int chset)
+{
+	uchar* ans;
+	uchar* out;
+	uchar scratch[UTFmax];
+	int k, top, nbytes;
+
+	if(chset == US_Ascii || chset == ISO_8859_1) {
+		top = (chset == US_Ascii) ? 127 : 255;
+		ans = (uchar*)emalloc(n+1);
+		for(k = 0; k < n; k++) {
+			if(buf[k] > top)
+				ans[k] = 0x80;
+			else
+				ans[k] = buf[k];
+		}
+		ans[n] = 0;
+		return ans;
+	}
+	if(chset == UTF_8) {
+		// first pass: measure the encoded length
+		nbytes = 0;
+		for(k = 0; k < n; k++)
+			nbytes += runetochar((char*)scratch, &buf[k]);
+		ans = (uchar*)emalloc(nbytes+1);
+		// second pass: encode into place
+		out = ans;
+		for(k = 0; k < n; k++)
+			out += runetochar((char*)out, &buf[k]);
+		*out = 0;
+		return ans;
+	}
+	assert(0);
+	return nil;
+}
+
+// Format n in decimal and return it as an emalloc'd Rune string.
+Rune*
+_ltoStr(int n)
+{
+	uchar digits[20];
+	int len;
+
+	len = snprint((char*)digits, sizeof(digits), "%d", n);
+	return toStr(digits, len, US_Ascii);
+}
commit	7cf289ca89a7416999ae02330236042b0d37e3db	[log] [tgz]
author	wkj <devnull@localhost>	Tue Apr 06 19:06:52 2004 +0000
committer	wkj <devnull@localhost>	Tue Apr 06 19:06:52 2004 +0000
tree	796d1363a7a53c72c28b199758ee674f1326a510
parent	3e3817f7c86658f60715dd93768eaf8285807985 [diff]