Add libString.
diff --git a/include/html.h b/include/html.h
new file mode 100644
index 0000000..019ad73
--- /dev/null
+++ b/include/html.h
@@ -0,0 +1,629 @@
+#ifndef _HTML_H_
+#define _HTML_H_ 1
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ #pragma lib "libhtml.a"
+ #pragma src "/sys/src/libhtml"
+*/
+
+// UTILS: presumably convert between Rune strings and byte strings in charset chset -- confirm in libhtml
+extern uchar*	fromStr(Rune* buf, int n, int chset);
+extern Rune*	toStr(uchar* buf, int n, int chset);
+
+// Common LEX and BUILD enums
+
+// Media types (values for Docinfo.mediatype and Docinfo.scripttype)
+enum
+{
+	ApplMsword,
+	ApplOctets,
+	ApplPdf,
+	ApplPostscript,
+	ApplRtf,
+	ApplFramemaker,
+	ApplMsexcel,
+	ApplMspowerpoint,
+	UnknownType,
+	Audio32kadpcm,
+	AudioBasic,
+	ImageCgm,
+	ImageG3fax,
+	ImageGif,
+	ImageIef,
+	ImageJpeg,
+	ImagePng,
+	ImageTiff,
+	ImageXBit,
+	ImageXBit2,
+	ImageXBitmulti,
+	ImageXXBitmap,
+	ModelVrml,
+	MultiDigest,
+	MultiMixed,
+	TextCss,
+	TextEnriched,
+	TextHtml,
+	TextJavascript,
+	TextPlain,
+	TextRichtext,
+	TextSgml,
+	TextTabSeparatedValues,
+	TextXml,
+	VideoMpeg,
+	VideoQuicktime,
+	NMEDIATYPES	// number of media types above; must stay last
+};
+
+// HTTP methods (values for Form.method)
+enum
+{
+	HGet,
+	HPost
+};
+
+// Charsets (values for Docinfo.chset)
+enum
+{
+	UnknownCharset,
+	US_Ascii,
+	ISO_8859_1,
+	UTF_8,
+	Unicode,
+	NCHARSETS	// number of charsets above; must stay last
+};
+
+// Frame Target IDs (presumably the predefined values of the "target" fields below -- confirm against targetid())
+enum {
+	FTtop,
+	FTself,
+	FTparent,
+	FTblank
+};
+
+// LEX (Token and Attr are only forward-declared in this header)
+typedef struct Token Token;
+typedef struct Attr Attr;
+
+// BUILD (all defined below except Stack, Pstate, ItemSource and Lay, which stay opaque here)
+
+typedef struct Item Item;
+typedef struct Itext Itext;
+typedef struct Irule Irule;
+typedef struct Iimage Iimage;
+typedef struct Iformfield Iformfield;
+typedef struct Itable Itable;
+typedef struct Ifloat Ifloat;
+typedef struct Ispacer Ispacer;
+typedef struct Genattr Genattr;
+typedef struct SEvent SEvent;
+typedef struct Formfield Formfield;
+typedef struct Option Option;
+typedef struct Form Form;
+typedef struct Table Table;
+typedef struct Tablecol Tablecol;
+typedef struct Tablerow Tablerow;
+typedef struct Tablecell Tablecell;
+typedef struct Align Align;
+typedef struct Dimen Dimen;
+typedef struct Anchor Anchor;
+typedef struct DestAnchor DestAnchor;
+typedef struct Map Map;
+typedef struct Area Area;
+typedef struct Background Background;
+typedef struct Kidinfo Kidinfo;
+typedef struct Docinfo Docinfo;
+typedef struct Stack Stack;
+typedef struct Pstate Pstate;
+typedef struct ItemSource ItemSource;
+typedef struct Lay Lay;	// defined in Layout module
+
+// Alignment types (values for Align.halign and Align.valign; also named by Ifloat.side and Table.caption_place)
+enum {
+	ALnone = 0, ALleft, ALcenter, ALright, ALjustify,
+	ALchar, ALtop, ALmiddle, ALbottom, ALbaseline
+};
+
+struct Align
+{
+	uchar	halign;	// horizontal alignment: one of ALnone, ALleft, etc.
+	uchar	valign;	// vertical alignment: one of ALnone, ALtop, etc.
+};
+
+// A Dimen holds a dimension specification, especially for those
+// cases when a number can be followed by a % or a * to indicate
+// percentage of total or relative weight.
+// Dnone means no dimension was specified
+
+// To fit in a word, bits 29-30 identify the kind (Dkindmask); the remaining bits hold the value (Dspecmask)
+enum {
+	Dnone =		0,
+	Dpixels =		(1<<29),
+	Dpercent =	(2<<29),	// number was followed by %
+	Drelative =	(3<<29),	// number was followed by *
+	Dkindmask =	(3<<29),	// selects the kind bits of Dimen.kindspec
+	Dspecmask =	(~Dkindmask)	// selects the value bits of Dimen.kindspec
+};
+
+struct Dimen
+{
+	int	kindspec;		// kind | spec, packed as described by Dkindmask and Dspecmask
+};
+
+// Background is either an image or a color.
+// If both are set, the image has precedence.
+struct Background
+{
+	Rune*	image;	// url of the background image
+	int		color;	// background color; yields to image when both are set
+};
+
+
+// There are about a half dozen Item variants.
+// They all look like this at the start (each variant struct embeds an
+// Item as its first member, _item, in place of Plan 9 C's anonymous structure member),
+// and then the tag field dictates what extra fields there are.
+struct Item
+{
+	Item*	next;		// successor in list of items
+	int		width;	// width in pixels (0 for floating items)
+	int		height;	// height in pixels
+	int		ascent;	// ascent (from top to baseline) in pixels
+	int		anchorid;	// if nonzero, which anchor we're in
+	int		state;	// IFbrk etc. flags plus indent and hang values (see Item state flags, below)
+	Genattr*	genattr;	// generic attributes and events
+	int		tag;		// variant discriminator: Itexttag, etc.
+};
+
+// Item variant tags (values for Item.tag; one per I* variant struct below)
+enum {
+	Itexttag,
+	Iruletag,
+	Iimagetag,
+	Iformfieldtag,
+	Itabletag,
+	Ifloattag,
+	Ispacertag
+};
+
+struct Itext
+{
+	Item _item;				// common Item header, first member (with tag ==Itexttag)
+	Rune*	s;			// the characters
+	int		fnt;			// style*NumSize+size (see font stuff, below)
+	int		fg;			// Pixel (color) for text
+	uchar	voff;			// Voffbias+vertical offset from baseline, in pixels (+ve == down)
+	uchar	ul;			// ULnone, ULunder, or ULmid
+};
+
+struct Irule
+{
+	Item _item;				// common Item header, first member (with tag ==Iruletag)
+	uchar	align;		// alignment spec (presumably an AL* value -- confirm)
+	uchar	noshade;		// if true, don't shade
+	int		size;			// size attr (rule height)
+	Dimen	wspec;		// width spec
+};
+
+
+struct Iimage
+{
+	Item _item;				// common Item header, first member (with tag ==Iimagetag)
+	Rune*	imsrc;		// image src url
+	int		imwidth;		// spec width (actual, if no spec)
+	int		imheight;		// spec height (actual, if no spec)
+	Rune*	altrep;		// alternate representation, in absence of image
+	Map*	map;			// if non-nil, client side map
+	int		ctlid;			// if animated
+	uchar	align;		// vertical alignment
+	uchar	hspace;		// in pixels; buffer space on each side
+	uchar	vspace;		// in pixels; buffer space on top and bottom
+	uchar	border;		// in pixels: border width to draw around image
+	Iimage*	nextimage;	// next in list of document's images (list head is Docinfo.images)
+};
+
+
+struct Iformfield
+{
+	Item _item;				// common Item header, first member (with tag ==Iformfieldtag)
+	Formfield*	formfield;	// field description (see struct Formfield, below)
+};
+
+
+struct Itable
+{
+	Item _item;				// common Item header, first member (with tag ==Itabletag)
+	Table*	table;	// table description (see struct Table, below)
+};
+
+
+struct Ifloat
+{
+	Item _item;				// common Item header, first member (with tag ==Ifloattag)
+	Item*	item;			// table or image item that floats
+	int		x;			// x coord of top (from right, if ALright)
+	int		y;			// y coord of top
+	uchar	side;			// margin it floats to: ALleft or ALright
+	uchar	infloats;		// true if this has been added to a lay.floats
+	Ifloat*	nextfloat;		// in list of floats
+};
+
+
+struct Ispacer
+{
+	Item _item;				// common Item header, first member (with tag ==Ispacertag)
+	int		spkind;		// spacer kind: ISPnull, ISPvline, ISPhspace or ISPgeneral
+};
+
+// Item state flags and value fields (bits of Item.state)
+enum {
+	IFbrk =			0x80000000,	// forced break before this item
+	IFbrksp =			0x40000000,	// add 1 line space to break (IFbrk set too)
+	IFnobrk =			0x20000000,	// break not allowed before this item
+	IFcleft =			0x10000000,	// clear left floats (IFbrk set too)
+	IFcright =			0x08000000,	// clear right floats (IFbrk set too)
+	IFwrap =			0x04000000,	// in a wrapping (non-pre) line
+	IFhang =			0x02000000,	// in a hanging (into left indent) item
+	IFrjust =			0x01000000,	// right justify current line
+	IFcjust =			0x00800000,	// center justify current line
+	IFsmap =			0x00400000,	// image is server-side map
+	IFindentshift =		8,	// bit position of the indent field within the state word
+	IFindentmask =		(255<<IFindentshift),	// current indent, in tab stops
+	IFhangmask =		255			// current hang into left indent, in 1/10th tabstops
+};
+
+// Bias added to Itext's voff field (voff is a uchar holding Voffbias+offset)
+enum { Voffbias = 128 };
+
+// Spacer kinds (values for Ispacer.spkind)
+enum {
+	ISPnull,			// 0 height and width
+	ISPvline,			// height and ascent of current font
+	ISPhspace,		// width of space in current font
+	ISPgeneral		// other purposes (e.g., between markers and list)
+};
+
+// Generic attributes and events (not many elements will have any of these set)
+struct Genattr
+{
+	Rune*	id;		// id attr
+	Rune*	class;	// class attr. NOTE(review): "class" is a C++ keyword, so this member name will not compile as C++
+	Rune*	style;	// style attr
+	Rune*	title;	// title attr
+	SEvent*	events;	// list of script events (linked through SEvent.next)
+};
+
+struct SEvent
+{
+	SEvent*	next;		// in list of events
+	int		type;		// SEonblur, etc.
+	Rune*	script;	// presumably the script text to run for this event -- confirm
+};
+
+enum {	// script event types (values for SEvent.type)
+	SEonblur, SEonchange, SEonclick, SEondblclick,
+	SEonfocus, SEonkeypress, SEonkeyup, SEonload,
+	SEonmousedown, SEonmousemove, SEonmouseout,
+	SEonmouseover, SEonmouseup, SEonreset, SEonselect,
+	SEonsubmit, SEonunload,
+	Numscriptev	// number of script event types above; must stay last
+};
+
+// Form field types (values for Formfield.ftype)
+enum {
+	Ftext,
+	Fpassword,
+	Fcheckbox,
+	Fradio,
+	Fsubmit,
+	Fhidden,
+	Fimage,
+	Freset,
+	Ffile,
+	Fbutton,
+	Fselect,
+	Ftextarea
+};
+
+// Information about a field in a form
+struct Formfield
+{
+	Formfield*	next;		// in list of fields for a form
+	int			ftype;	// Ftext, Fpassword, etc.
+	int			fieldid;	// serial no. of field within its form
+	Form*		form;	// containing form
+	Rune*		name;	// name attr
+	Rune*		value;	// value attr
+	int			size;		// size attr
+	int			maxlength;	// maxlength attr
+	int			rows;	// rows attr
+	int			cols;		// cols attr
+	uchar		flags;	// FFchecked, FFmultiple
+	Option*		options;	// for Fselect fields (linked through Option.next)
+	Item*		image;	// image item, for Fimage fields
+	int			ctlid;		// identifies control for this field in layout
+	SEvent*		events;	// same as genattr->events of containing item
+};
+
+enum {	// bits of Formfield.flags
+	FFchecked =	(1<<7),
+	FFmultiple =	(1<<6)
+};
+
+// Option holds info about an option in a "select" form field (list head is Formfield.options)
+struct Option
+{
+	Option*	next;			// next in list of options for a field
+	int		selected;		// true if selected initially
+	Rune*	value;		// value attr
+	Rune*	display;		// display string
+};
+
+// Form holds info about a form
+struct Form
+{
+	Form*		next;		// in list of forms for document
+	int			formid;	// serial no. of form within its doc
+	Rune*		name;	// name or id attr (netscape uses name, HTML 4.0 uses id)
+	Rune*		action;	// action attr
+	int			target;	// target attr as targetid
+	int			method;	// HGet or HPost
+	int			nfields;	// number of fields
+	Formfield*	fields;	// form's fields, in input order (linked through Formfield.next)
+};
+
+// Flags used in various table structures (Tablerow.flags and Tablecell.flags)
+enum {
+	TFparsing =	(1<<7),
+	TFnowrap =	(1<<6),
+	TFisth =		(1<<5)
+};
+
+
+// Information about a table
+struct Table
+{
+	Table*		next;			// next in list of document's tables
+	int			tableid;		// serial no. of table within its doc
+	Tablerow*	rows;		// array of row specs (list through Tablerow.next during parsing)
+	int			nrow;		// total number of rows
+	Tablecol*		cols;			// array of column specs
+	int			ncol;			// total number of columns
+	Tablecell*		cells;			// list of unique cells (linked through Tablecell.next)
+	int			ncell;		// total number of cells
+	Tablecell***	grid;			// 2-D array of cells
+	Align		align;		// alignment spec for whole table
+	Dimen		width;		// width spec for whole table
+	int			border;		// border attr
+	int			cellspacing;	// cellspacing attr
+	int			cellpadding;	// cellpadding attr
+	Background	background;	// table background
+	Item*		caption;		// linked list of Items, giving caption
+	uchar		caption_place;	// ALtop or ALbottom
+	Lay*			caption_lay;	// layout of caption
+	int			totw;			// total width
+	int			toth;			// total height
+	int			caph;		// caption height
+	int			availw;		// used for previous 3 sizes (totw, toth, caph)
+	Token*		tabletok;		// token that started the table
+	uchar		flags;		// Lchanged, perhaps. NOTE(review): Lchanged is not defined in this header
+};
+
+
+struct Tablecol
+{
+	int		width;	// column width
+	Align	align;	// alignment spec for the column
+	Point		pos;	// NOTE(review): Point is not declared in this header; presumably the column's position -- confirm
+};
+
+
+struct Tablerow
+{
+	Tablerow*	next;			// Next in list of rows, during parsing
+	Tablecell*		cells;			// Cells in row, linked through nextinrow
+	int			height;	// row height
+	int			ascent;	// row ascent
+	Align		align;	// alignment spec for the row
+	Background	background;	// row background
+	Point			pos;	// NOTE(review): Point is not declared in this header; presumably the row's position -- confirm
+	uchar		flags;		// 0 or TFparsing
+};
+
+
+// A Tablecell is one cell of a table.
+// It may span multiple rows and multiple columns.
+// Cells are linked on two lists: the list for all the cells of
+// a table (the next pointers), and the list of all the
+// cells that start in a given row (the nextinrow pointers)
+struct Tablecell
+{
+	Tablecell*		next;			// next in list of table's cells
+	Tablecell*		nextinrow;	// next in list of row's cells
+	int			cellid;		// serial no. of cell within table
+	Item*		content;		// contents before layout
+	Lay*			lay;			// layout of cell
+	int			rowspan;		// number of rows spanned by this cell
+	int			colspan;		// number of cols spanned by this cell
+	Align		align;		// alignment spec
+	uchar		flags;		// TFparsing, TFnowrap, TFisth
+	Dimen		wspec;		// suggested width
+	int			hspec;		// suggested height
+	Background	background;	// cell background
+	int			minw;		// minimum possible width
+	int			maxw;		// maximum width
+	int			ascent;		// cell's ascent
+	int			row;			// row of upper left corner
+	int			col;			// col of upper left corner
+	Point			pos;			// nw corner of cell contents, in cell
+};
+
+// Anchor is for info about hyperlinks that go somewhere (list head is Docinfo.anchors)
+struct Anchor
+{
+	Anchor*		next;		// next in list of document's anchors
+	int			index;	// serial no. of anchor within its doc
+	Rune*		name;	// name attr
+	Rune*		href;		// href attr
+	int			target;	// target attr as targetid
+};
+
+
+// DestAnchor is for info about hyperlinks that are destinations (list head is Docinfo.dests)
+struct DestAnchor
+{
+	DestAnchor*	next;		// next in list of document's destanchors
+	int			index;	// serial no. of anchor within its doc
+	Rune*		name;	// name attr
+	Item*		item;		// the destination
+};
+
+
+// Maps (client side); list head is Docinfo.maps
+struct Map
+{
+	Map*	next;			// next in list of document's maps
+	Rune*	name;		// map name
+	Area*	areas;		// list of map areas (linked through Area.next)
+};
+
+
+struct Area
+{
+	Area*		next;		// next in list of a map's areas
+	int			shape;	// SHrect, SHcircle or SHpoly
+	Rune*		href;		// associated hypertext link
+	int			target;	// associated target frame
+	Dimen*		coords;	// array of coords for shape
+	int			ncoords;	// size of coords array
+};
+
+// Area shapes (values for Area.shape)
+enum {
+	SHrect, SHcircle, SHpoly
+};
+
+// Fonts are represented by integers: style*NumSize + size (as stored in Itext.fnt)
+
+// Font styles
+enum {
+	FntR,			// roman
+	FntI,			// italic
+	FntB,			// bold
+	FntT,			// typewriter
+	NumStyle		// number of styles above; must stay last
+};
+
+// Font sizes
+enum {
+	Tiny,
+	Small,
+	Normal,
+	Large,
+	Verylarge,
+	NumSize		// number of sizes above; must stay last
+};
+
+enum {
+	NumFnt = (NumStyle*NumSize),	// total number of style/size combinations
+	DefFnt = (FntR*NumSize+Normal)	// default font: roman style, Normal size
+};
+
+// Lines are needed through some text items, for underlining or strikethrough (values for Itext.ul)
+enum {
+	ULnone, ULunder, ULmid
+};
+
+// Kidinfo flags (bits of Kidinfo.flags)
+enum {
+	FRnoresize =	(1<<0),
+	FRnoscroll =	(1<<1),
+	FRhscroll = 	(1<<2),
+	FRvscroll =	(1<<3),
+	FRhscrollauto = (1<<4),
+	FRvscrollauto =	(1<<5)
+};
+
+// Information about child frame or frameset
+struct Kidinfo
+{
+	Kidinfo*		next;		// in list of kidinfos for a frameset
+	int			isframeset;	// true if this describes a frameset rather than a frame
+
+	// fields for "frame"
+	Rune*		src;		// only nil if a "dummy" frame or this is frameset
+	Rune*		name;	// always non-empty if this isn't frameset
+	int			marginw;	// presumably the marginwidth attr -- confirm
+	int			marginh;	// presumably the marginheight attr -- confirm
+	int			framebd;	// presumably the frameborder attr -- confirm
+	int			flags;	// FRnoresize, etc.
+
+	// fields for "frameset"
+	Dimen*		rows;	// array of row dimensions
+	int			nrows;	// length of rows
+	Dimen*		cols;		// array of col dimensions
+	int			ncols;	// length of cols
+	Kidinfo*		kidinfos;	// list of this frameset's kidinfos (linked through next)
+	Kidinfo*		nextframeset;	// parsing stack
+};
+
+
+// Document info (global information about HTML page)
+struct Docinfo
+{
+	// stuff from HTTP headers, doc head, and body tag
+	Rune*		src;				// original source of doc
+	Rune*		base;			// base URL of doc
+	Rune*		doctitle;			// from <title> element
+	Background	background;		// background specification
+	Iimage*		backgrounditem;	// Image Item for doc background image, or nil
+	int			text;				// doc foreground (text) color
+	int			link;				// unvisited hyperlink color
+	int			vlink;			// visited hyperlink color
+	int			alink;			// highlighting hyperlink color
+	int			target;			// target frame default
+	int			chset;			// ISO_8859_1, etc. (see Charsets)
+	int			mediatype;		// TextHtml, etc.
+	int			scripttype;		// TextJavascript, etc.
+	int			hasscripts;		// true if scripts used
+	Rune*		refresh;			// content of <http-equiv=Refresh ...>
+	Kidinfo*		kidinfo;			// if a frameset
+	int			frameid;			// id of document frame
+
+	// info needed to respond to user actions
+	Anchor*		anchors;			// list of href anchors
+	DestAnchor*	dests;			// list of destination anchors
+	Form*		forms;			// list of forms
+	Table*		tables;			// list of tables
+	Map*		maps;			// list of maps
+	Iimage*		images;			// list of image items (through nextimage links)
+};
+
+extern int			dimenkind(Dimen d);	// presumably d.kindspec&Dkindmask -- confirm
+extern int			dimenspec(Dimen d);	// presumably d.kindspec&Dspecmask -- confirm
+extern void		freedocinfo(Docinfo* d);
+extern void		freeitems(Item* ithead);	// ithead: presumably the head of a list linked through Item.next -- confirm
+extern Item*		parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);	// NOTE(review): presumably mtype is a media type, chset a charset, and *pdi receives the Docinfo -- confirm
+extern void		printitems(Item* items, char* msg);
+extern int			targetid(Rune* s);	// presumably maps a target name to the int stored in the "target" fields -- confirm
+extern Rune*		targetname(int targid);	// presumably the inverse of targetid -- confirm
+extern int			validitems(Item* i);
+
+#pragma varargck	type "I"	Item*
+
+// Control print output (only declared here; this header does not define them)
+extern int			warn;
+extern int			dbglex;	// presumably debugging output for the LEX stage -- confirm
+extern int			dbgbuild;	// presumably debugging output for the BUILD stage -- confirm
+
+// To be provided by caller
+// emalloc and erealloc should not return if they can't get memory.
+// emalloc should zero its memory.
+extern void*	emalloc(ulong);	// parameter is unnamed; presumably the size to allocate, as in erealloc -- confirm
+extern void*	erealloc(void* p, ulong size);
+#ifdef __cplusplus
+}	// closes the extern "C" { opened at the top of this header
+#endif	// __cplusplus
+#endif