| #ifndef _HTML_H_ |
| #define _HTML_H_ 1 |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| AUTOLIB(html) |
| /* |
| #pragma lib "libhtml.a" |
| #pragma src "/sys/src/libhtml" |
| */ |
| |
| /* UTILS */ |
| extern uchar* fromStr(Rune* buf, int n, int chset); |
| extern Rune* toStr(uchar* buf, int n, int chset); |
| |
/* Common LEX and BUILD enums */

/*
 * Media types.
 * Values are assigned implicitly from 0, so the order of the entries
 * fixes their numeric values; NMEDIATYPES, being last, is the number
 * of media types.
 */
enum
{
	ApplMsword,
	ApplOctets,
	ApplPdf,
	ApplPostscript,
	ApplRtf,
	ApplFramemaker,
	ApplMsexcel,
	ApplMspowerpoint,
	UnknownType,
	Audio32kadpcm,
	AudioBasic,
	ImageCgm,
	ImageG3fax,
	ImageGif,
	ImageIef,
	ImageJpeg,
	ImagePng,
	ImageTiff,
	ImageXBit,
	ImageXBit2,
	ImageXBitmulti,
	ImageXXBitmap,
	ModelVrml,
	MultiDigest,
	MultiMixed,
	TextCss,
	TextEnriched,
	TextHtml,
	TextJavascript,
	TextPlain,
	TextRichtext,
	TextSgml,
	TextTabSeparatedValues,
	TextXml,
	VideoMpeg,
	VideoQuicktime,
	NMEDIATYPES
};

/* HTTP methods (stored, for example, in Form.method) */
enum
{
	HGet,
	HPost
};

/*
 * Charsets.
 * NCHARSETS, being last, is the number of charset values
 * (UnknownCharset included).
 */
enum
{
	UnknownCharset,
	US_Ascii,
	ISO_8859_1,
	UTF_8,
	Unicode,
	NCHARSETS
};

/*
 * Frame Target IDs.
 * NOTE(review): presumably the reserved values of the integer target
 * ids kept in the various `target' fields -- confirm.
 */
enum {
	FTtop,
	FTself,
	FTparent,
	FTblank
};

/*
 * Forward typedefs, so that the structures in this header can point
 * at one another regardless of definition order.
 * Token, Attr, Stack, Pstate, ItemSource and Lay are only named here;
 * this header does not define them.
 */

/* LEX */
typedef struct Token Token;
typedef struct Attr Attr;

/* BUILD */

typedef struct Item Item;
typedef struct Itext Itext;
typedef struct Irule Irule;
typedef struct Iimage Iimage;
typedef struct Iformfield Iformfield;
typedef struct Itable Itable;
typedef struct Ifloat Ifloat;
typedef struct Ispacer Ispacer;
typedef struct Genattr Genattr;
typedef struct SEvent SEvent;
typedef struct Formfield Formfield;
typedef struct Option Option;
typedef struct Form Form;
typedef struct Table Table;
typedef struct Tablecol Tablecol;
typedef struct Tablerow Tablerow;
typedef struct Tablecell Tablecell;
typedef struct Align Align;
typedef struct Dimen Dimen;
typedef struct Anchor Anchor;
typedef struct DestAnchor DestAnchor;
typedef struct Map Map;
typedef struct Area Area;
typedef struct Background Background;
typedef struct Kidinfo Kidinfo;
typedef struct Docinfo Docinfo;
typedef struct Stack Stack;
typedef struct Pstate Pstate;
typedef struct ItemSource ItemSource;
typedef struct Lay Lay;	/* defined in Layout module */

/*
 * Alignment types.
 * One list serves both directions: struct Align keeps one of these in
 * halign and one in valign.
 */
enum {
	ALnone = 0,
	ALleft,
	ALcenter,
	ALright,
	ALjustify,
	ALchar,
	ALtop,
	ALmiddle,
	ALbottom,
	ALbaseline
};

| struct Align |
| { |
| uchar halign; /* one of ALnone, ALleft, etc. */ |
| uchar valign; /* one of ALnone, ALtop, etc. */ |
| }; |
| |
/*
 * A Dimen holds a dimension specification, especially for those
 * cases when a number can be followed by a % or a * to indicate
 * percentage of total or relative weight.
 * Dnone means no dimension was specified.
 *
 * To fit in a word, the kind occupies bits 29 and 30 (Dkindmask)
 * and every other bit (Dspecmask) holds the value.
 */
enum {
	Dnone		= 0,
	Dpixels		= (1<<29),
	Dpercent	= (2<<29),
	Drelative	= (3<<29),
	Dkindmask	= (3<<29),
	Dspecmask	= (~Dkindmask)
};

/* A dimension packed into a single int: a kind ORed with its value */
struct Dimen
{
	int	kindspec;	/* kind | spec */
};

| /* Background is either an image or a color. */ |
| /* If both are set, the image has precedence. */ |
| struct Background |
| { |
| Rune* image; /* url */ |
| int color; |
| }; |
| |
| |
| /* There are about a half dozen Item variants. */ |
| /* The all look like this at the start (using Plan 9 C's */ |
| /* anonymous structure member mechanism), */ |
| /* and then the tag field dictates what extra fields there are. */ |
| struct Item |
| { |
| Item* next; /* successor in list of items */ |
| int width; /* width in pixels (0 for floating items) */ |
| int height; /* height in pixels */ |
| Rectangle r; |
| int ascent; /* ascent (from top to baseline) in pixels */ |
| int anchorid; /* if nonzero, which anchor we're in */ |
| int state; /* flags and values (see below) */ |
| Genattr* genattr; /* generic attributes and events */ |
| int tag; /* variant discriminator: Itexttag, etc. */ |
| }; |
| |
/* Item variant tags (values of Item.tag), one per variant structure */
enum {
	Itexttag,
	Iruletag,
	Iimagetag,
	Iformfieldtag,
	Itabletag,
	Ifloattag,
	Ispacertag
};

| struct Itext |
| { |
| Item item; /* (with tag ==Itexttag) */ |
| Rune* s; /* the characters */ |
| int fnt; /* style*NumSize+size (see font stuff, below) */ |
| int fg; /* Pixel (color) for text */ |
| uchar voff; /* Voffbias+vertical offset from baseline, in pixels (+ve == down) */ |
| uchar ul; /* ULnone, ULunder, or ULmid */ |
| }; |
| |
| struct Irule |
| { |
| Item item; /* (with tag ==Iruletag) */ |
| uchar align; /* alignment spec */ |
| uchar noshade; /* if true, don't shade */ |
| int size; /* size attr (rule height) */ |
| Dimen wspec; /* width spec */ |
| }; |
| |
| |
| struct Iimage |
| { |
| Item item; /* (with tag ==Iimagetag) */ |
| Rune* imsrc; /* image src url */ |
| int imwidth; /* spec width (actual, if no spec) */ |
| int imheight; /* spec height (actual, if no spec) */ |
| Rune* altrep; /* alternate representation, in absence of image */ |
| Map* map; /* if non-nil, client side map */ |
| int ctlid; /* if animated */ |
| uchar align; /* vertical alignment */ |
| uchar hspace; /* in pixels; buffer space on each side */ |
| uchar vspace; /* in pixels; buffer space on top and bottom */ |
| uchar border; /* in pixels: border width to draw around image */ |
| Iimage* nextimage; /* next in list of document's images */ |
| void *aux; |
| }; |
| |
| |
| struct Iformfield |
| { |
| Item item; /* (with tag ==Iformfieldtag) */ |
| Formfield* formfield; |
| void *aux; |
| }; |
| |
| |
| struct Itable |
| { |
| Item item; /* (with tag ==Itabletag) */ |
| Table* table; |
| }; |
| |
| |
| struct Ifloat |
| { |
| Item _item; /* (with tag ==Ifloattag) */ |
| Item* item; /* table or image item that floats */ |
| int x; /* x coord of top (from right, if ALright) */ |
| int y; /* y coord of top */ |
| uchar side; /* margin it floats to: ALleft or ALright */ |
| uchar infloats; /* true if this has been added to a lay.floats */ |
| Ifloat* nextfloat; /* in list of floats */ |
| }; |
| |
| |
| struct Ispacer |
| { |
| Item item; /* (with tag ==Ispacertag) */ |
| int spkind; /* ISPnull, etc. */ |
| }; |
| |
/*
 * Item state flags and value fields (kept in Item.state).
 * The high bits are single-bit flags; the low 16 bits hold two 8-bit
 * values, selected by IFindentmask and IFhangmask.
 */

/*
 * IFbrk: forced break before this item.
 * It is a macro rather than an enumerator because 0x80000000 is too
 * big for an int enum constant (the original note blames the Sun
 * compiler).
 */
#define IFbrk 0x80000000
enum {
	IFbrksp		= 0x40000000,	/* add 1 line space to break (IFbrk set too) */
	IFnobrk		= 0x20000000,	/* break not allowed before this item */
	IFcleft		= 0x10000000,	/* clear left floats (IFbrk set too) */
	IFcright	= 0x08000000,	/* clear right floats (IFbrk set too) */
	IFwrap		= 0x04000000,	/* in a wrapping (non-pre) line */
	IFhang		= 0x02000000,	/* in a hanging (into left indent) item */
	IFrjust		= 0x01000000,	/* right justify current line */
	IFcjust		= 0x00800000,	/* center justify current line */
	IFsmap		= 0x00400000,	/* image is server-side map */
	IFindentshift	= 8,
	IFindentmask	= (255<<IFindentshift),	/* current indent, in tab stops */
	IFhangmask	= 255	/* current hang into left indent, in 1/10th tabstops */
};

/* Bias added to Itext's voff field, which is an unsigned byte */
enum { Voffbias = 128 };

/* Spacer kinds (values of Ispacer.spkind) */
enum {
	ISPnull,	/* 0 height and width */
	ISPvline,	/* height and ascent of current font */
	ISPhspace,	/* width of space in current font */
	ISPgeneral	/* other purposes (e.g., between markers and list) */
};

| /* Generic attributes and events (not many elements will have any of these set) */ |
| struct Genattr |
| { |
| Rune* id; |
| Rune* class; |
| Rune* style; |
| Rune* title; |
| SEvent* events; |
| }; |
| |
| struct SEvent |
| { |
| SEvent* next; /* in list of events */ |
| int type; /* SEonblur, etc. */ |
| Rune* script; |
| }; |
| |
/*
 * Scripted event types (values of SEvent.type).
 * Numscriptev, being last, is the number of event types.
 */
enum {
	SEonblur,
	SEonchange,
	SEonclick,
	SEondblclick,
	SEonfocus,
	SEonkeypress,
	SEonkeyup,
	SEonload,
	SEonmousedown,
	SEonmousemove,
	SEonmouseout,
	SEonmouseover,
	SEonmouseup,
	SEonreset,
	SEonselect,
	SEonsubmit,
	SEonunload,
	Numscriptev
};

/* Form field types (values of Formfield.ftype) */
enum {
	Ftext,
	Fpassword,
	Fcheckbox,
	Fradio,
	Fsubmit,
	Fhidden,
	Fimage,
	Freset,
	Ffile,
	Fbutton,
	Fselect,
	Ftextarea
};

| /* Information about a field in a form */ |
| struct Formfield |
| { |
| Formfield* next; /* in list of fields for a form */ |
| int ftype; /* Ftext, Fpassword, etc. */ |
| int fieldid; /* serial no. of field within its form */ |
| Form* form; /* containing form */ |
| Rune* name; /* name attr */ |
| Rune* value; /* value attr */ |
| int size; /* size attr */ |
| int maxlength; /* maxlength attr */ |
| int rows; /* rows attr */ |
| int cols; /* cols attr */ |
| uchar flags; /* FFchecked, etc. */ |
| Option* options; /* for Fselect fields */ |
| Item* image; /* image item, for Fimage fields */ |
| int ctlid; /* identifies control for this field in layout */ |
| SEvent* events; /* same as genattr->events of containing item */ |
| }; |
| |
/* Formfield flags (bits of the one-byte Formfield.flags) */
enum {
	FFchecked	= (1<<7),
	FFmultiple	= (1<<6)
};

| /* Option holds info about an option in a "select" form field */ |
| struct Option |
| { |
| Option* next; /* next in list of options for a field */ |
| int selected; /* true if selected initially */ |
| Rune* value; /* value attr */ |
| Rune* display; /* display string */ |
| }; |
| |
| /* Form holds info about a form */ |
| struct Form |
| { |
| Form* next; /* in list of forms for document */ |
| int formid; /* serial no. of form within its doc */ |
| Rune* name; /* name or id attr (netscape uses name, HTML 4.0 uses id) */ |
| Rune* action; /* action attr */ |
| int target; /* target attr as targetid */ |
| int method; /* HGet or HPost */ |
| int nfields; /* number of fields */ |
| Formfield* fields; /* field's forms, in input order */ |
| }; |
| |
/*
 * Flags used in various table structures (the one-byte flags fields
 * of Tablerow and Tablecell).
 */
enum {
	TFparsing	= (1<<7),
	TFnowrap	= (1<<6),
	TFisth		= (1<<5)
};


| /* Information about a table */ |
| struct Table |
| { |
| Table* next; /* next in list of document's tables */ |
| int tableid; /* serial no. of table within its doc */ |
| Tablerow* rows; /* array of row specs (list during parsing) */ |
| int nrow; /* total number of rows */ |
| Tablecol* cols; /* array of column specs */ |
| int ncol; /* total number of columns */ |
| Tablecell* cells; /* list of unique cells */ |
| int ncell; /* total number of cells */ |
| Tablecell*** grid; /* 2-D array of cells */ |
| Align align; /* alignment spec for whole table */ |
| Dimen width; /* width spec for whole table */ |
| int border; /* border attr */ |
| int cellspacing; /* cellspacing attr */ |
| int cellpadding; /* cellpadding attr */ |
| Background background; /* table background */ |
| Item* caption; /* linked list of Items, giving caption */ |
| uchar caption_place; /* ALtop or ALbottom */ |
| Lay* caption_lay; /* layout of caption */ |
| int totw; /* total width */ |
| int toth; /* total height */ |
| int caph; /* caption height */ |
| int availw; /* used for previous 3 sizes */ |
| Token* tabletok; /* token that started the table */ |
| uchar flags; /* Lchanged, perhaps */ |
| }; |
| |
| |
| struct Tablecol |
| { |
| int width; |
| Align align; |
| Point pos; |
| }; |
| |
| |
| struct Tablerow |
| { |
| Tablerow* next; /* Next in list of rows, during parsing */ |
| Tablecell* cells; /* Cells in row, linked through nextinrow */ |
| int height; |
| int ascent; |
| Align align; |
| Background background; |
| Point pos; |
| uchar flags; /* 0 or TFparsing */ |
| }; |
| |
| |
| /* A Tablecell is one cell of a table. */ |
| /* It may span multiple rows and multiple columns. */ |
| /* Cells are linked on two lists: the list for all the cells of */ |
| /* a document (the next pointers), and the list of all the */ |
| /* cells that start in a given row (the nextinrow pointers) */ |
| struct Tablecell |
| { |
| Tablecell* next; /* next in list of table's cells */ |
| Tablecell* nextinrow; /* next in list of row's cells */ |
| int cellid; /* serial no. of cell within table */ |
| Item* content; /* contents before layout */ |
| Lay* lay; /* layout of cell */ |
| int rowspan; /* number of rows spanned by this cell */ |
| int colspan; /* number of cols spanned by this cell */ |
| Align align; /* alignment spec */ |
| uchar flags; /* TFparsing, TFnowrap, TFisth */ |
| Dimen wspec; /* suggested width */ |
| int hspec; /* suggested height */ |
| Background background; /* cell background */ |
| int minw; /* minimum possible width */ |
| int maxw; /* maximum width */ |
| int ascent; /* cell's ascent */ |
| int row; /* row of upper left corner */ |
| int col; /* col of upper left corner */ |
| Point pos; /* nw corner of cell contents, in cell */ |
| }; |
| |
| /* Anchor is for info about hyperlinks that go somewhere */ |
| struct Anchor |
| { |
| Anchor* next; /* next in list of document's anchors */ |
| int index; /* serial no. of anchor within its doc */ |
| Rune* name; /* name attr */ |
| Rune* href; /* href attr */ |
| int target; /* target attr as targetid */ |
| }; |
| |
| |
| /* DestAnchor is for info about hyperlinks that are destinations */ |
| struct DestAnchor |
| { |
| DestAnchor* next; /* next in list of document's destanchors */ |
| int index; /* serial no. of anchor within its doc */ |
| Rune* name; /* name attr */ |
| Item* item; /* the destination */ |
| }; |
| |
| |
| /* Maps (client side) */ |
| struct Map |
| { |
| Map* next; /* next in list of document's maps */ |
| Rune* name; /* map name */ |
| Area* areas; /* list of map areas */ |
| }; |
| |
| |
| struct Area |
| { |
| Area* next; /* next in list of a map's areas */ |
| int shape; /* SHrect, etc. */ |
| Rune* href; /* associated hypertext link */ |
| int target; /* associated target frame */ |
| Dimen* coords; /* array of coords for shape */ |
| int ncoords; /* size of coords array */ |
| }; |
| |
/* Area shapes (values of Area.shape) */
enum {
	SHrect,
	SHcircle,
	SHpoly
};

/*
 * Fonts are represented by integers: style*NumSize + size,
 * so every (style, size) pair maps to a distinct value below NumFnt.
 */

/* Font styles; NumStyle, being last, is the number of styles */
enum {
	FntR,	/* roman */
	FntI,	/* italic */
	FntB,	/* bold */
	FntT,	/* typewriter */
	NumStyle
};

/* Font sizes; NumSize, being last, is the number of sizes */
enum {
	Tiny,
	Small,
	Normal,
	Large,
	Verylarge,
	NumSize
};

enum {
	NumFnt = (NumStyle*NumSize),	/* number of distinct font values */
	DefFnt = (FntR*NumSize+Normal)	/* roman, normal size */
};

/*
 * Lines are needed through some text items, for underlining or
 * strikethrough (values of Itext.ul).
 */
enum {
	ULnone,
	ULunder,
	ULmid
};

/* Kidinfo flags (bits of Kidinfo.flags) */
enum {
	FRnoresize	= (1<<0),
	FRnoscroll	= (1<<1),
	FRhscroll	= (1<<2),
	FRvscroll	= (1<<3),
	FRhscrollauto	= (1<<4),
	FRvscrollauto	= (1<<5)
};

| /* Information about child frame or frameset */ |
| struct Kidinfo |
| { |
| Kidinfo* next; /* in list of kidinfos for a frameset */ |
| int isframeset; |
| |
| /* fields for "frame" */ |
| Rune* src; /* only nil if a "dummy" frame or this is frameset */ |
| Rune* name; /* always non-empty if this isn't frameset */ |
| int marginw; |
| int marginh; |
| int framebd; |
| int flags; |
| |
| /* fields for "frameset" */ |
| Dimen* rows; /* array of row dimensions */ |
| int nrows; /* length of rows */ |
| Dimen* cols; /* array of col dimensions */ |
| int ncols; /* length of cols */ |
| Kidinfo* kidinfos; |
| Kidinfo* nextframeset; /* parsing stack */ |
| }; |
| |
| |
| /* Document info (global information about HTML page) */ |
| struct Docinfo |
| { |
| /* stuff from HTTP headers, doc head, and body tag */ |
| Rune* src; /* original source of doc */ |
| Rune* base; /* base URL of doc */ |
| Rune* doctitle; /* from <title> element */ |
| Background background; /* background specification */ |
| Iimage* backgrounditem; /* Image Item for doc background image, or nil */ |
| int text; /* doc foreground (text) color */ |
| int link; /* unvisited hyperlink color */ |
| int vlink; /* visited hyperlink color */ |
| int alink; /* highlighting hyperlink color */ |
| int target; /* target frame default */ |
| int chset; /* ISO_8859, etc. */ |
| int mediatype; /* TextHtml, etc. */ |
| int scripttype; /* TextJavascript, etc. */ |
| int hasscripts; /* true if scripts used */ |
| Rune* refresh; /* content of <http-equiv=Refresh ...> */ |
| Kidinfo* kidinfo; /* if a frameset */ |
| int frameid; /* id of document frame */ |
| |
| /* info needed to respond to user actions */ |
| Anchor* anchors; /* list of href anchors */ |
| DestAnchor* dests; /* list of destination anchors */ |
| Form* forms; /* list of forms */ |
| Table* tables; /* list of tables */ |
| Map* maps; /* list of maps */ |
| Iimage* images; /* list of image items (through nextimage links) */ |
| }; |
| |
| extern int dimenkind(Dimen d); |
| extern int dimenspec(Dimen d); |
| extern void freedocinfo(Docinfo* d); |
| extern void freeitems(Item* ithead); |
| extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi); |
| extern void printitems(Item* items, char* msg); |
| extern int targetid(Rune* s); |
| extern Rune* targetname(int targid); |
| extern int validitems(Item* i); |
| |
| /* #pragma varargck type "I" Item* */ |
| |
/*
 * Control print output.
 * These are declarations only; the variables are defined elsewhere.
 */
extern int	warn;
extern int	dbglex;
extern int	dbgbuild;

| /* To be provided by caller */ |
| /* emalloc and erealloc should not return if can't get memory. */ |
| /* emalloc should zero its memory. */ |
| extern void* emalloc(ulong); |
| extern void* erealloc(void* p, ulong size); |
| #ifdef __cpluspplus |
| } |
| #endif |
| #endif |