blob: 1d5dd6b2d7e6223b357be323a729098c5e2f93bf [file] [log] [blame]
rsc7f111042003-12-11 18:15:57 +00001#ifndef _HTML_H_
2#define _HTML_H_ 1
3#ifdef __cplusplus
4extern "C" {
5#endif
6
rsc1a0954a2005-01-04 21:18:08 +00007AUTOLIB(html)
rsc7f111042003-12-11 18:15:57 +00008/*
9 #pragma lib "libhtml.a"
10 #pragma src "/sys/src/libhtml"
11*/
12
13// UTILS
14extern uchar* fromStr(Rune* buf, int n, int chset);
15extern Rune* toStr(uchar* buf, int n, int chset);
16
// Common LEX and BUILD enums

// Media types
// Values are consecutive from 0; NMEDIATYPES is the number of types.
enum
{
	ApplMsword,
	ApplOctets,
	ApplPdf,
	ApplPostscript,
	ApplRtf,
	ApplFramemaker,
	ApplMsexcel,
	ApplMspowerpoint,
	UnknownType,
	Audio32kadpcm,
	AudioBasic,
	ImageCgm,
	ImageG3fax,
	ImageGif,
	ImageIef,
	ImageJpeg,
	ImagePng,
	ImageTiff,
	ImageXBit,
	ImageXBit2,
	ImageXBitmulti,
	ImageXXBitmap,
	ModelVrml,
	MultiDigest,
	MultiMixed,
	TextCss,
	TextEnriched,
	TextHtml,
	TextJavascript,
	TextPlain,
	TextRichtext,
	TextSgml,
	TextTabSeparatedValues,
	TextXml,
	VideoMpeg,
	VideoQuicktime,
	NMEDIATYPES
};

// HTTP methods
enum
{
	HGet,
	HPost
};

// Charsets
// Values are consecutive from 0; NCHARSETS is the number of charsets.
enum
{
	UnknownCharset,
	US_Ascii,
	ISO_8859_1,
	UTF_8,
	Unicode,
	NCHARSETS
};

// Frame Target IDs
enum {
	FTtop,
	FTself,
	FTparent,
	FTblank
};

// Forward declarations, so the structures can refer to each other
// by their short names.

// LEX
typedef struct Token Token;
typedef struct Attr Attr;

// BUILD

typedef struct Item Item;
typedef struct Itext Itext;
typedef struct Irule Irule;
typedef struct Iimage Iimage;
typedef struct Iformfield Iformfield;
typedef struct Itable Itable;
typedef struct Ifloat Ifloat;
typedef struct Ispacer Ispacer;
typedef struct Genattr Genattr;
typedef struct SEvent SEvent;
typedef struct Formfield Formfield;
typedef struct Option Option;
typedef struct Form Form;
typedef struct Table Table;
typedef struct Tablecol Tablecol;
typedef struct Tablerow Tablerow;
typedef struct Tablecell Tablecell;
typedef struct Align Align;
typedef struct Dimen Dimen;
typedef struct Anchor Anchor;
typedef struct DestAnchor DestAnchor;
typedef struct Map Map;
typedef struct Area Area;
typedef struct Background Background;
typedef struct Kidinfo Kidinfo;
typedef struct Docinfo Docinfo;
typedef struct Stack Stack;
typedef struct Pstate Pstate;
typedef struct ItemSource ItemSource;
typedef struct Lay Lay;	// defined in Layout module

// Alignment types
// Used for both horizontal and vertical alignment fields.
enum {
	ALnone = 0, ALleft, ALcenter, ALright, ALjustify,
	ALchar, ALtop, ALmiddle, ALbottom, ALbaseline
};

130struct Align
131{
132 uchar halign; // one of ALnone, ALleft, etc.
133 uchar valign; // one of ALnone, ALtop, etc.
134};
135
// A Dimen holds a dimension specification, especially for those
// cases when a number can be followed by a % or a * to indicate
// percentage of total or relative weight.
// Dnone means no dimension was specified

// To fit in a word, use top bits to identify kind, rest for value.
// The kind lives in bits 29-30 (Dkindmask); Dspecmask is every other bit.
enum {
	Dnone =		0,
	Dpixels =	(1<<29),
	Dpercent =	(2<<29),
	Drelative =	(3<<29),
	Dkindmask =	(3<<29),
	Dspecmask =	(~Dkindmask)
};

// A dimension packed into one int: a D* kind code or'ed with its value.
struct Dimen
{
	int	kindspec;	// kind | spec
};

156// Background is either an image or a color.
157// If both are set, the image has precedence.
158struct Background
159{
160 Rune* image; // url
161 int color;
162};
163
164
165// There are about a half dozen Item variants.
166// The all look like this at the start (using Plan 9 C's
167// anonymous structure member mechanism),
168// and then the tag field dictates what extra fields there are.
169struct Item
170{
171 Item* next; // successor in list of items
172 int width; // width in pixels (0 for floating items)
173 int height; // height in pixels
174 int ascent; // ascent (from top to baseline) in pixels
175 int anchorid; // if nonzero, which anchor we're in
176 int state; // flags and values (see below)
177 Genattr* genattr; // generic attributes and events
178 int tag; // variant discriminator: Itexttag, etc.
179};
180
// Item variant tags
// Stored in Item.tag to say which variant structure an Item starts.
enum {
	Itexttag,
	Iruletag,
	Iimagetag,
	Iformfieldtag,
	Itabletag,
	Ifloattag,
	Ispacertag
};

192struct Itext
193{
rsc650deb72004-04-08 19:36:35 +0000194 Item item; // (with tag ==Itexttag)
rsc7f111042003-12-11 18:15:57 +0000195 Rune* s; // the characters
196 int fnt; // style*NumSize+size (see font stuff, below)
197 int fg; // Pixel (color) for text
198 uchar voff; // Voffbias+vertical offset from baseline, in pixels (+ve == down)
199 uchar ul; // ULnone, ULunder, or ULmid
200};
201
202struct Irule
203{
rsc650deb72004-04-08 19:36:35 +0000204 Item item; // (with tag ==Iruletag)
rsc7f111042003-12-11 18:15:57 +0000205 uchar align; // alignment spec
206 uchar noshade; // if true, don't shade
207 int size; // size attr (rule height)
208 Dimen wspec; // width spec
209};
210
211
212struct Iimage
213{
rsc650deb72004-04-08 19:36:35 +0000214 Item item; // (with tag ==Iimagetag)
rsc7f111042003-12-11 18:15:57 +0000215 Rune* imsrc; // image src url
216 int imwidth; // spec width (actual, if no spec)
217 int imheight; // spec height (actual, if no spec)
218 Rune* altrep; // alternate representation, in absence of image
219 Map* map; // if non-nil, client side map
220 int ctlid; // if animated
221 uchar align; // vertical alignment
222 uchar hspace; // in pixels; buffer space on each side
223 uchar vspace; // in pixels; buffer space on top and bottom
224 uchar border; // in pixels: border width to draw around image
225 Iimage* nextimage; // next in list of document's images
226};
227
228
229struct Iformfield
230{
rsc650deb72004-04-08 19:36:35 +0000231 Item item; // (with tag ==Iformfieldtag)
rsc7f111042003-12-11 18:15:57 +0000232 Formfield* formfield;
233};
234
235
236struct Itable
237{
rsc650deb72004-04-08 19:36:35 +0000238 Item item; // (with tag ==Itabletag)
rsc7f111042003-12-11 18:15:57 +0000239 Table* table;
240};
241
242
243struct Ifloat
244{
245 Item _item; // (with tag ==Ifloattag)
246 Item* item; // table or image item that floats
247 int x; // x coord of top (from right, if ALright)
248 int y; // y coord of top
249 uchar side; // margin it floats to: ALleft or ALright
250 uchar infloats; // true if this has been added to a lay.floats
251 Ifloat* nextfloat; // in list of floats
252};
253
254
255struct Ispacer
256{
rsc650deb72004-04-08 19:36:35 +0000257 Item item; // (with tag ==Ispacertag)
rsc7f111042003-12-11 18:15:57 +0000258 int spkind; // ISPnull, etc.
259};
260
// Item state flags and value fields
// The upper bits are single-bit flags; the low 16 bits hold two
// 8-bit values, the indent and the hang.

// IFbrk: forced break before this item.
// It is a macro rather than an enumerator because the value was
// "too big for sun" as an enum constant.
#define IFbrk 0x80000000

enum {
	IFbrksp =	0x40000000,	// add 1 line space to break (IFbrk set too)
	IFnobrk =	0x20000000,	// break not allowed before this item
	IFcleft =	0x10000000,	// clear left floats (IFbrk set too)
	IFcright =	0x08000000,	// clear right floats (IFbrk set too)
	IFwrap =	0x04000000,	// in a wrapping (non-pre) line
	IFhang =	0x02000000,	// in a hanging (into left indent) item
	IFrjust =	0x01000000,	// right justify current line
	IFcjust =	0x00800000,	// center justify current line
	IFsmap =	0x00400000,	// image is server-side map
	IFindentshift =	8,
	IFindentmask =	(255<<IFindentshift),	// current indent, in tab stops
	IFhangmask =	255			// current hang into left indent, in 1/10th tabstops
};

// Bias added to Itext's voff field
enum { Voffbias = 128 };

// Spacer kinds
enum {
	ISPnull,	// 0 height and width
	ISPvline,	// height and ascent of current font
	ISPhspace,	// width of space in current font
	ISPgeneral	// other purposes (e.g., between markers and list)
};

290// Generic attributes and events (not many elements will have any of these set)
291struct Genattr
292{
293 Rune* id;
294 Rune* class;
295 Rune* style;
296 Rune* title;
297 SEvent* events;
298};
299
300struct SEvent
301{
302 SEvent* next; // in list of events
303 int type; // SEonblur, etc.
304 Rune* script;
305};
306
// Script event types (SEvent.type).
// Values are consecutive from 0; Numscriptev is the number of types.
enum {
	SEonblur, SEonchange, SEonclick, SEondblclick,
	SEonfocus, SEonkeypress, SEonkeyup, SEonload,
	SEonmousedown, SEonmousemove, SEonmouseout,
	SEonmouseover, SEonmouseup, SEonreset, SEonselect,
	SEonsubmit, SEonunload,
	Numscriptev
};

// Form field types
enum {
	Ftext,
	Fpassword,
	Fcheckbox,
	Fradio,
	Fsubmit,
	Fhidden,
	Fimage,
	Freset,
	Ffile,
	Fbutton,
	Fselect,
	Ftextarea
};

332// Information about a field in a form
333struct Formfield
334{
335 Formfield* next; // in list of fields for a form
336 int ftype; // Ftext, Fpassword, etc.
337 int fieldid; // serial no. of field within its form
338 Form* form; // containing form
339 Rune* name; // name attr
340 Rune* value; // value attr
341 int size; // size attr
342 int maxlength; // maxlength attr
343 int rows; // rows attr
344 int cols; // cols attr
345 uchar flags; // FFchecked, etc.
346 Option* options; // for Fselect fields
347 Item* image; // image item, for Fimage fields
348 int ctlid; // identifies control for this field in layout
349 SEvent* events; // same as genattr->events of containing item
350};
351
// Formfield flags (Formfield.flags)
enum {
	FFchecked =	(1<<7),
	FFmultiple =	(1<<6)
};

357// Option holds info about an option in a "select" form field
358struct Option
359{
360 Option* next; // next in list of options for a field
361 int selected; // true if selected initially
362 Rune* value; // value attr
363 Rune* display; // display string
364};
365
366// Form holds info about a form
367struct Form
368{
369 Form* next; // in list of forms for document
370 int formid; // serial no. of form within its doc
371 Rune* name; // name or id attr (netscape uses name, HTML 4.0 uses id)
372 Rune* action; // action attr
373 int target; // target attr as targetid
374 int method; // HGet or HPost
375 int nfields; // number of fields
376 Formfield* fields; // field's forms, in input order
377};
378
// Flags used in various table structures
enum {
	TFparsing =	(1<<7),
	TFnowrap =	(1<<6),
	TFisth =	(1<<5)
};

387// Information about a table
388struct Table
389{
390 Table* next; // next in list of document's tables
391 int tableid; // serial no. of table within its doc
392 Tablerow* rows; // array of row specs (list during parsing)
393 int nrow; // total number of rows
394 Tablecol* cols; // array of column specs
395 int ncol; // total number of columns
396 Tablecell* cells; // list of unique cells
397 int ncell; // total number of cells
398 Tablecell*** grid; // 2-D array of cells
399 Align align; // alignment spec for whole table
400 Dimen width; // width spec for whole table
401 int border; // border attr
402 int cellspacing; // cellspacing attr
403 int cellpadding; // cellpadding attr
404 Background background; // table background
405 Item* caption; // linked list of Items, giving caption
406 uchar caption_place; // ALtop or ALbottom
407 Lay* caption_lay; // layout of caption
408 int totw; // total width
409 int toth; // total height
410 int caph; // caption height
411 int availw; // used for previous 3 sizes
412 Token* tabletok; // token that started the table
413 uchar flags; // Lchanged, perhaps
414};
415
416
417struct Tablecol
418{
419 int width;
420 Align align;
421 Point pos;
422};
423
424
425struct Tablerow
426{
427 Tablerow* next; // Next in list of rows, during parsing
428 Tablecell* cells; // Cells in row, linked through nextinrow
429 int height;
430 int ascent;
431 Align align;
432 Background background;
433 Point pos;
434 uchar flags; // 0 or TFparsing
435};
436
437
438// A Tablecell is one cell of a table.
439// It may span multiple rows and multiple columns.
440// Cells are linked on two lists: the list for all the cells of
441// a document (the next pointers), and the list of all the
442// cells that start in a given row (the nextinrow pointers)
443struct Tablecell
444{
445 Tablecell* next; // next in list of table's cells
446 Tablecell* nextinrow; // next in list of row's cells
447 int cellid; // serial no. of cell within table
448 Item* content; // contents before layout
449 Lay* lay; // layout of cell
450 int rowspan; // number of rows spanned by this cell
451 int colspan; // number of cols spanned by this cell
452 Align align; // alignment spec
453 uchar flags; // TFparsing, TFnowrap, TFisth
454 Dimen wspec; // suggested width
455 int hspec; // suggested height
456 Background background; // cell background
457 int minw; // minimum possible width
458 int maxw; // maximum width
459 int ascent; // cell's ascent
460 int row; // row of upper left corner
461 int col; // col of upper left corner
462 Point pos; // nw corner of cell contents, in cell
463};
464
465// Anchor is for info about hyperlinks that go somewhere
466struct Anchor
467{
468 Anchor* next; // next in list of document's anchors
469 int index; // serial no. of anchor within its doc
470 Rune* name; // name attr
471 Rune* href; // href attr
472 int target; // target attr as targetid
473};
474
475
476// DestAnchor is for info about hyperlinks that are destinations
477struct DestAnchor
478{
479 DestAnchor* next; // next in list of document's destanchors
480 int index; // serial no. of anchor within its doc
481 Rune* name; // name attr
482 Item* item; // the destination
483};
484
485
486// Maps (client side)
487struct Map
488{
489 Map* next; // next in list of document's maps
490 Rune* name; // map name
491 Area* areas; // list of map areas
492};
493
494
495struct Area
496{
497 Area* next; // next in list of a map's areas
498 int shape; // SHrect, etc.
499 Rune* href; // associated hypertext link
500 int target; // associated target frame
501 Dimen* coords; // array of coords for shape
502 int ncoords; // size of coords array
503};
504
// Area shapes
enum {
	SHrect, SHcircle, SHpoly
};

// Fonts are represented by integers: style*NumSize + size

// Font styles
// Values are consecutive from 0; NumStyle is the number of styles.
enum {
	FntR,	// roman
	FntI,	// italic
	FntB,	// bold
	FntT,	// typewriter
	NumStyle
};

// Font sizes
// Values are consecutive from 0; NumSize is the number of sizes.
enum {
	Tiny,
	Small,
	Normal,
	Large,
	Verylarge,
	NumSize
};

531enum {
532 NumFnt = (NumStyle*NumSize),
533 DefFnt = (FntR*NumSize+Normal)
534};
535
// Lines are needed through some text items, for underlining or strikethrough
enum {
	ULnone, ULunder, ULmid
};

// Kidinfo flags
enum {
	FRnoresize =	(1<<0),
	FRnoscroll =	(1<<1),
	FRhscroll =	(1<<2),
	FRvscroll =	(1<<3),
	FRhscrollauto =	(1<<4),
	FRvscrollauto =	(1<<5)
};

551// Information about child frame or frameset
552struct Kidinfo
553{
554 Kidinfo* next; // in list of kidinfos for a frameset
555 int isframeset;
556
557 // fields for "frame"
558 Rune* src; // only nil if a "dummy" frame or this is frameset
559 Rune* name; // always non-empty if this isn't frameset
560 int marginw;
561 int marginh;
562 int framebd;
563 int flags;
564
565 // fields for "frameset"
566 Dimen* rows; // array of row dimensions
567 int nrows; // length of rows
568 Dimen* cols; // array of col dimensions
569 int ncols; // length of cols
570 Kidinfo* kidinfos;
571 Kidinfo* nextframeset; // parsing stack
572};
573
574
575// Document info (global information about HTML page)
576struct Docinfo
577{
578 // stuff from HTTP headers, doc head, and body tag
579 Rune* src; // original source of doc
580 Rune* base; // base URL of doc
581 Rune* doctitle; // from <title> element
582 Background background; // background specification
583 Iimage* backgrounditem; // Image Item for doc background image, or nil
584 int text; // doc foreground (text) color
585 int link; // unvisited hyperlink color
586 int vlink; // visited hyperlink color
587 int alink; // highlighting hyperlink color
588 int target; // target frame default
589 int chset; // ISO_8859, etc.
590 int mediatype; // TextHtml, etc.
591 int scripttype; // TextJavascript, etc.
592 int hasscripts; // true if scripts used
593 Rune* refresh; // content of <http-equiv=Refresh ...>
594 Kidinfo* kidinfo; // if a frameset
595 int frameid; // id of document frame
596
597 // info needed to respond to user actions
598 Anchor* anchors; // list of href anchors
599 DestAnchor* dests; // list of destination anchors
600 Form* forms; // list of forms
601 Table* tables; // list of tables
602 Map* maps; // list of maps
603 Iimage* images; // list of image items (through nextimage links)
604};
605
606extern int dimenkind(Dimen d);
607extern int dimenspec(Dimen d);
608extern void freedocinfo(Docinfo* d);
609extern void freeitems(Item* ithead);
610extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
611extern void printitems(Item* items, char* msg);
612extern int targetid(Rune* s);
613extern Rune* targetname(int targid);
614extern int validitems(Item* i);
615
rsc650deb72004-04-08 19:36:35 +0000616/* #pragma varargck type "I" Item* */
rsc7f111042003-12-11 18:15:57 +0000617
// Control print output
// Declared here only; the definitions live outside this header.
extern int	warn;
extern int	dbglex;
extern int	dbgbuild;

623// To be provided by caller
624// emalloc and erealloc should not return if can't get memory.
625// emalloc should zero its memory.
626extern void* emalloc(ulong);
627extern void* erealloc(void* p, ulong size);
628#ifdef __cpluspplus
629}
630#endif
631#endif