blob: 0bcf3b5c8fd371aa81bcd8f94c7c367d2b71470b [file] [log] [blame]
rsc7f111042003-12-11 18:15:57 +00001#ifndef _HTML_H_
2#define _HTML_H_ 1
3#ifdef __cplusplus
4extern "C" {
5#endif
6
/*
 #pragma lib "libhtml.a"
 #pragma src "/sys/src/libhtml"
*/

12// UTILS
13extern uchar* fromStr(Rune* buf, int n, int chset);
14extern Rune* toStr(uchar* buf, int n, int chset);
15
// Common LEX and BUILD enums

// Media types
// Values are consecutive starting at 0; NMEDIATYPES is one past the
// last media type and so equals the number of media types.
enum
{
	ApplMsword,
	ApplOctets,
	ApplPdf,
	ApplPostscript,
	ApplRtf,
	ApplFramemaker,
	ApplMsexcel,
	ApplMspowerpoint,
	UnknownType,
	Audio32kadpcm,
	AudioBasic,
	ImageCgm,
	ImageG3fax,
	ImageGif,
	ImageIef,
	ImageJpeg,
	ImagePng,
	ImageTiff,
	ImageXBit,
	ImageXBit2,
	ImageXBitmulti,
	ImageXXBitmap,
	ModelVrml,
	MultiDigest,
	MultiMixed,
	TextCss,
	TextEnriched,
	TextHtml,
	TextJavascript,
	TextPlain,
	TextRichtext,
	TextSgml,
	TextTabSeparatedValues,
	TextXml,
	VideoMpeg,
	VideoQuicktime,
	NMEDIATYPES
};

// HTTP methods (stored in Form.method)
enum
{
	HGet,
	HPost
};

// Charsets
// Values are consecutive starting at 0; NCHARSETS is one past the
// last charset and so equals the number of charsets.
enum
{
	UnknownCharset,
	US_Ascii,
	ISO_8859_1,
	UTF_8,
	Unicode,
	NCHARSETS
};

// Frame Target IDs
// NOTE(review): presumably the predefined ids that targetid() and
// targetname() map to and from the reserved frame target names --
// confirm against the implementation.
enum {
	FTtop,
	FTself,
	FTparent,
	FTblank
};

// Forward typedefs so that the structures below can refer to one
// another by bare name, whatever order they are defined in.

// LEX
typedef struct Token Token;
typedef struct Attr Attr;

// BUILD

typedef struct Item Item;
typedef struct Itext Itext;
typedef struct Irule Irule;
typedef struct Iimage Iimage;
typedef struct Iformfield Iformfield;
typedef struct Itable Itable;
typedef struct Ifloat Ifloat;
typedef struct Ispacer Ispacer;
typedef struct Genattr Genattr;
typedef struct SEvent SEvent;
typedef struct Formfield Formfield;
typedef struct Option Option;
typedef struct Form Form;
typedef struct Table Table;
typedef struct Tablecol Tablecol;
typedef struct Tablerow Tablerow;
typedef struct Tablecell Tablecell;
typedef struct Align Align;
typedef struct Dimen Dimen;
typedef struct Anchor Anchor;
typedef struct DestAnchor DestAnchor;
typedef struct Map Map;
typedef struct Area Area;
typedef struct Background Background;
typedef struct Kidinfo Kidinfo;
typedef struct Docinfo Docinfo;
typedef struct Stack Stack;
typedef struct Pstate Pstate;
typedef struct ItemSource ItemSource;
typedef struct Lay Lay;	// defined in Layout module

// Alignment types
// Used for both horizontal and vertical alignment (see struct Align).
enum {
	ALnone = 0,
	ALleft,
	ALcenter,
	ALright,
	ALjustify,
	ALchar,
	ALtop,
	ALmiddle,
	ALbottom,
	ALbaseline
};

129struct Align
130{
131 uchar halign; // one of ALnone, ALleft, etc.
132 uchar valign; // one of ALnone, ALtop, etc.
133};
134
// A Dimen holds a dimension specification, especially for those
// cases when a number can be followed by a % or a * to indicate
// percentage of total or relative weight.
// Dnone means no dimension was specified

// To fit in a word, use top bits to identify kind, rest for value.
// The kind occupies bits 29 and 30 (Dkindmask); Dspecmask is the
// complement of Dkindmask, so it covers bits 0-28 and also bit 31.
enum {
	Dnone		= 0,
	Dpixels		= (1<<29),
	Dpercent	= (2<<29),
	Drelative	= (3<<29),
	Dkindmask	= (3<<29),
	Dspecmask	= (~Dkindmask)
};

// One dimension specification packed into a single int: a D* kind
// (Dnone, Dpixels, Dpercent, Drelative) or-ed with the value.
// See dimenkind() and dimenspec().
struct Dimen
{
	int	kindspec;		// kind | spec
};

155// Background is either an image or a color.
156// If both are set, the image has precedence.
157struct Background
158{
159 Rune* image; // url
160 int color;
161};
162
163
164// There are about a half dozen Item variants.
165// The all look like this at the start (using Plan 9 C's
166// anonymous structure member mechanism),
167// and then the tag field dictates what extra fields there are.
168struct Item
169{
170 Item* next; // successor in list of items
171 int width; // width in pixels (0 for floating items)
172 int height; // height in pixels
173 int ascent; // ascent (from top to baseline) in pixels
174 int anchorid; // if nonzero, which anchor we're in
175 int state; // flags and values (see below)
176 Genattr* genattr; // generic attributes and events
177 int tag; // variant discriminator: Itexttag, etc.
178};
179
// Item variant tags (values of Item.tag), one per Item variant struct
enum {
	Itexttag,
	Iruletag,
	Iimagetag,
	Iformfieldtag,
	Itabletag,
	Ifloattag,
	Ispacertag
};

191struct Itext
192{
rsc650deb72004-04-08 19:36:35 +0000193 Item item; // (with tag ==Itexttag)
rsc7f111042003-12-11 18:15:57 +0000194 Rune* s; // the characters
195 int fnt; // style*NumSize+size (see font stuff, below)
196 int fg; // Pixel (color) for text
197 uchar voff; // Voffbias+vertical offset from baseline, in pixels (+ve == down)
198 uchar ul; // ULnone, ULunder, or ULmid
199};
200
201struct Irule
202{
rsc650deb72004-04-08 19:36:35 +0000203 Item item; // (with tag ==Iruletag)
rsc7f111042003-12-11 18:15:57 +0000204 uchar align; // alignment spec
205 uchar noshade; // if true, don't shade
206 int size; // size attr (rule height)
207 Dimen wspec; // width spec
208};
209
210
211struct Iimage
212{
rsc650deb72004-04-08 19:36:35 +0000213 Item item; // (with tag ==Iimagetag)
rsc7f111042003-12-11 18:15:57 +0000214 Rune* imsrc; // image src url
215 int imwidth; // spec width (actual, if no spec)
216 int imheight; // spec height (actual, if no spec)
217 Rune* altrep; // alternate representation, in absence of image
218 Map* map; // if non-nil, client side map
219 int ctlid; // if animated
220 uchar align; // vertical alignment
221 uchar hspace; // in pixels; buffer space on each side
222 uchar vspace; // in pixels; buffer space on top and bottom
223 uchar border; // in pixels: border width to draw around image
224 Iimage* nextimage; // next in list of document's images
225};
226
227
228struct Iformfield
229{
rsc650deb72004-04-08 19:36:35 +0000230 Item item; // (with tag ==Iformfieldtag)
rsc7f111042003-12-11 18:15:57 +0000231 Formfield* formfield;
232};
233
234
235struct Itable
236{
rsc650deb72004-04-08 19:36:35 +0000237 Item item; // (with tag ==Itabletag)
rsc7f111042003-12-11 18:15:57 +0000238 Table* table;
239};
240
241
242struct Ifloat
243{
244 Item _item; // (with tag ==Ifloattag)
245 Item* item; // table or image item that floats
246 int x; // x coord of top (from right, if ALright)
247 int y; // y coord of top
248 uchar side; // margin it floats to: ALleft or ALright
249 uchar infloats; // true if this has been added to a lay.floats
250 Ifloat* nextfloat; // in list of floats
251};
252
253
254struct Ispacer
255{
rsc650deb72004-04-08 19:36:35 +0000256 Item item; // (with tag ==Ispacertag)
rsc7f111042003-12-11 18:15:57 +0000257 int spkind; // ISPnull, etc.
258};
259
// Item state flags and value fields (stored in Item.state)

// IFbrk: forced break before this item.
// It is a macro rather than an enumerator because 0x80000000 does
// not fit in an int-valued enumerator ("too big for sun").
#define	IFbrk		0x80000000	/* too big for sun */

enum {
	IFbrksp		= 0x40000000,	// add 1 line space to break (IFbrk set too)
	IFnobrk		= 0x20000000,	// break not allowed before this item
	IFcleft		= 0x10000000,	// clear left floats (IFbrk set too)
	IFcright	= 0x08000000,	// clear right floats (IFbrk set too)
	IFwrap		= 0x04000000,	// in a wrapping (non-pre) line
	IFhang		= 0x02000000,	// in a hanging (into left indent) item
	IFrjust		= 0x01000000,	// right justify current line
	IFcjust		= 0x00800000,	// center justify current line
	IFsmap		= 0x00400000,	// image is server-side map
	IFindentshift	= 8,
	IFindentmask	= (255<<IFindentshift),	// current indent, in tab stops
	IFhangmask	= 255			// current hang into left indent, in 1/10th tabstops
};

// Bias added to Itext's voff field
enum { Voffbias = 128 };

// Spacer kinds (values of Ispacer.spkind)
enum {
	ISPnull,	// 0 height and width
	ISPvline,	// height and ascent of current font
	ISPhspace,	// width of space in current font
	ISPgeneral	// other purposes (e.g., between markers and list)
};

289// Generic attributes and events (not many elements will have any of these set)
290struct Genattr
291{
292 Rune* id;
293 Rune* class;
294 Rune* style;
295 Rune* title;
296 SEvent* events;
297};
298
299struct SEvent
300{
301 SEvent* next; // in list of events
302 int type; // SEonblur, etc.
303 Rune* script;
304};
305
// Scripted event types (values of SEvent.type).
// Values are consecutive starting at 0; Numscriptev is one past the
// last event type and so equals the number of event types.
enum {
	SEonblur,
	SEonchange,
	SEonclick,
	SEondblclick,
	SEonfocus,
	SEonkeypress,
	SEonkeyup,
	SEonload,
	SEonmousedown,
	SEonmousemove,
	SEonmouseout,
	SEonmouseover,
	SEonmouseup,
	SEonreset,
	SEonselect,
	SEonsubmit,
	SEonunload,
	Numscriptev
};

// Form field types (values of Formfield.ftype)
enum {
	Ftext,
	Fpassword,
	Fcheckbox,
	Fradio,
	Fsubmit,
	Fhidden,
	Fimage,
	Freset,
	Ffile,
	Fbutton,
	Fselect,
	Ftextarea
};

331// Information about a field in a form
332struct Formfield
333{
334 Formfield* next; // in list of fields for a form
335 int ftype; // Ftext, Fpassword, etc.
336 int fieldid; // serial no. of field within its form
337 Form* form; // containing form
338 Rune* name; // name attr
339 Rune* value; // value attr
340 int size; // size attr
341 int maxlength; // maxlength attr
342 int rows; // rows attr
343 int cols; // cols attr
344 uchar flags; // FFchecked, etc.
345 Option* options; // for Fselect fields
346 Item* image; // image item, for Fimage fields
347 int ctlid; // identifies control for this field in layout
348 SEvent* events; // same as genattr->events of containing item
349};
350
// Formfield.flags bits
enum {
	FFchecked	= (1<<7),
	FFmultiple	= (1<<6)
};

356// Option holds info about an option in a "select" form field
357struct Option
358{
359 Option* next; // next in list of options for a field
360 int selected; // true if selected initially
361 Rune* value; // value attr
362 Rune* display; // display string
363};
364
365// Form holds info about a form
366struct Form
367{
368 Form* next; // in list of forms for document
369 int formid; // serial no. of form within its doc
370 Rune* name; // name or id attr (netscape uses name, HTML 4.0 uses id)
371 Rune* action; // action attr
372 int target; // target attr as targetid
373 int method; // HGet or HPost
374 int nfields; // number of fields
375 Formfield* fields; // field's forms, in input order
376};
377
// Flags used in various table structures
// (the flags fields of Table, Tablerow and Tablecell)
enum {
	TFparsing	= (1<<7),
	TFnowrap	= (1<<6),
	TFisth		= (1<<5)
};


386// Information about a table
387struct Table
388{
389 Table* next; // next in list of document's tables
390 int tableid; // serial no. of table within its doc
391 Tablerow* rows; // array of row specs (list during parsing)
392 int nrow; // total number of rows
393 Tablecol* cols; // array of column specs
394 int ncol; // total number of columns
395 Tablecell* cells; // list of unique cells
396 int ncell; // total number of cells
397 Tablecell*** grid; // 2-D array of cells
398 Align align; // alignment spec for whole table
399 Dimen width; // width spec for whole table
400 int border; // border attr
401 int cellspacing; // cellspacing attr
402 int cellpadding; // cellpadding attr
403 Background background; // table background
404 Item* caption; // linked list of Items, giving caption
405 uchar caption_place; // ALtop or ALbottom
406 Lay* caption_lay; // layout of caption
407 int totw; // total width
408 int toth; // total height
409 int caph; // caption height
410 int availw; // used for previous 3 sizes
411 Token* tabletok; // token that started the table
412 uchar flags; // Lchanged, perhaps
413};
414
415
416struct Tablecol
417{
418 int width;
419 Align align;
420 Point pos;
421};
422
423
424struct Tablerow
425{
426 Tablerow* next; // Next in list of rows, during parsing
427 Tablecell* cells; // Cells in row, linked through nextinrow
428 int height;
429 int ascent;
430 Align align;
431 Background background;
432 Point pos;
433 uchar flags; // 0 or TFparsing
434};
435
436
437// A Tablecell is one cell of a table.
438// It may span multiple rows and multiple columns.
439// Cells are linked on two lists: the list for all the cells of
440// a document (the next pointers), and the list of all the
441// cells that start in a given row (the nextinrow pointers)
442struct Tablecell
443{
444 Tablecell* next; // next in list of table's cells
445 Tablecell* nextinrow; // next in list of row's cells
446 int cellid; // serial no. of cell within table
447 Item* content; // contents before layout
448 Lay* lay; // layout of cell
449 int rowspan; // number of rows spanned by this cell
450 int colspan; // number of cols spanned by this cell
451 Align align; // alignment spec
452 uchar flags; // TFparsing, TFnowrap, TFisth
453 Dimen wspec; // suggested width
454 int hspec; // suggested height
455 Background background; // cell background
456 int minw; // minimum possible width
457 int maxw; // maximum width
458 int ascent; // cell's ascent
459 int row; // row of upper left corner
460 int col; // col of upper left corner
461 Point pos; // nw corner of cell contents, in cell
462};
463
464// Anchor is for info about hyperlinks that go somewhere
465struct Anchor
466{
467 Anchor* next; // next in list of document's anchors
468 int index; // serial no. of anchor within its doc
469 Rune* name; // name attr
470 Rune* href; // href attr
471 int target; // target attr as targetid
472};
473
474
475// DestAnchor is for info about hyperlinks that are destinations
476struct DestAnchor
477{
478 DestAnchor* next; // next in list of document's destanchors
479 int index; // serial no. of anchor within its doc
480 Rune* name; // name attr
481 Item* item; // the destination
482};
483
484
485// Maps (client side)
486struct Map
487{
488 Map* next; // next in list of document's maps
489 Rune* name; // map name
490 Area* areas; // list of map areas
491};
492
493
494struct Area
495{
496 Area* next; // next in list of a map's areas
497 int shape; // SHrect, etc.
498 Rune* href; // associated hypertext link
499 int target; // associated target frame
500 Dimen* coords; // array of coords for shape
501 int ncoords; // size of coords array
502};
503
// Area shapes (values of Area.shape)
enum {
	SHrect,
	SHcircle,
	SHpoly
};

// Fonts are represented by integers: style*NumSize + size

// Font styles
// NumStyle is one past the last style and so equals the number of styles.
enum {
	FntR,		// roman
	FntI,		// italic
	FntB,		// bold
	FntT,		// typewriter
	NumStyle
};

// Font sizes
// NumSize is one past the last size and so equals the number of sizes.
enum {
	Tiny,
	Small,
	Normal,
	Large,
	Verylarge,
	NumSize
};

530enum {
531 NumFnt = (NumStyle*NumSize),
532 DefFnt = (FntR*NumSize+Normal)
533};
534
// Lines are needed through some text items, for underlining or strikethrough
// (values of Itext.ul)
enum {
	ULnone,
	ULunder,
	ULmid
};

// Kidinfo flags (bits of Kidinfo.flags)
enum {
	FRnoresize		= (1<<0),
	FRnoscroll		= (1<<1),
	FRhscroll		= (1<<2),
	FRvscroll		= (1<<3),
	FRhscrollauto	= (1<<4),
	FRvscrollauto	= (1<<5)
};

550// Information about child frame or frameset
551struct Kidinfo
552{
553 Kidinfo* next; // in list of kidinfos for a frameset
554 int isframeset;
555
556 // fields for "frame"
557 Rune* src; // only nil if a "dummy" frame or this is frameset
558 Rune* name; // always non-empty if this isn't frameset
559 int marginw;
560 int marginh;
561 int framebd;
562 int flags;
563
564 // fields for "frameset"
565 Dimen* rows; // array of row dimensions
566 int nrows; // length of rows
567 Dimen* cols; // array of col dimensions
568 int ncols; // length of cols
569 Kidinfo* kidinfos;
570 Kidinfo* nextframeset; // parsing stack
571};
572
573
574// Document info (global information about HTML page)
575struct Docinfo
576{
577 // stuff from HTTP headers, doc head, and body tag
578 Rune* src; // original source of doc
579 Rune* base; // base URL of doc
580 Rune* doctitle; // from <title> element
581 Background background; // background specification
582 Iimage* backgrounditem; // Image Item for doc background image, or nil
583 int text; // doc foreground (text) color
584 int link; // unvisited hyperlink color
585 int vlink; // visited hyperlink color
586 int alink; // highlighting hyperlink color
587 int target; // target frame default
588 int chset; // ISO_8859, etc.
589 int mediatype; // TextHtml, etc.
590 int scripttype; // TextJavascript, etc.
591 int hasscripts; // true if scripts used
592 Rune* refresh; // content of <http-equiv=Refresh ...>
593 Kidinfo* kidinfo; // if a frameset
594 int frameid; // id of document frame
595
596 // info needed to respond to user actions
597 Anchor* anchors; // list of href anchors
598 DestAnchor* dests; // list of destination anchors
599 Form* forms; // list of forms
600 Table* tables; // list of tables
601 Map* maps; // list of maps
602 Iimage* images; // list of image items (through nextimage links)
603};
604
605extern int dimenkind(Dimen d);
606extern int dimenspec(Dimen d);
607extern void freedocinfo(Docinfo* d);
608extern void freeitems(Item* ithead);
609extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
610extern void printitems(Item* items, char* msg);
611extern int targetid(Rune* s);
612extern Rune* targetname(int targid);
613extern int validitems(Item* i);
614
/* #pragma varargck type "I" Item* */

// Control print output
extern int	warn;
extern int	dbglex;
extern int	dbgbuild;

622// To be provided by caller
623// emalloc and erealloc should not return if can't get memory.
624// emalloc should zero its memory.
625extern void* emalloc(ulong);
626extern void* erealloc(void* p, ulong size);
627#ifdef __cpluspplus
628}
629#endif
630#endif