|  | #include <u.h> | 
|  | #include <libc.h> | 
|  | #include <bio.h> | 
|  | #include "dict.h" | 
|  |  | 
/* Fixed sizes of the static scratch buffers declared later in this file. */
enum {
	Buflen	= 1000,	/* chars in tag[], spec[] and each auxname[]/auxval[] row */
	Maxaux	= 5	/* rows in auxname[] and auxval[] */
};
|  |  | 
/*
 * Tag codes.  Values are assigned consecutively from 0 in the order
 * listed, and Ntag (the last enumerator) is therefore the tag count.
 */
enum {
	A = 0,		/* author in quote (small caps) */
	B,		/* bold */
	Ba,		/* author inside bib */
	Bch,		/* built-up chem component */
	Bib,		/* surrounds word 'in' for bibliographic ref */
	Bl,		/* bold */
	Bo,		/* bond over */
	Bu,		/* bond under */
	Cb,		/* ? block of stuff (indent) */
	Cf,		/* cross ref to another entry (italics) */
	Chem,		/* chemistry formula */
	Co,		/* over (preceding sum, integral, etc.) */
	Col,		/* column of table (aux just may be r) */
	Cu,		/* under (preceding sum, integral, etc.) */
	Dat,		/* date */
	Db,		/* def block? indent */
	Dn,		/* denominator of fraction */
	E,		/* main entry */
	Ed,		/* editor's comments (in [...]) */
	Etym,		/* etymology (in [...]) */
	Fq,		/* frequency count (superscript) */
	Form,		/* formula */
	Fr,		/* fraction (contains <nu>, then <dn>) */
	Gk,		/* greek (transliteration) */
	Gr,		/* grammar? (e.g., around 'pa.' in 'pa. pple.') */
	Hg,		/* headword group */
	Hm,		/* homonym (superscript) */
	Hw,		/* headword (bold) */
	I,		/* italics */
	Il,		/* italic list? */
	In,		/* inferior (subscript) */
	L,		/* row of col of table */
	La,		/* status or usage label (italic) */
	Lc,		/* chapter/verse sort of thing for works */
	N,		/* note (smaller type) */
	Nu,		/* numerator of fraction */
	Ov,		/* needs overline */
	P,		/* paragraph (indent) */
	Ph,		/* pronunciation (transliteration) */
	Pi,		/* pile (frac without line) */
	Pqp,		/* subblock of quote */
	Pr,		/* pronunciation (in (...)) */
	Ps,		/* position (e.g., adv.) (italic) */
	Pt,		/* part (in lc) */
	Q,		/* quote in quote block */
	Qd,		/* quote date (bold) */
	Qig,		/* quote number (greek) */
	Qla,		/* status or usage label in quote (italic) */
	Qp,		/* quote block (small type, indent) */
	Qsn,		/* quote number */
	Qt,		/* quote words */
	R,		/* roman type style */
	Rx,		/* relative cross reference (e.g., next) */
	S,		/* another form? (italic) */
	S0,		/* sense (sometimes surrounds several sx's) */
	S1,		/* sense (aux num: indented bold letter) */
	S2,		/* sense (aux num: indented bold capital rom num) */
	S3,		/* sense (aux num: indented number of asterisks) */
	S4,		/* sense (aux num: indented bold number) */
	S5,		/* sense (aux num: indented number of asterisks) */
	S6,		/* subsense (aux num: bold letter) */
	S7a,		/* subsense (aux num: letter) */
	S7n,		/* subsense (aux num: roman numeral) */
	Sc,		/* small caps */
	Sgk,		/* subsense (aux num: transliterated greek) */
	Sn,		/* sense of subdefinition (aux num: roman letter) */
	Ss,		/* sans serif */
	Ssb,		/* sans serif bold */
	Ssi,		/* sans serif italic */
	Su,		/* superior (superscript) */
	Sub,		/* subdefinition */
	Table,		/* table (aux cols=number of columns) */
	Tt,		/* title? (italics) */
	Vd,		/* numeric label for variant form */
	Ve,		/* variant entry */
	Vf,		/* variant form (light bold) */
	Vfl,		/* list of vf's (starts with Also or Forms) */
	W,		/* work (e.g., Beowulf) (italics) */
	X,		/* cross reference to main word (small caps) */
	Xd,		/* cross reference to quotation by date */
	Xi,		/* internal cross reference ? (italic) */
	Xid,		/* cross reference identifier, in quote ? */
	Xs,		/* cross reference sense (lower number) */
	Xr,		/* list of x's */
	Ntag		/* end of tags */
};
|  |  | 
|  | /* Assoc tables must be sorted on first field */ | 
|  |  | 
|  | static Assoc tagtab[] = { | 
|  | {"a",		A}, | 
|  | {"b",		B}, | 
|  | {"ba",		Ba}, | 
|  | {"bch",		Bch}, | 
|  | {"bib",		Bib}, | 
|  | {"bl",		Bl}, | 
|  | {"bo",		Bo}, | 
|  | {"bu",		Bu}, | 
|  | {"cb",		Cb}, | 
|  | {"cf",		Cf}, | 
|  | {"chem",	Chem}, | 
|  | {"co",		Co}, | 
|  | {"col",		Col}, | 
|  | {"cu",		Cu}, | 
|  | {"dat",		Dat}, | 
|  | {"db",		Db}, | 
|  | {"dn",		Dn}, | 
|  | {"e",		E}, | 
|  | {"ed",		Ed}, | 
|  | {"et",		Etym}, | 
|  | {"etym",	Etym}, | 
|  | {"form",	Form}, | 
|  | {"fq",		Fq}, | 
|  | {"fr",		Fr}, | 
|  | {"frac",	Fr}, | 
|  | {"gk",		Gk}, | 
|  | {"gr",		Gr}, | 
|  | {"hg",		Hg}, | 
|  | {"hm",		Hm}, | 
|  | {"hw",		Hw}, | 
|  | {"i",		I}, | 
|  | {"il",		Il}, | 
|  | {"in",		In}, | 
|  | {"l",		L}, | 
|  | {"la",		La}, | 
|  | {"lc",		Lc}, | 
|  | {"n",		N}, | 
|  | {"nu",		Nu}, | 
|  | {"ov",		Ov}, | 
|  | {"p",		P}, | 
|  | {"ph",		Ph}, | 
|  | {"pi",		Pi}, | 
|  | {"pqp",		Pqp}, | 
|  | {"pr",		Pr}, | 
|  | {"ps",		Ps}, | 
|  | {"pt",		Pt}, | 
|  | {"q",		Q}, | 
|  | {"qd",		Qd}, | 
|  | {"qig",		Qig}, | 
|  | {"qla",		Qla}, | 
|  | {"qp",		Qp}, | 
|  | {"qsn",		Qsn}, | 
|  | {"qt",		Qt}, | 
|  | {"r",		R}, | 
|  | {"rx",		Rx}, | 
|  | {"s",		S}, | 
|  | {"s0",		S0}, | 
|  | {"s1",		S1}, | 
|  | {"s2",		S2}, | 
|  | {"s3",		S3}, | 
|  | {"s4",		S4}, | 
|  | {"s5",		S5}, | 
|  | {"s6",		S6}, | 
|  | {"s7a",		S7a}, | 
|  | {"s7n",		S7n}, | 
|  | {"sc",		Sc}, | 
|  | {"sgk",		Sgk}, | 
|  | {"sn",		Sn}, | 
|  | {"ss",		Ss,}, | 
|  | {"ssb",		Ssb}, | 
|  | {"ssi",		Ssi}, | 
|  | {"su",		Su}, | 
|  | {"sub",		Sub}, | 
|  | {"table",	Table}, | 
|  | {"tt",		Tt}, | 
|  | {"vd",		Vd}, | 
|  | {"ve",		Ve}, | 
|  | {"vf",		Vf}, | 
|  | {"vfl",		Vfl}, | 
|  | {"w",		W}, | 
|  | {"x",		X}, | 
|  | {"xd",		Xd}, | 
|  | {"xi",		Xi}, | 
|  | {"xid",		Xid}, | 
|  | {"xr",		Xr}, | 
|  | {"xs",		Xs} | 
|  | }; | 
|  |  | 
/*
 * Codes for the auxiliary attributes a tag may carry.
 * Naux, the last enumerator, is the number of codes.
 */
enum {
	Cols = 0,	/* number of columns in a table */
	Num,		/* letter or number, for a sense */
	St,		/* status (e.g., obs) */
	Naux
};
|  |  | 
|  | static Assoc auxtab[] = { | 
|  | {"cols",	Cols}, | 
|  | {"num",		Num}, | 
|  | {"st",		St} | 
|  | }; | 
|  |  | 
|  | static Assoc spectab[] = { | 
|  | {"3on4",	0xbe}, | 
|  | {"Aacu",	0xc1}, | 
|  | {"Aang",	0xc5}, | 
|  | {"Abarab",	0x100}, | 
|  | {"Acirc",	0xc2}, | 
|  | {"Ae",		0xc6}, | 
|  | {"Agrave",	0xc0}, | 
|  | {"Alpha",	0x391}, | 
|  | {"Amac",	0x100}, | 
|  | {"Asg",		0x1b7},		/* Unicyle. Cf "Sake" */ | 
|  | {"Auml",	0xc4}, | 
|  | {"Beta",	0x392}, | 
|  | {"Cced",	0xc7}, | 
|  | {"Chacek",	0x10c}, | 
|  | {"Chi",		0x3a7}, | 
|  | {"Chirho",	0x2627},		/* Chi Rho U+2627 */ | 
|  | {"Csigma",	0x3da}, | 
|  | {"Delta",	0x394}, | 
|  | {"Eacu",	0xc9}, | 
|  | {"Ecirc",	0xca}, | 
|  | {"Edh",		0xd0}, | 
|  | {"Epsilon",	0x395}, | 
|  | {"Eta",		0x397}, | 
|  | {"Gamma",	0x393}, | 
|  | {"Iacu",	0xcd}, | 
|  | {"Icirc",	0xce}, | 
|  | {"Imac",	0x12a}, | 
|  | {"Integ",	0x222b}, | 
|  | {"Iota",	0x399}, | 
|  | {"Kappa",	0x39a}, | 
|  | {"Koppa",	0x3de}, | 
|  | {"Lambda",	0x39b}, | 
|  | {"Lbar",	0x141}, | 
|  | {"Mu",		0x39c}, | 
|  | {"Naira",	0x4e},		/* should have bar through */ | 
|  | {"Nplus",	0x4e},		/* should have plus above */ | 
|  | {"Ntilde",	0xd1}, | 
|  | {"Nu",		0x39d}, | 
|  | {"Oacu",	0xd3}, | 
|  | {"Obar",	0xd8}, | 
|  | {"Ocirc",	0xd4}, | 
|  | {"Oe",		0x152}, | 
|  | {"Omega",	0x3a9}, | 
|  | {"Omicron",	0x39f}, | 
|  | {"Ouml",	0xd6}, | 
|  | {"Phi",		0x3a6}, | 
|  | {"Pi",		0x3a0}, | 
|  | {"Psi",		0x3a8}, | 
|  | {"Rho",		0x3a1}, | 
|  | {"Sacu",	0x15a}, | 
|  | {"Sigma",	0x3a3}, | 
|  | {"Summ",	0x2211}, | 
|  | {"Tau",		0x3a4}, | 
|  | {"Th",		0xde}, | 
|  | {"Theta",	0x398}, | 
|  | {"Tse",		0x426}, | 
|  | {"Uacu",	0xda}, | 
|  | {"Ucirc",	0xdb}, | 
|  | {"Upsilon",	0x3a5}, | 
|  | {"Uuml",	0xdc}, | 
|  | {"Wyn",		0x1bf},		/* wynn U+01BF */ | 
|  | {"Xi",		0x39e}, | 
|  | {"Ygh",		0x1b7},		/* Yogh	U+01B7 */ | 
|  | {"Zeta",	0x396}, | 
|  | {"Zh",		0x1b7},		/* looks like Yogh. Cf "Sake" */ | 
|  | {"a",		0x61},		/* ante */ | 
|  | {"aacu",	0xe1}, | 
|  | {"aang",	0xe5}, | 
|  | {"aasper",	MAAS}, | 
|  | {"abreve",	0x103}, | 
|  | {"acirc",	0xe2}, | 
|  | {"acu",		LACU}, | 
|  | {"ae",		0xe6}, | 
|  | {"agrave",	0xe0}, | 
|  | {"ahook",	0x105}, | 
|  | {"alenis",	MALN}, | 
|  | {"alpha",	0x3b1}, | 
|  | {"amac",	0x101}, | 
|  | {"amp",		0x26}, | 
|  | {"and",		MAND}, | 
|  | {"ang",		LRNG}, | 
|  | {"angle",	0x2220}, | 
|  | {"ankh",	0x2625},		/* ankh U+2625 */ | 
|  | {"ante",	0x61},		/* before (year) */ | 
|  | {"aonq",	MAOQ}, | 
|  | {"appreq",	0x2243}, | 
|  | {"aquar",	0x2652}, | 
|  | {"arDadfull",	0x636},		/* Dad U+0636 */ | 
|  | {"arHa",	0x62d},		/* haa U+062D */ | 
|  | {"arTa",	0x62a},		/* taa U+062A */ | 
|  | {"arain",	0x639},		/* ain U+0639 */ | 
|  | {"arainfull",	0x639},		/* ain U+0639 */ | 
|  | {"aralif",	0x627},		/* alef U+0627 */ | 
|  | {"arba",	0x628},		/* baa U+0628 */ | 
|  | {"arha",	0x647},		/* ha U+0647 */ | 
|  | {"aries",	0x2648}, | 
|  | {"arnun",	0x646},		/* noon U+0646 */ | 
|  | {"arnunfull",	0x646},		/* noon U+0646 */ | 
|  | {"arpa",	0x647},		/* ha U+0647 */ | 
|  | {"arqoph",	0x642},		/* qaf U+0642 */ | 
|  | {"arshinfull",	0x634},		/* sheen U+0634 */ | 
|  | {"arta",	0x62a},		/* taa U+062A */ | 
|  | {"artafull",	0x62a},		/* taa U+062A */ | 
|  | {"artha",	0x62b},		/* thaa U+062B */ | 
|  | {"arwaw",	0x648},		/* waw U+0648 */ | 
|  | {"arya",	0x64a},		/* ya U+064A */ | 
|  | {"aryafull",	0x64a},		/* ya U+064A */ | 
|  | {"arzero",	0x660},		/* indic zero U+0660 */ | 
|  | {"asg",		0x292},		/* unicycle character. Cf "hallow" */ | 
|  | {"asper",	LASP}, | 
|  | {"assert",	0x22a2}, | 
|  | {"astm",	0x2042},		/* asterism: should be upside down */ | 
|  | {"at",		0x40}, | 
|  | {"atilde",	0xe3}, | 
|  | {"auml",	0xe4}, | 
|  | {"ayin",	0x639},		/* arabic ain U+0639 */ | 
|  | {"b1",		0x2d},		/* single bond */ | 
|  | {"b2",		0x3d},		/* double bond */ | 
|  | {"b3",		0x2261},		/* triple bond */ | 
|  | {"bbar",	0x180},		/* b with bar U+0180 */ | 
|  | {"beta",	0x3b2}, | 
|  | {"bigobl",	0x2f}, | 
|  | {"blC",		0x43},		/* should be black letter */ | 
|  | {"blJ",		0x4a},		/* should be black letter */ | 
|  | {"blU",		0x55},		/* should be black letter */ | 
|  | {"blb",		0x62},		/* should be black letter */ | 
|  | {"blozenge",	0x25ca},		/* U+25CA; should be black */ | 
|  | {"bly",		0x79},		/* should be black letter */ | 
|  | {"bra",		MBRA}, | 
|  | {"brbl",	LBRB}, | 
|  | {"breve",	LBRV}, | 
|  | {"bslash",	'\\'}, | 
|  | {"bsquare",	0x25a0},		/* black square U+25A0 */ | 
|  | {"btril",	0x25c0},		/* U+25C0 */ | 
|  | {"btrir",	0x25b6},		/* U+25B6 */ | 
|  | {"c",		0x63},		/* circa */ | 
|  | {"cab",		0x232a}, | 
|  | {"cacu",	0x107}, | 
|  | {"canc",	0x264b}, | 
|  | {"capr",	0x2651}, | 
|  | {"caret",	0x5e}, | 
|  | {"cb",		0x7d}, | 
|  | {"cbigb",	0x7d}, | 
|  | {"cbigpren",	0x29}, | 
|  | {"cbigsb",	0x5d}, | 
|  | {"cced",	0xe7}, | 
|  | {"cdil",	LCED}, | 
|  | {"cdsb",	0x301b},		/* ]] U+301b */ | 
|  | {"cent",	0xa2}, | 
|  | {"chacek",	0x10d}, | 
|  | {"chi",		0x3c7}, | 
|  | {"circ",	LRNG}, | 
|  | {"circa",	0x63},		/* about (year) */ | 
|  | {"circbl",	0x325},		/* ring below accent U+0325 */ | 
|  | {"circle",	0x25cb},		/* U+25CB */ | 
|  | {"circledot",	0x2299}, | 
|  | {"click",	0x296}, | 
|  | {"club",	0x2663}, | 
|  | {"comtime",	0x43}, | 
|  | {"conj",	0x260c}, | 
|  | {"cprt",	0xa9}, | 
|  | {"cq",		'\''}, | 
|  | {"cqq",		0x201d}, | 
|  | {"cross",	0x2720},		/* maltese cross U+2720 */ | 
|  | {"crotchet",	0x2669}, | 
|  | {"csb",		0x5d}, | 
|  | {"ctilde",	0x63},		/* +tilde */ | 
|  | {"ctlig",	MLCT}, | 
|  | {"cyra",	0x430}, | 
|  | {"cyre",	0x435}, | 
|  | {"cyrhard",	0x44a}, | 
|  | {"cyrjat",	0x463}, | 
|  | {"cyrm",	0x43c}, | 
|  | {"cyrn",	0x43d}, | 
|  | {"cyrr",	0x440}, | 
|  | {"cyrsoft",	0x44c}, | 
|  | {"cyrt",	0x442}, | 
|  | {"cyry",	0x44b}, | 
|  | {"dag",		0x2020}, | 
|  | {"dbar",	0x111}, | 
|  | {"dblar",	0x21cb}, | 
|  | {"dblgt",	0x226b}, | 
|  | {"dbllt",	0x226a}, | 
|  | {"dced",	0x64},		/* +cedilla */ | 
|  | {"dd",		MDD}, | 
|  | {"ddag",	0x2021}, | 
|  | {"ddd",		MDDD}, | 
|  | {"decr",	0x2193}, | 
|  | {"deg",		0xb0}, | 
|  | {"dele",	0x64},		/* should be dele */ | 
|  | {"delta",	0x3b4}, | 
|  | {"descnode",	0x260b},		/* descending node U+260B */ | 
|  | {"diamond",	0x2662}, | 
|  | {"digamma",	0x3dd}, | 
|  | {"div",		0xf7}, | 
|  | {"dlessi",	0x131}, | 
|  | {"dlessj1",	0x6a},		/* should be dotless */ | 
|  | {"dlessj2",	0x6a},		/* should be dotless */ | 
|  | {"dlessj3",	0x6a},		/* should be dotless */ | 
|  | {"dollar",	0x24}, | 
|  | {"dotab",	LDOT}, | 
|  | {"dotbl",	LDTB}, | 
|  | {"drachm",	0x292}, | 
|  | {"dubh",	0x2d}, | 
|  | {"eacu",	0xe9}, | 
|  | {"earth",	0x2641}, | 
|  | {"easper",	MEAS}, | 
|  | {"ebreve",	0x115}, | 
|  | {"ecirc",	0xea}, | 
|  | {"edh",		0xf0}, | 
|  | {"egrave",	0xe8}, | 
|  | {"ehacek",	0x11b}, | 
|  | {"ehook",	0x119}, | 
|  | {"elem",	0x220a}, | 
|  | {"elenis",	MELN}, | 
|  | {"em",		0x2014}, | 
|  | {"emac",	0x113}, | 
|  | {"emem",	MEMM}, | 
|  | {"en",		0x2013}, | 
|  | {"epsilon",	0x3b5}, | 
|  | {"equil",	0x21cb}, | 
|  | {"ergo",	0x2234}, | 
|  | {"es",		MES}, | 
|  | {"eszett",	0xdf}, | 
|  | {"eta",		0x3b7}, | 
|  | {"eth",		0xf0}, | 
|  | {"euml",	0xeb}, | 
|  | {"expon",	0x2191}, | 
|  | {"fact",	0x21}, | 
|  | {"fata",	0x251}, | 
|  | {"fatpara",	0xb6},		/* should have fatter, filled in bowl */ | 
|  | {"female",	0x2640}, | 
|  | {"ffilig",	MLFFI}, | 
|  | {"fflig",	MLFF}, | 
|  | {"ffllig",	MLFFL}, | 
|  | {"filig",	MLFI}, | 
|  | {"flat",	0x266d}, | 
|  | {"fllig",	MLFL}, | 
|  | {"frE",		0x45},		/* should be curly */ | 
|  | {"frL",		'L'},		/* should be curly */ | 
|  | {"frR",		0x52},		/* should be curly */ | 
|  | {"frakB",	0x42},		/* should have fraktur style */ | 
|  | {"frakG",	0x47}, | 
|  | {"frakH",	0x48}, | 
|  | {"frakI",	0x49}, | 
|  | {"frakM",	0x4d}, | 
|  | {"frakU",	0x55}, | 
|  | {"frakX",	0x58}, | 
|  | {"frakY",	0x59}, | 
|  | {"frakh",	0x68}, | 
|  | {"frbl",	LFRB}, | 
|  | {"frown",	LFRN}, | 
|  | {"fs",		0x20}, | 
|  | {"fsigma",	0x3c2}, | 
|  | {"gAacu",	0xc1},		/* should be Α+acute */ | 
|  | {"gaacu",	0x3b1},		/* +acute */ | 
|  | {"gabreve",	0x3b1},		/* +breve */ | 
|  | {"gafrown",	0x3b1},		/* +frown */ | 
|  | {"gagrave",	0x3b1},		/* +grave */ | 
|  | {"gamac",	0x3b1},		/* +macron */ | 
|  | {"gamma",	0x3b3}, | 
|  | {"gauml",	0x3b1},		/* +umlaut */ | 
|  | {"ge",		0x2267}, | 
|  | {"geacu",	0x3b5},		/* +acute */ | 
|  | {"gegrave",	0x3b5},		/* +grave */ | 
|  | {"ghacu",	0x3b7},		/* +acute */ | 
|  | {"ghfrown",	0x3b7},		/* +frown */ | 
|  | {"ghgrave",	0x3b7},		/* +grave */ | 
|  | {"ghmac",	0x3b7},		/* +macron */ | 
|  | {"giacu",	0x3b9},		/* +acute */ | 
|  | {"gibreve",	0x3b9},		/* +breve */ | 
|  | {"gifrown",	0x3b9},		/* +frown */ | 
|  | {"gigrave",	0x3b9},		/* +grave */ | 
|  | {"gimac",	0x3b9},		/* +macron */ | 
|  | {"giuml",	0x3b9},		/* +umlaut */ | 
|  | {"glagjat",	0x467}, | 
|  | {"glots",	0x2c0}, | 
|  | {"goacu",	0x3bf},		/* +acute */ | 
|  | {"gobreve",	0x3bf},		/* +breve */ | 
|  | {"grave",	LGRV}, | 
|  | {"gt",		0x3e}, | 
|  | {"guacu",	0x3c5},		/* +acute */ | 
|  | {"gufrown",	0x3c5},		/* +frown */ | 
|  | {"gugrave",	0x3c5},		/* +grave */ | 
|  | {"gumac",	0x3c5},		/* +macron */ | 
|  | {"guuml",	0x3c5},		/* +umlaut */ | 
|  | {"gwacu",	0x3c9},		/* +acute */ | 
|  | {"gwfrown",	0x3c9},		/* +frown */ | 
|  | {"gwgrave",	0x3c9},		/* +grave */ | 
|  | {"hacek",	LHCK}, | 
|  | {"halft",	0x2308}, | 
|  | {"hash",	0x23}, | 
|  | {"hasper",	MHAS}, | 
|  | {"hatpath",	0x5b2},		/* hataf patah U+05B2 */ | 
|  | {"hatqam",	0x5b3},		/* hataf qamats U+05B3 */ | 
|  | {"hatseg",	0x5b1},		/* hataf segol U+05B1 */ | 
|  | {"hbar",	0x127}, | 
|  | {"heart",	0x2661}, | 
|  | {"hebaleph",	0x5d0},		/* aleph U+05D0 */ | 
|  | {"hebayin",	0x5e2},		/* ayin U+05E2 */ | 
|  | {"hebbet",	0x5d1},		/* bet U+05D1 */ | 
|  | {"hebbeth",	0x5d1},		/* bet U+05D1 */ | 
|  | {"hebcheth",	0x5d7},		/* bet U+05D7 */ | 
|  | {"hebdaleth",	0x5d3},		/* dalet U+05D3 */ | 
|  | {"hebgimel",	0x5d2},		/* gimel U+05D2 */ | 
|  | {"hebhe",	0x5d4},		/* he U+05D4 */ | 
|  | {"hebkaph",	0x5db},		/* kaf U+05DB */ | 
|  | {"heblamed",	0x5dc},		/* lamed U+05DC */ | 
|  | {"hebmem",	0x5de},		/* mem U+05DE */ | 
|  | {"hebnun",	0x5e0},		/* nun U+05E0 */ | 
|  | {"hebnunfin",	0x5df},		/* final nun U+05DF */ | 
|  | {"hebpe",	0x5e4},		/* pe U+05E4 */ | 
|  | {"hebpedag",	0x5e3},		/* final pe? U+05E3 */ | 
|  | {"hebqoph",	0x5e7},		/* qof U+05E7 */ | 
|  | {"hebresh",	0x5e8},		/* resh U+05E8 */ | 
|  | {"hebshin",	0x5e9},		/* shin U+05E9 */ | 
|  | {"hebtav",	0x5ea},		/* tav U+05EA */ | 
|  | {"hebtsade",	0x5e6},		/* tsadi U+05E6 */ | 
|  | {"hebwaw",	0x5d5},		/* vav? U+05D5 */ | 
|  | {"hebyod",	0x5d9},		/* yod U+05D9 */ | 
|  | {"hebzayin",	0x5d6},		/* zayin U+05D6 */ | 
|  | {"hgz",		0x292},		/* ??? Cf "alet" */ | 
|  | {"hireq",	0x5b4},		/* U+05B4 */ | 
|  | {"hlenis",	MHLN}, | 
|  | {"hook",	LOGO}, | 
|  | {"horizE",	0x45},		/* should be on side */ | 
|  | {"horizP",	0x50},		/* should be on side */ | 
|  | {"horizS",	0x223d}, | 
|  | {"horizT",	0x22a3}, | 
|  | {"horizb",	0x7b},		/* should be underbrace */ | 
|  | {"ia",		0x3b1}, | 
|  | {"iacu",	0xed}, | 
|  | {"iasper",	MIAS}, | 
|  | {"ib",		0x3b2}, | 
|  | {"ibar",	0x268}, | 
|  | {"ibreve",	0x12d}, | 
|  | {"icirc",	0xee}, | 
|  | {"id",		0x3b4}, | 
|  | {"ident",	0x2261}, | 
|  | {"ie",		0x3b5}, | 
|  | {"ifilig",	MLFI}, | 
|  | {"ifflig",	MLFF}, | 
|  | {"ig",		0x3b3}, | 
|  | {"igrave",	0xec}, | 
|  | {"ih",		0x3b7}, | 
|  | {"ii",		0x3b9}, | 
|  | {"ik",		0x3ba}, | 
|  | {"ilenis",	MILN}, | 
|  | {"imac",	0x12b}, | 
|  | {"implies",	0x21d2}, | 
|  | {"index",	0x261e}, | 
|  | {"infin",	0x221e}, | 
|  | {"integ",	0x222b}, | 
|  | {"intsec",	0x2229}, | 
|  | {"invpri",	0x2cf}, | 
|  | {"iota",	0x3b9}, | 
|  | {"iq",		0x3c8}, | 
|  | {"istlig",	MLST}, | 
|  | {"isub",	0x3f5},		/* iota below accent */ | 
|  | {"iuml",	0xef}, | 
|  | {"iz",		0x3b6}, | 
|  | {"jup",		0x2643}, | 
|  | {"kappa",	0x3ba}, | 
|  | {"koppa",	0x3df}, | 
|  | {"lambda",	0x3bb}, | 
|  | {"lar",		0x2190}, | 
|  | {"lbar",	0x142}, | 
|  | {"le",		0x2266}, | 
|  | {"lenis",	LLEN}, | 
|  | {"leo",		0x264c}, | 
|  | {"lhalfbr",	0x2308}, | 
|  | {"lhshoe",	0x2283}, | 
|  | {"libra",	0x264e}, | 
|  | {"llswing",	MLLS}, | 
|  | {"lm",		0x2d0}, | 
|  | {"logicand",	0x2227}, | 
|  | {"logicor",	0x2228}, | 
|  | {"longs",	0x283}, | 
|  | {"lrar",	0x2194}, | 
|  | {"lt",		0x3c}, | 
|  | {"ltappr",	0x227e}, | 
|  | {"ltflat",	0x2220}, | 
|  | {"lumlbl",	0x6c},		/* +umlaut below */ | 
|  | {"mac",		LMAC}, | 
|  | {"male",	0x2642}, | 
|  | {"mc",		0x63},		/* should be raised */ | 
|  | {"merc",	0x263f},		/* mercury U+263F */ | 
|  | {"min",		0x2212}, | 
|  | {"moonfq",	0x263d},		/* first quarter moon U+263D */ | 
|  | {"moonlq",	0x263e},		/* last quarter moon U+263E */ | 
|  | {"msylab",	0x6d},		/* +sylab (ˌ) */ | 
|  | {"mu",		0x3bc}, | 
|  | {"nacu",	0x144}, | 
|  | {"natural",	0x266e}, | 
|  | {"neq",		0x2260}, | 
|  | {"nfacu",	0x2032}, | 
|  | {"nfasper",	0x2bd}, | 
|  | {"nfbreve",	0x2d8}, | 
|  | {"nfced",	0xb8}, | 
|  | {"nfcirc",	0x2c6}, | 
|  | {"nffrown",	0x2322}, | 
|  | {"nfgra",	0x2cb}, | 
|  | {"nfhacek",	0x2c7}, | 
|  | {"nfmac",	0xaf}, | 
|  | {"nftilde",	0x2dc}, | 
|  | {"nfuml",	0xa8}, | 
|  | {"ng",		0x14b}, | 
|  | {"not",		0xac}, | 
|  | {"notelem",	0x2209}, | 
|  | {"ntilde",	0xf1}, | 
|  | {"nu",		0x3bd}, | 
|  | {"oab",		0x2329}, | 
|  | {"oacu",	0xf3}, | 
|  | {"oasper",	MOAS}, | 
|  | {"ob",		0x7b}, | 
|  | {"obar",	0xf8}, | 
|  | {"obigb",	0x7b},		/* should be big */ | 
|  | {"obigpren",	0x28}, | 
|  | {"obigsb",	0x5b},		/* should be big */ | 
|  | {"obreve",	0x14f}, | 
|  | {"ocirc",	0xf4}, | 
|  | {"odsb",	0x301a},		/* [[ U+301A */ | 
|  | {"oe",		0x153}, | 
|  | {"oeamp",	0x26}, | 
|  | {"ograve",	0xf2}, | 
|  | {"ohook",	0x6f},		/* +hook */ | 
|  | {"olenis",	MOLN}, | 
|  | {"omac",	0x14d}, | 
|  | {"omega",	0x3c9}, | 
|  | {"omicron",	0x3bf}, | 
|  | {"ope",		0x25b}, | 
|  | {"opp",		0x260d}, | 
|  | {"oq",		0x60}, | 
|  | {"oqq",		0x201c}, | 
|  | {"or",		MOR}, | 
|  | {"osb",		0x5b}, | 
|  | {"otilde",	0xf5}, | 
|  | {"ouml",	0xf6}, | 
|  | {"ounce",	0x2125},		/* ounce U+2125 */ | 
|  | {"ovparen",	0x2322},		/* should be sideways ( */ | 
|  | {"p",		0x2032}, | 
|  | {"pa",		0x2202}, | 
|  | {"page",	0x50}, | 
|  | {"pall",	0x28e}, | 
|  | {"paln",	0x272}, | 
|  | {"par",		PAR}, | 
|  | {"para",	0xb6}, | 
|  | {"pbar",	0x70},		/* +bar */ | 
|  | {"per",		0x2118},		/* per U+2118 */ | 
|  | {"phi",		0x3c6}, | 
|  | {"phi2",	0x3d5}, | 
|  | {"pi",		0x3c0}, | 
|  | {"pisces",	0x2653}, | 
|  | {"planck",	0x127}, | 
|  | {"plantinJ",	0x4a},		/* should be script */ | 
|  | {"pm",		0xb1}, | 
|  | {"pmil",	0x2030}, | 
|  | {"pp",		0x2033}, | 
|  | {"ppp",		0x2034}, | 
|  | {"prop",	0x221d}, | 
|  | {"psi",		0x3c8}, | 
|  | {"pstlg",	0xa3}, | 
|  | {"q",		0x3f},		/* should be raised */ | 
|  | {"qamets",	0x5b3},		/* U+05B3 */ | 
|  | {"quaver",	0x266a}, | 
|  | {"rar",		0x2192}, | 
|  | {"rasper",	MRAS}, | 
|  | {"rdot",	0xb7}, | 
|  | {"recipe",	0x211e},		/* U+211E */ | 
|  | {"reg",		0xae}, | 
|  | {"revC",	0x186},		/* open O U+0186 */ | 
|  | {"reva",	0x252}, | 
|  | {"revc",	0x254}, | 
|  | {"revope",	0x25c}, | 
|  | {"revr",	0x279}, | 
|  | {"revsc",	0x2d2},		/* upside-down semicolon */ | 
|  | {"revv",	0x28c}, | 
|  | {"rfa",		0x6f},		/* +hook (Cf "goal") */ | 
|  | {"rhacek",	0x159}, | 
|  | {"rhalfbr",	0x2309}, | 
|  | {"rho",		0x3c1}, | 
|  | {"rhshoe",	0x2282}, | 
|  | {"rlenis",	MRLN}, | 
|  | {"rsylab",	0x72},		/* +sylab */ | 
|  | {"runash",	0x46},		/* should be runic 'ash' */ | 
|  | {"rvow",	0x2d4}, | 
|  | {"sacu",	0x15b}, | 
|  | {"sagit",	0x2650}, | 
|  | {"sampi",	0x3e1}, | 
|  | {"saturn",	0x2644}, | 
|  | {"sced",	0x15f}, | 
|  | {"schwa",	0x259}, | 
|  | {"scorpio",	0x264f}, | 
|  | {"scrA",	0x41},		/* should be script */ | 
|  | {"scrC",	0x43}, | 
|  | {"scrE",	0x45}, | 
|  | {"scrF",	0x46}, | 
|  | {"scrI",	0x49}, | 
|  | {"scrJ",	0x4a}, | 
|  | {"scrL",	'L'}, | 
|  | {"scrO",	0x4f}, | 
|  | {"scrP",	0x50}, | 
|  | {"scrQ",	0x51}, | 
|  | {"scrS",	0x53}, | 
|  | {"scrT",	0x54}, | 
|  | {"scrb",	0x62}, | 
|  | {"scrd",	0x64}, | 
|  | {"scrh",	0x68}, | 
|  | {"scrl",	0x6c}, | 
|  | {"scruple",	0x2108},		/* U+2108 */ | 
|  | {"sdd",		0x2d0}, | 
|  | {"sect",	0xa7}, | 
|  | {"semE",	0x2203}, | 
|  | {"sh",		0x283}, | 
|  | {"shacek",	0x161}, | 
|  | {"sharp",	0x266f}, | 
|  | {"sheva",	0x5b0},		/* U+05B0 */ | 
|  | {"shti",	0x26a}, | 
|  | {"shtsyll",	0x222a}, | 
|  | {"shtu",	0x28a}, | 
|  | {"sidetri",	0x22b2}, | 
|  | {"sigma",	0x3c3}, | 
|  | {"since",	0x2235}, | 
|  | {"slge",	0x2265},		/* should have slanted line under */ | 
|  | {"slle",	0x2264},		/* should have slanted line under */ | 
|  | {"sm",		0x2c8}, | 
|  | {"smm",		0x2cc}, | 
|  | {"spade",	0x2660}, | 
|  | {"sqrt",	0x221a}, | 
|  | {"square",	0x25a1},		/* U+25A1 */ | 
|  | {"ssChi",	0x3a7},		/* should be sans serif */ | 
|  | {"ssIota",	0x399}, | 
|  | {"ssOmicron",	0x39f}, | 
|  | {"ssPi",	0x3a0}, | 
|  | {"ssRho",	0x3a1}, | 
|  | {"ssSigma",	0x3a3}, | 
|  | {"ssTau",	0x3a4}, | 
|  | {"star",	0x2a}, | 
|  | {"stlig",	MLST}, | 
|  | {"sup2",	0x2072}, | 
|  | {"supgt",	0x2c3}, | 
|  | {"suplt",	0x2c2}, | 
|  | {"sur",		0x2b3}, | 
|  | {"swing",	0x223c}, | 
|  | {"tau",		0x3c4}, | 
|  | {"taur",	0x2649}, | 
|  | {"th",		0xfe}, | 
|  | {"thbar",	0xfe},		/* +bar */ | 
|  | {"theta",	0x3b8}, | 
|  | {"thinqm",	0x3f},		/* should be thinner */ | 
|  | {"tilde",	LTIL}, | 
|  | {"times",	0xd7}, | 
|  | {"tri",		0x2206}, | 
|  | {"trli",	0x2016}, | 
|  | {"ts",		0x2009}, | 
|  | {"uacu",	0xfa}, | 
|  | {"uasper",	MUAS}, | 
|  | {"ubar",	0x75},		/* +bar */ | 
|  | {"ubreve",	0x16d}, | 
|  | {"ucirc",	0xfb}, | 
|  | {"udA",		0x2200}, | 
|  | {"udT",		0x22a5}, | 
|  | {"uda",		0x250}, | 
|  | {"udh",		0x265}, | 
|  | {"udqm",	0xbf}, | 
|  | {"udpsi",	0x22d4}, | 
|  | {"udtr",	0x2207}, | 
|  | {"ugrave",	0xf9}, | 
|  | {"ulenis",	MULN}, | 
|  | {"umac",	0x16b}, | 
|  | {"uml",		LUML}, | 
|  | {"undl",	0x2cd},		/* underline accent */ | 
|  | {"union",	0x222a}, | 
|  | {"upsilon",	0x3c5}, | 
|  | {"uuml",	0xfc}, | 
|  | {"vavpath",	0x5d5},		/* vav U+05D5 (+patah) */ | 
|  | {"vavsheva",	0x5d5},		/* vav U+05D5 (+sheva) */ | 
|  | {"vb",		0x7c}, | 
|  | {"vddd",	0x22ee}, | 
|  | {"versicle2",	0x2123},		/* U+2123 */ | 
|  | {"vinc",	0xaf}, | 
|  | {"virgo",	0x264d}, | 
|  | {"vpal",	0x25f}, | 
|  | {"vvf",		0x263}, | 
|  | {"wasper",	MWAS}, | 
|  | {"wavyeq",	0x2248}, | 
|  | {"wlenis",	MWLN}, | 
|  | {"wyn",		0x1bf},		/* wynn U+01BF */ | 
|  | {"xi",		0x3be}, | 
|  | {"yacu",	0xfd}, | 
|  | {"ycirc",	0x177}, | 
|  | {"ygh",		0x292}, | 
|  | {"ymac",	0x79},		/* +macron */ | 
|  | {"yuml",	0xff}, | 
|  | {"zced",	0x7a},		/* +cedilla */ | 
|  | {"zeta",	0x3b6}, | 
|  | {"zh",		0x292}, | 
|  | {"zhacek",	0x17e} | 
|  | }; | 
|  | /* | 
|  | The following special characters don't have close enough | 
|  | equivalents in Unicode, so aren't in the above table. | 
|  | 22n		2^(2^n) Cf Fermat | 
|  | 2on4		2/4 | 
|  | 3on8		3/8 | 
|  | Bantuo		Bantu O. Cf Otshi-herero | 
|  | Car		C with circular arrow on top | 
|  | albrtime 	cut-time: C with vertical line | 
|  | ardal		Cf dental | 
|  | bantuo		Bantu o. Cf Otshi-herero | 
|  | bbc1		single chem bond below | 
|  | bbc2		double chem bond below | 
|  | bbl1		chem bond like / | 
|  | bbl2		chem bond like // | 
|  | bbr1		chem bond like \ | 
|  | bbr2		chem bond \\ | 
|  | bcop1		copper symbol. Cf copper | 
|  | bcop2		copper symbol. Cf copper | 
|  | benchm		Cf benchmark | 
|  | btc1		single chem bond above | 
|  | btc2		double chem bond above | 
|  | btl1		chem bond like \ | 
|  | btl2		chem bond like \\ | 
|  | btr1		chem bond like / | 
btr2		chem bond like //
|  | burman		Cf Burman | 
|  | devph		sanskrit letter. Cf ph | 
|  | devrfls		sanskrit letter. Cf cerebral | 
|  | duplong[12]	musical note | 
|  | egchi		early form of chi | 
|  | eggamma[12]	early form of gamma | 
|  | egiota		early form of iota | 
|  | egkappa		early form of kappa | 
|  | eglambda	early form of lambda | 
|  | egmu[12]	early form of mu | 
|  | egnu[12]	early form of nu | 
|  | egpi[123]	early form of pi | 
|  | egrho[12]	early form of rho | 
|  | egsampi		early form of sampi | 
|  | egsan		early form of san | 
|  | egsigma[12]	early form of sigma | 
|  | egxi[123]	early form of xi | 
|  | elatS		early form of S | 
|  | elatc[12]	early form of C | 
|  | elatg[12]	early form of G | 
|  | glagjeri	Slavonic Glagolitic jeri | 
|  | glagjeru	Slavonic Glagolitic jeru | 
|  | hypolem		hypolemisk (line with underdot) | 
|  | lhrbr		lower half } | 
|  | longmord	long mordent | 
|  | mbwvow		backwards scretched C. Cf retract. | 
|  | mord		music symbol.  Cf mordent | 
|  | mostra		Cf direct | 
|  | ohgcirc		old form of circumflex | 
|  | oldbeta		old form of β. Cf perturbate | 
|  | oldsemibr[12]	old forms of semibreve. Cf prolation | 
|  | ormg		old form of g. Cf G | 
|  | para[12345]	form of ¶ | 
|  | pauseo		musical pause sign | 
|  | pauseu		musical pause sign | 
|  | pharyng		Cf pharyngal | 
|  | ragr		Black letter ragged r | 
|  | repetn		musical repeat. Cf retort | 
|  | segno		musical segno sign | 
|  | semain[12]	semitic ain | 
|  | semhe		semitic he | 
|  | semheth		semitic heth | 
|  | semkaph		semitic kaph | 
|  | semlamed[12]	semitic lamed | 
|  | semmem		semitic mem | 
|  | semnum		semitic nun | 
|  | sempe		semitic pe | 
|  | semqoph[123]	semitic qoph | 
|  | semresh		semitic resh | 
|  | semtav[1234]	semitic tav | 
|  | semyod		semitic yod | 
|  | semzayin[123]	semitic zayin | 
|  | shtlong[12]	U with underbar. Cf glyconic | 
|  | sigmatau	σ,τ combination | 
|  | squaver		sixteenth note | 
|  | sqbreve		square musical breve note | 
|  | swast		swastika | 
|  | uhrbr		upper half of big } | 
|  | versicle1		Cf versicle | 
|  | */ | 
|  |  | 
|  |  | 
|  | static Rune normtab[128] = { | 
|  | /*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/ | 
|  | /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'', | 
|  | 0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f, | 
|  | /*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37, | 
|  | 0x38,	0x39,	0x3a,	0x3b,	TAGS,	0x3d,	TAGE,	0x3f, | 
|  | /*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47, | 
|  | 0x48,	0x49,	0x4a,	0x4b,	'L',	0x4d,	0x4e,	0x4f, | 
|  | /*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57, | 
|  | 0x58,	0x59,	0x5a,	0x5b,	'\\',	0x5d,	0x5e,	0x5f, | 
|  | /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67, | 
|  | 0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f, | 
|  | /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77, | 
|  | 0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE | 
|  | }; | 
|  | static Rune phtab[128] = { | 
|  | /*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/ | 
|  | /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | /*20*/	0x20,	0x21,	0x2c8,	0x23,	0x24,	0x2cc,	0xe6,	'\'', | 
|  | 0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f, | 
|  | /*30*/  0x30,	0x31,	0x32,	0x25c,	0x34,	0x35,	0x36,	0x37, | 
|  | 0x38,	0xf8,	0x2d0,	0x3b,	TAGS,	0x3d,	TAGE,	0x3f, | 
|  | /*40*/  0x259,	0x251,	0x42,	0x43,	0xf0,	0x25b,	0x46,	0x47, | 
|  | 0x48,	0x26a,	0x4a,	0x4b,	'L',	0x4d,	0x14b,	0x254, | 
|  | /*50*/	0x50,	0x252,	0x52,	0x283,	0x3b8,	0x28a,	0x28c,	0x57, | 
|  | 0x58,	0x59,	0x292,	0x5b,	'\\',	0x5d,	0x5e,	0x5f, | 
|  | /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67, | 
|  | 0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f, | 
|  | /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77, | 
|  | 0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE | 
|  | }; | 
|  | static Rune grtab[128] = { | 
|  | /*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/ | 
|  | /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'', | 
|  | 0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f, | 
|  | /*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37, | 
|  | 0x38,	0x39,	0x3a,	0x3b,	TAGS,	0x3d,	TAGE,	0x3f, | 
|  | /*40*/  0x40,	0x391,	0x392,	0x39e,	0x394,	0x395,	0x3a6,	0x393, | 
|  | 0x397,	0x399,	0x3da,	0x39a,	0x39b,	0x39c,	0x39d,	0x39f, | 
|  | /*50*/	0x3a0,	0x398,	0x3a1,	0x3a3,	0x3a4,	0x3a5,	0x56,	0x3a9, | 
|  | 0x3a7,	0x3a8,	0x396,	0x5b,	'\\',	0x5d,	0x5e,	0x5f, | 
|  | /*60*/	0x60,	0x3b1,	0x3b2,	0x3be,	0x3b4,	0x3b5,	0x3c6,	0x3b3, | 
|  | 0x3b7,	0x3b9,	0x3c2,	0x3ba,	0x3bb,	0x3bc,	0x3bd,	0x3bf, | 
|  | /*70*/	0x3c0,	0x3b8,	0x3c1,	0x3c3,	0x3c4,	0x3c5,	0x76,	0x3c9, | 
|  | 0x3c7,	0x3c8,	0x3b6,	0x7b,	0x7c,	0x7d,	0x7e,	NONE | 
|  | }; | 
|  | static Rune subtab[128] = {	/* subscript table indexed by a 7-bit input byte; handed to changett() for In tags */ | 
|  | /*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/	/* columns: low three bits of the index */ | 
|  | /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	/* control bytes map to NONE, which the print loop ignores */ | 
|  | NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',	/* '&' -> SPCS: start of a special-character name */ | 
|  | 0x208d,	0x208e,	0x2a,	0x208a,	0x2c,	0x208b,	0x2e,	0x2f,	/* ( ) + - -> U+208D U+208E U+208A U+208B */ | 
|  | /*30*/  0x2080,	0x2081,	0x2082,	0x2083,	0x2084,	0x2085,	0x2086,	0x2087,	/* digits 0-9 -> U+2080..U+2089 */ | 
|  | 0x2088,	0x2089,	0x3a,	0x3b,	TAGS,	0x208c,	TAGE,	0x3f,	/* '<' -> TAGS, '=' -> U+208C, '>' -> TAGE */ | 
|  | /*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,	/* letters and remaining punctuation map to themselves */ | 
|  | 0x48,	0x49,	0x4a,	0x4b,	'L',	0x4d,	0x4e,	0x4f, | 
|  | /*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57, | 
|  | 0x58,	0x59,	0x5a,	0x5b,	'\\',	0x5d,	0x5e,	0x5f, | 
|  | /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67, | 
|  | 0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f, | 
|  | /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77, | 
|  | 0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE | 
|  | }; | 
|  | /* | 
|  |  * Superscript table indexed by a 7-bit input byte; handed to | 
|  |  * changett() for Hm, Su and Fq tags.  Digits and ( ) + - = map to | 
|  |  * their superscript forms, '&' '<' '>' map to the SPCS/TAGS/TAGE | 
|  |  * markers, control bytes map to NONE, everything else maps to itself. | 
|  |  * | 
|  |  * Superscript 1, 2 and 3 live in Latin-1 (U+00B9, U+00B2, U+00B3), | 
|  |  * not in the U+2070 block: U+2071 is SUPERSCRIPT LATIN SMALL LETTER I | 
|  |  * and U+2072/U+2073 are unassigned. | 
|  |  */ | 
|  | static Rune suptab[128] = { | 
|  | /*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/ | 
|  | /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
|  | /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'', | 
|  | 0x207d,	0x207e,	0x2a,	0x207a,	0x2c,	0x207b,	0x2e,	0x2f, | 
|  | /*30*/  0x2070,	0xb9,	0xb2,	0xb3,	0x2074,	0x2075,	0x2076,	0x2077, | 
|  | 0x2078,	0x2079,	0x3a,	0x3b,	TAGS,	0x207c,	TAGE,	0x3f, | 
|  | /*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47, | 
|  | 0x48,	0x49,	0x4a,	0x4b,	'L',	0x4d,	0x4e,	0x4f, | 
|  | /*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57, | 
|  | 0x58,	0x59,	0x5a,	0x5b,	'\\',	0x5d,	0x5e,	0x5f, | 
|  | /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67, | 
|  | 0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f, | 
|  | /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77, | 
|  | 0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE | 
|  | }; | 
|  |  | 
|  | static int	tagstarts;	/* set by gettag: 1 for an opening tag, 0 for a closing "</...>" tag */ | 
|  | static char	tag[Buflen];	/* name of the tag most recently parsed by gettag */ | 
|  | static int	naux;	/* number of aux slots gettag filled for that tag */ | 
|  | static char	auxname[Maxaux][Buflen];	/* aux names collected by gettag */ | 
|  | static char	auxval[Maxaux][Buflen];	/* aux values, parallel to auxname */ | 
|  | static char	spec[Buflen];	/* special-character name collected by getspec */ | 
|  | static char	*auxstate[Naux];	/* vals for most recent tag, indexed by the code looked up in auxtab; 0 when the tag did not carry that aux */ | 
|  | static Entry	curentry;	/* entry being printed; set by oedprintentry and read by dostatus */ | 
|  | #define cursize (curentry.end-curentry.start)	/* byte length of curentry, used in debug messages */ | 
|  |  | 
|  | static char	*getspec(char *, char *);	/* collects a "&name." special into spec[] */ | 
|  | static char	*gettag(char *, char *);	/* parses a "<tag aux=val ...>" into tag[], auxname[], auxval[] */ | 
|  | static void	dostatus(void);	/* prints the marker for auxstate[St] */ | 
|  |  | 
|  | /* | 
|  |  * Print entry e through the out*() routines. | 
|  |  * cmd is one of: | 
|  |  *    'p': normal print | 
|  |  *    'h': just print headwords | 
|  |  *    'r': print raw (every byte is handed to outchar untranslated) | 
|  |  * Any other value is handled like 'p'. | 
|  |  * | 
|  |  * Each input byte is masked to 7 bits and mapped through the current | 
|  |  * translation table (normtab to begin with; In, Hm/Su/Fq, Gk and Ph | 
|  |  * tags switch tables through changett).  A plain rune is held back in | 
|  |  * rprev for one step so that a following "&name." special can be | 
|  |  * combined with it by liglookup. | 
|  |  */ | 
|  | void | 
|  | oedprintentry(Entry e, int cmd) | 
|  | { | 
|  | 	char *p, *pe; | 
|  | 	int t, a, i; | 
|  | 	long r, rprev, rlig; | 
|  | 	Rune *transtab; | 
|  |  | 
|  | 	p = e.start; | 
|  | 	pe = e.end; | 
|  | 	transtab = normtab; | 
|  | 	rprev = NONE; | 
|  | 	changett(0, 0, 0); | 
|  | 	curentry = e; | 
|  | 	if(cmd == 'h') | 
|  | 		outinhibit = 1;	/* cleared again inside Hw tags, see below */ | 
|  | 	while(p < pe) { | 
|  | 		if(cmd == 'r') { | 
|  | 			outchar(*p++); | 
|  | 			continue; | 
|  | 		} | 
|  | 		r = transtab[(*p++)&0x7F]; | 
|  | 		if(r < NONE) { | 
|  | 			/* Emit the rune, but buffer in case of ligature */ | 
|  | 			if(rprev != NONE) | 
|  | 				outrune(rprev); | 
|  | 			rprev = r; | 
|  | 		} else if(r == SPCS) { | 
|  | 			/* Start of special character name */ | 
|  | 			p = getspec(p, pe); | 
|  | 			r = lookassoc(spectab, asize(spectab), spec); | 
|  | 			if(r == -1) { | 
|  | 				/* cursize is a pointer difference; %d wants an int */ | 
|  | 				if(debug) | 
|  | 					err("spec %ld %d %s", | 
|  | 						e.doff, (int)cursize, spec); | 
|  | 				r = 0xfffd;	/* unknown name: U+FFFD replacement character */ | 
|  | 			} | 
|  | 			if(r >= LIGS && r < LIGE) { | 
|  | 				/* handle possible ligature */ | 
|  | 				rlig = liglookup(r, rprev); | 
|  | 				if(rlig != NONE) | 
|  | 					rprev = rlig;	/* overwrite rprev */ | 
|  | 				else { | 
|  | 					/* could print accent, but let's not */ | 
|  | 					if(rprev != NONE) | 
|  | 						outrune(rprev); | 
|  | 					rprev = NONE; | 
|  | 				} | 
|  | 			} else if(r >= MULTI && r < MULTIE) { | 
|  | 				if(rprev != NONE) { | 
|  | 					outrune(rprev); | 
|  | 					rprev = NONE; | 
|  | 				} | 
|  | 				outrunes(multitab[r-MULTI]); | 
|  | 			} else if(r == PAR) { | 
|  | 				if(rprev != NONE) { | 
|  | 					outrune(rprev); | 
|  | 					rprev = NONE; | 
|  | 				} | 
|  | 				outnl(1); | 
|  | 			} else { | 
|  | 				if(rprev != NONE) | 
|  | 					outrune(rprev); | 
|  | 				rprev = r; | 
|  | 			} | 
|  | 		} else if(r == TAGS) { | 
|  | 			/* Start of tag name */ | 
|  | 			if(rprev != NONE) { | 
|  | 				outrune(rprev); | 
|  | 				rprev = NONE; | 
|  | 			} | 
|  | 			p = gettag(p, pe); | 
|  | 			t = lookassoc(tagtab, asize(tagtab), tag); | 
|  | 			if(t == -1) { | 
|  | 				if(debug) | 
|  | 					err("tag %ld %d %s", | 
|  | 						e.doff, (int)cursize, tag); | 
|  | 				continue; | 
|  | 			} | 
|  | 			/* rebuild auxstate[] from the aux pairs of this tag */ | 
|  | 			for(i = 0; i < Naux; i++) | 
|  | 				auxstate[i] = 0; | 
|  | 			for(i = 0; i < naux; i++) { | 
|  | 				a = lookassoc(auxtab, asize(auxtab), auxname[i]); | 
|  | 				if(a == -1) { | 
|  | 					if(debug) | 
|  | 						err("aux %ld %d %s", | 
|  | 							e.doff, (int)cursize, auxname[i]); | 
|  | 				} else | 
|  | 					auxstate[a] = auxval[i]; | 
|  | 			} | 
|  | 			switch(t){ | 
|  | 			case E: | 
|  | 			case Ve: | 
|  | 				outnl(0); | 
|  | 				if(tagstarts) | 
|  | 					dostatus(); | 
|  | 				break; | 
|  | 			case Ed: | 
|  | 			case Etym: | 
|  | 				outchar(tagstarts? '[' : ']'); | 
|  | 				break; | 
|  | 			case Pr: | 
|  | 				outchar(tagstarts? '(' : ')'); | 
|  | 				break; | 
|  | 			case In: | 
|  | 				transtab = changett(transtab, subtab, tagstarts); | 
|  | 				break; | 
|  | 			case Hm: | 
|  | 			case Su: | 
|  | 			case Fq: | 
|  | 				transtab = changett(transtab, suptab, tagstarts); | 
|  | 				break; | 
|  | 			case Gk: | 
|  | 				transtab = changett(transtab, grtab, tagstarts); | 
|  | 				break; | 
|  | 			case Ph: | 
|  | 				transtab = changett(transtab, phtab, tagstarts); | 
|  | 				break; | 
|  | 			case Hw: | 
|  | 				if(cmd == 'h') { | 
|  | 					if(!tagstarts) | 
|  | 						outchar(' '); | 
|  | 					outinhibit = !tagstarts; | 
|  | 				} | 
|  | 				break; | 
|  | 			case S0: | 
|  | 			case S1: | 
|  | 			case S2: | 
|  | 			case S3: | 
|  | 			case S4: | 
|  | 			case S5: | 
|  | 			case S6: | 
|  | 			case S7a: | 
|  | 			case S7n: | 
|  | 			case Sn: | 
|  | 			case Sgk: | 
|  | 				if(tagstarts) { | 
|  | 					outnl(2); | 
|  | 					dostatus(); | 
|  | 					if(auxstate[Num]) { | 
|  | 						if(t == S3 || t == S5) { | 
|  | 							/* "> 0" keeps a negative num from looping almost forever */ | 
|  | 							i = atoi(auxstate[Num]); | 
|  | 							while(i-- > 0) | 
|  | 								outchar('*'); | 
|  | 							outchars("  "); | 
|  | 						} else if(t == S7a || t == S7n || t == Sn) { | 
|  | 							outchar('('); | 
|  | 							outchars(auxstate[Num]); | 
|  | 							outchars(") "); | 
|  | 						} else if(t == Sgk) { | 
|  | 							/* | 
|  | 							 * grtab has 128 entries: mask the byte as the | 
|  | 							 * main loop does, and only emit plain runes | 
|  | 							 * (values below NONE), never a marker. | 
|  | 							 */ | 
|  | 							i = grtab[auxstate[Num][0] & 0x7F]; | 
|  | 							if(i < NONE) | 
|  | 								outrune(i); | 
|  | 							outchars(".  "); | 
|  | 						} else { | 
|  | 							outchars(auxstate[Num]); | 
|  | 							outchars(".  "); | 
|  | 						} | 
|  | 					} | 
|  | 				} | 
|  | 				break; | 
|  | 			case Cb: | 
|  | 			case Db: | 
|  | 			case Qp: | 
|  | 			case P: | 
|  | 				if(tagstarts) | 
|  | 					outnl(1); | 
|  | 				break; | 
|  | 			case Table: | 
|  | 				/* | 
|  | 				 * Todo: gather columns, justify them, etc. | 
|  | 				 * For now, just let columns come out as rows | 
|  | 				 */ | 
|  | 				if(!tagstarts) | 
|  | 					outnl(0); | 
|  | 				break; | 
|  | 			case Col: | 
|  | 				if(tagstarts) | 
|  | 					outnl(0); | 
|  | 				break; | 
|  | 			case Dn: | 
|  | 				if(tagstarts) | 
|  | 					outchar('/'); | 
|  | 				break; | 
|  | 			} | 
|  | 		} | 
|  | 	} | 
|  | 	/* end of entry: no ligature can follow, so emit the held-back rune */ | 
|  | 	if(rprev != NONE) | 
|  | 		outrune(rprev); | 
|  | 	if(cmd == 'h') { | 
|  | 		outinhibit = 0; | 
|  | 		outnl(0); | 
|  | 	} | 
|  | } | 
|  |  | 
|  | /* | 
|  |  * Scan bdict forward from fromoff for the next entry-opening tag | 
|  |  * (<e>, <ve>, <e st=...> or <ve st=...>) and return the offset of | 
|  |  * its '<'.  Returns -1 if the seek fails; if the input runs out | 
|  |  * before a tag is seen, the offset reached at that point is returned. | 
|  |  */ | 
|  | long | 
|  | oednextoff(long fromoff) | 
|  | { | 
|  | 	long pos, taglen, cand; | 
|  | 	int c; | 
|  |  | 
|  | 	pos = Bseek(bdict, fromoff, 0); | 
|  | 	if(pos < 0) | 
|  | 		return -1; | 
|  | 	/* taglen: bytes consumed from '<' through the char after the name */ | 
|  | 	taglen = 0; | 
|  | 	while(taglen == 0 && (c = Bgetc(bdict)) >= 0) { | 
|  | 		if(c != '<') | 
|  | 			continue; | 
|  | 		c = Bgetc(bdict); | 
|  | 		if(c == 'e') | 
|  | 			cand = 3;	/* "<e" plus terminator */ | 
|  | 		else if(c == 'v' && Bgetc(bdict) == 'e') | 
|  | 			cand = 4;	/* "<ve" plus terminator */ | 
|  | 		else | 
|  | 			continue; | 
|  | 		c = Bgetc(bdict); | 
|  | 		if(c == '>' || c == ' ') | 
|  | 			taglen = cand; | 
|  | 	} | 
|  | 	return Boffset(bdict) - taglen; | 
|  | } | 
|  |  | 
|  | static char *prkey1 =	/* pronunciation key, piece 1 of 6 printed by oedprintkey: title and English consonants */ | 
|  | "KEY TO THE PRONUNCIATION\n" | 
|  | "\n" | 
|  | "I. CONSONANTS\n" | 
|  | "b, d, f, k, l, m, n, p, t, v, z: usual English values\n" | 
|  | "\n" | 
|  | "g as in go (gəʊ)\n" | 
|  | "h  ...  ho! (həʊ)\n" | 
|  | "r  ...  run (rʌn), terrier (ˈtɛriə(r))\n" | 
|  | "(r)...  her (hɜː(r))\n" | 
|  | "s  ...  see (siː), success (səkˈsɜs)\n" | 
|  | "w  ...  wear (wɛə(r))\n" | 
|  | "hw ...  when (hwɛn)\n" | 
|  | "j  ...  yes (jɛs)\n" | 
|  | "θ  ...  thin (θin), bath (bɑːθ)\n" | 
|  | "ð  ...  then (ðɛn), bathe (beɪð)\n" | 
|  | "ʃ  ...  shop (ʃɒp), dish (dɪʃ)\n" | 
|  | "tʃ ...  chop (tʃɒp), ditch (dɪtʃ)\n" | 
|  | "ʒ  ...  vision (ˈvɪʒən), déjeuner (deʒøne)\n" | 
|  | ; | 
|  | static char *prkey2 =	/* pronunciation key, piece 2 of 6: remaining English consonants and the foreign consonants */ | 
|  | "dʒ ...  judge (dʒʌdʒ)\n" | 
|  | "ŋ  ...  singing (ˈsɪŋɪŋ), think (θiŋk)\n" | 
|  | "ŋg ...  finger (ˈfiŋgə(r))\n" | 
|  | "\n" | 
|  | "Foreign\n" | 
|  | "ʎ as in It. seraglio (serˈraʎo)\n" | 
|  | "ɲ  ...  Fr. cognac (kɔɲak)\n" | 
|  | "x  ...  Ger. ach (ax), Sc. loch (lɒx)\n" | 
|  | "ç  ...  Ger. ich (ɪç), Sc. nicht (nɪçt)\n" | 
|  | "ɣ  ...  North Ger. sagen (ˈzaːɣən)\n" | 
|  | "c  ...  Afrikaans baardmannetjie (ˈbaːrtmanəci)\n" | 
|  | "ɥ  ...  Fr. cuisine (kɥizin)\n" | 
|  | "\n" | 
|  | ; | 
|  | static char *prkey3 =	/* pronunciation key, piece 3 of 6: section II heading and the short vowels */ | 
|  | "II. VOWELS AND DIPHTHONGS\n" | 
|  | "\n" | 
|  | "Short\n" | 
|  | "ɪ as in pit (pɪt), -ness (-nɪs)\n" | 
|  | "ɛ  ...  pet (pɛt), Fr. sept (sɛt)\n" | 
|  | "æ  ...  pat (pæt)\n" | 
|  | "ʌ  ...  putt (pʌt)\n" | 
|  | "ɒ  ...  pot (pɒt)\n" | 
|  | "ʊ  ...  put (pʊt)\n" | 
|  | "ə  ...  another (əˈnʌðə(r))\n" | 
|  | "(ə)...  beaten (ˈbiːt(ə)n)\n" | 
|  | "i  ...  Fr. si (si)\n" | 
|  | "e  ...  Fr. bébé (bebe)\n" | 
|  | "a  ...  Fr. mari (mari)\n" | 
|  | "ɑ  ...  Fr. bâtiment (bɑtimã)\n" | 
|  | "ɔ  ...  Fr. homme (ɔm)\n" | 
|  | "o  ...  Fr. eau (o)\n" | 
|  | "ø  ...  Fr. peu (pø)\n" | 
|  | ; | 
|  | static char *prkey4 =	/* pronunciation key, piece 4 of 6: remaining short vowels and the long vowels */ | 
|  | "œ  ...  Fr. boeuf (bœf), coeur (kœr)\n" | 
|  | "u  ...  Fr. douce (dus)\n" | 
|  | "ʏ  ...  Ger. Müller (ˈmʏlər)\n" | 
|  | "y  ...  Fr. du (dy)\n" | 
|  | "\n" | 
|  | "Long\n" | 
|  | "iː as in bean (biːn)\n" | 
|  | "ɑː ...  barn (bɑːn)\n" | 
|  | "ɔː ...  born (bɔːn)\n" | 
|  | "uː ...  boon (buːn)\n" | 
|  | "ɜː ...  burn (bɜːn)\n" | 
|  | "eː ...  Ger. Schnee (ʃneː)\n" | 
|  | "ɛː ...  Ger. Fähre (ˈfɛːrə)\n" | 
|  | "aː ...  Ger. Tag (taːk)\n" | 
|  | "oː ...  Ger. Sohn (zoːn)\n" | 
|  | "øː ...  Ger. Goethe (gøːtə)\n" | 
|  | "yː ...  Ger. grün (gryːn)\n" | 
|  | "\n" | 
|  | ; | 
|  | static char *prkey5 =	/* pronunciation key, piece 5 of 6: nasal vowels and diphthongs */ | 
|  | "Nasal\n" | 
|  | "ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n" | 
|  | "ã  ...  Fr. franc (frã)\n" | 
|  | "ɔ˜ ...  Fr. bon (bɔ˜n)\n" | 
|  | "œ˜ ...  Fr. un (œ˜)\n" | 
|  | "\n" | 
|  | "Diphthongs, etc.\n" | 
|  | "eɪ as in bay (beɪ)\n" | 
|  | "aɪ ...  buy (baɪ)\n" | 
|  | "ɔɪ ...  boy (bɔɪ)\n" | 
|  | "əʊ ...  no (nəʊ)\n" | 
|  | "aʊ ...  now (naʊ)\n" | 
|  | "ɪə ...  peer (pɪə(r))\n" | 
|  | "ɛə ...  pair (pɛə(r))\n" | 
|  | "ʊə ...  tour (tʊə(r))\n" | 
|  | "ɔə ...  boar (bɔə(r))\n" | 
|  | "\n" | 
|  | ; | 
|  | static char *prkey6 =	/* pronunciation key, piece 6 of 6: stress marks and a worked example */ | 
|  | "III. STRESS\n" | 
|  | "\n" | 
|  | "Main stress: ˈ preceding stressed syllable\n" | 
|  | "Secondary stress: ˌ preceding stressed syllable\n" | 
|  | "\n" | 
|  | "E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n"; | 
|  | /* TODO: find transcriptions of foreign consonants, œ, ʏ, nasals */ | 
|  |  | 
|  | /* Write the six pieces of the pronunciation key to bout, in order. */ | 
|  | void | 
|  | oedprintkey(void) | 
|  | { | 
|  | 	Bprint(bout, "%s", prkey1); | 
|  | 	Bprint(bout, "%s", prkey2); | 
|  | 	Bprint(bout, "%s", prkey3); | 
|  | 	Bprint(bout, "%s", prkey4); | 
|  | 	Bprint(bout, "%s", prkey5); | 
|  | 	Bprint(bout, "%s", prkey6); | 
|  | } | 
|  |  | 
|  | /* | 
|  |  * f points just after a '&', fe points at end of entry. | 
|  |  * Accumulate the special name, starting after the & | 
|  |  * and continuing until the next '.', in spec[]. | 
|  |  * Return pointer to char after '.'. | 
|  |  * | 
|  |  * The scan also stops when it reaches fe or when spec[] is full; | 
|  |  * spec[] is always NUL-terminated.  No byte at or beyond fe is ever | 
|  |  * read, so the returned pointer never exceeds fe (if the '&' was the | 
|  |  * last byte of the entry, spec[] is empty and f is returned as is). | 
|  |  */ | 
|  | static char * | 
|  | getspec(char *f, char *fe) | 
|  | { | 
|  | 	char *t; | 
|  | 	int c, i; | 
|  |  | 
|  | 	t = spec; | 
|  | 	i = sizeof spec; | 
|  | 	/* --i > 0 reserves the last byte of spec[] for the terminator */ | 
|  | 	while(--i > 0 && f < fe) { | 
|  | 		c = *f++; | 
|  | 		if(c == '.' || f == fe) | 
|  | 			break; | 
|  | 		*t++ = c; | 
|  | 	} | 
|  | 	*t = 0; | 
|  | 	return f; | 
|  | } | 
|  |  | 
|  | /* | 
|  |  * f points just after '<'; fe points at end of entry. | 
|  |  * Expect next characters from f to match: | 
|  |  *  [/][^ >]+( [^>=]+=[^ >]+)*> | 
|  |  *      tag   auxname auxval | 
|  |  * Accumulate the tag and its auxiliary information in | 
|  |  * tag[], auxname[][] and auxval[][]. | 
|  |  * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0. | 
|  |  * Set naux to the number of aux pairs found. | 
|  |  * Return pointer to after final '>'. | 
|  |  * | 
|  |  * The scan also stops at fe or when the field being filled is full; | 
|  |  * no byte at or beyond fe is read, so the result never exceeds fe. | 
|  |  * Every field is NUL-terminated within its Buflen bytes.  An aux name | 
|  |  * without "=value" gets an empty value rather than one left over from | 
|  |  * an earlier tag.  At most Maxaux pairs are kept; further pairs are | 
|  |  * skipped without disturbing the ones already stored. | 
|  |  */ | 
|  | static char * | 
|  | gettag(char *f, char *fe) | 
|  | { | 
|  | 	char *t;	/* where the next byte goes; 0 while discarding extra pairs */ | 
|  | 	int c, i; | 
|  |  | 
|  | 	t = tag; | 
|  | 	naux = 0; | 
|  | 	tagstarts = 1; | 
|  | 	if(f >= fe) { | 
|  | 		/* the '<' was the last byte of the entry: empty tag */ | 
|  | 		*t = 0; | 
|  | 		return f; | 
|  | 	} | 
|  | 	c = *f++; | 
|  | 	if(c == '/') | 
|  | 		tagstarts = 0; | 
|  | 	else | 
|  | 		*t++ = c; | 
|  | 	/* | 
|  | 	 * Budget for the rest of tag[]: the byte stored above counts too, | 
|  | 	 * otherwise a Buflen-long name would put its NUL one past the array. | 
|  | 	 */ | 
|  | 	i = Buflen - (int)(t - tag); | 
|  | 	while(--i > 0 && f < fe) { | 
|  | 		c = *f++; | 
|  | 		if(c == '>' || f == fe) | 
|  | 			break; | 
|  | 		if(c == ' ') { | 
|  | 			/* end of the tag name or of the previous aux pair */ | 
|  | 			if(t) | 
|  | 				*t = 0; | 
|  | 			if(naux < Maxaux) { | 
|  | 				t = auxname[naux]; | 
|  | 				auxval[naux][0] = 0;	/* no stale value if "=val" is missing */ | 
|  | 				naux++; | 
|  | 			} else | 
|  | 				t = 0;	/* all slots used: drop the rest */ | 
|  | 			i = Buflen; | 
|  | 		} else if(naux && c == '=') { | 
|  | 			/* switch from the aux name to its value */ | 
|  | 			if(t) { | 
|  | 				*t = 0; | 
|  | 				t = auxval[naux-1]; | 
|  | 			} | 
|  | 			i = Buflen; | 
|  | 		} else if(t) | 
|  | 			*t++ = c; | 
|  | 	} | 
|  | 	if(t) | 
|  | 		*t = 0; | 
|  | 	return f; | 
|  | } | 
|  |  | 
|  | /* | 
|  |  * Print the marker for the St aux of the most recent tag, if it has one: | 
|  |  *    "obs"          U+2020 (dagger) | 
|  |  *    "ali"          U+2016 (double vertical line) | 
|  |  *    "err", "spu"   U+00B6 (pilcrow) | 
|  |  *    "xref"         nothing | 
|  |  * Any other value is reported through err() when debug is set. | 
|  |  */ | 
|  | static void | 
|  | dostatus(void) | 
|  | { | 
|  | 	char *s; | 
|  |  | 
|  | 	s = auxstate[St]; | 
|  | 	if(s == 0) | 
|  | 		return; | 
|  | 	if(strcmp(s, "obs") == 0) | 
|  | 		outrune(0x2020); | 
|  | 	else if(strcmp(s, "ali") == 0) | 
|  | 		outrune(0x2016); | 
|  | 	else if(strcmp(s, "err") == 0 || strcmp(s, "spu") == 0) | 
|  | 		outrune(0xb6); | 
|  | 	else if(strcmp(s, "xref") == 0) | 
|  | 		{/* nothing */} | 
|  | 	else if(debug) | 
|  | 		/* cursize is a pointer difference; %d wants an int */ | 
|  | 		err("status %ld %d %s", curentry.doff, (int)cursize, s); | 
|  | } | 