| #include <u.h> |
| #include <libc.h> |
| #include <bio.h> |
| #include "dict.h" |
| |
| enum { |
| Buflen=1000, |
| Maxaux=5 |
| }; |
| |
| /* Possible tags */ |
| enum { |
| A, /* author in quote (small caps) */ |
| B, /* bold */ |
| Ba, /* author inside bib */ |
| Bch, /* builtup chem component */ |
| Bib, /* surrounds word 'in' for bibliographic ref */ |
| Bl, /* bold */ |
| Bo, /* bond over */ |
| Bu, /* bond under */ |
| Cb, /* ? block of stuff (indent) */ |
| Cf, /* cross ref to another entry (italics) */ |
| Chem, /* chemistry formula */ |
| Co, /* over (preceding sum, integral, etc.) */ |
| Col, /* column of table (aux just may be r) */ |
| Cu, /* under (preceding sum, integral, etc.) */ |
| Dat, /* date */ |
| Db, /* def block? indent */ |
| Dn, /* denominator of fraction */ |
| E, /* main entry */ |
| Ed, /* editor's comments (in [...]) */ |
| Etym, /* etymology (in [...]) */ |
| Fq, /* frequency count (superscript) */ |
| Form, /* formula */ |
| Fr, /* fraction (contains <nu>, then <dn>) */ |
| Gk, /* greek (transliteration) */ |
| Gr, /* grammar? (e.g., around 'pa.' in 'pa. pple.') */ |
| Hg, /* headword group */ |
| Hm, /* homonym (superscript) */ |
| Hw, /* headword (bold) */ |
| I, /* italics */ |
| Il, /* italic list? */ |
| In, /* inferior (subscript) */ |
| L, /* row of col of table */ |
| La, /* status or usage label (italic) */ |
| Lc, /* chapter/verse sort of thing for works */ |
| N, /* note (smaller type) */ |
| Nu, /* numerator of fraction */ |
| Ov, /* needs overline */ |
| P, /* paragraph (indent) */ |
| Ph, /* pronunciation (transliteration) */ |
| Pi, /* pile (frac without line) */ |
| Pqp, /* subblock of quote */ |
| Pr, /* pronunciation (in (...)) */ |
| Ps, /* position (e.g., adv.) (italic) */ |
| Pt, /* part (in lc) */ |
| Q, /* quote in quote block */ |
| Qd, /* quote date (bold) */ |
| Qig, /* quote number (greek) */ |
| Qla, /* status or usage label in quote (italic) */ |
| Qp, /* quote block (small type, indent) */ |
| Qsn, /* quote number */ |
| Qt, /* quote words */ |
| R, /* roman type style */ |
| Rx, /* relative cross reference (e.g., next) */ |
| S, /* another form? (italic) */ |
| S0, /* sense (sometimes surrounds several sx's) */ |
| S1, /* sense (aux num: indented bold letter) */ |
| S2, /* sense (aux num: indented bold capital rom num) */ |
| S3, /* sense (aux num: indented number of asterisks) */ |
| S4, /* sense (aux num: indented bold number) */ |
| S5, /* sense (aux num: indented number of asterisks) */ |
| S6, /* subsense (aux num: bold letter) */ |
| S7a, /* subsense (aux num: letter) */ |
| S7n, /* subsense (aux num: roman numeral) */ |
| Sc, /* small caps */ |
| Sgk, /* subsense (aux num: transliterated greek) */ |
| Sn, /* sense of subdefinition (aux num: roman letter) */ |
| Ss, /* sans serif */ |
| Ssb, /* sans serif bold */ |
| Ssi, /* sans serif italic */ |
| Su, /* superior (superscript) */ |
| Sub, /* subdefinition */ |
| Table, /* table (aux cols=number of columns) */ |
| Tt, /* title? (italics) */ |
| Vd, /* numeric label for variant form */ |
| Ve, /* variant entry */ |
| Vf, /* variant form (light bold) */ |
| Vfl, /* list of vf's (starts with Also or Forms) */ |
| W, /* work (e.g., Beowulf) (italics) */ |
| X, /* cross reference to main word (small caps) */ |
| Xd, /* cross reference to quotation by date */ |
| Xi, /* internal cross reference ? (italic) */ |
| Xid, /* cross reference identifer, in quote ? */ |
| Xs, /* cross reference sense (lower number) */ |
| Xr, /* list of x's */ |
| Ntag /* end of tags */ |
| }; |
| |
| /* Assoc tables must be sorted on first field */ |
| |
| static Assoc tagtab[] = { |
| {"a", A}, |
| {"b", B}, |
| {"ba", Ba}, |
| {"bch", Bch}, |
| {"bib", Bib}, |
| {"bl", Bl}, |
| {"bo", Bo}, |
| {"bu", Bu}, |
| {"cb", Cb}, |
| {"cf", Cf}, |
| {"chem", Chem}, |
| {"co", Co}, |
| {"col", Col}, |
| {"cu", Cu}, |
| {"dat", Dat}, |
| {"db", Db}, |
| {"dn", Dn}, |
| {"e", E}, |
| {"ed", Ed}, |
| {"et", Etym}, |
| {"etym", Etym}, |
| {"form", Form}, |
| {"fq", Fq}, |
| {"fr", Fr}, |
| {"frac", Fr}, |
| {"gk", Gk}, |
| {"gr", Gr}, |
| {"hg", Hg}, |
| {"hm", Hm}, |
| {"hw", Hw}, |
| {"i", I}, |
| {"il", Il}, |
| {"in", In}, |
| {"l", L}, |
| {"la", La}, |
| {"lc", Lc}, |
| {"n", N}, |
| {"nu", Nu}, |
| {"ov", Ov}, |
| {"p", P}, |
| {"ph", Ph}, |
| {"pi", Pi}, |
| {"pqp", Pqp}, |
| {"pr", Pr}, |
| {"ps", Ps}, |
| {"pt", Pt}, |
| {"q", Q}, |
| {"qd", Qd}, |
| {"qig", Qig}, |
| {"qla", Qla}, |
| {"qp", Qp}, |
| {"qsn", Qsn}, |
| {"qt", Qt}, |
| {"r", R}, |
| {"rx", Rx}, |
| {"s", S}, |
| {"s0", S0}, |
| {"s1", S1}, |
| {"s2", S2}, |
| {"s3", S3}, |
| {"s4", S4}, |
| {"s5", S5}, |
| {"s6", S6}, |
| {"s7a", S7a}, |
| {"s7n", S7n}, |
| {"sc", Sc}, |
| {"sgk", Sgk}, |
| {"sn", Sn}, |
| {"ss", Ss,}, |
| {"ssb", Ssb}, |
| {"ssi", Ssi}, |
| {"su", Su}, |
| {"sub", Sub}, |
| {"table", Table}, |
| {"tt", Tt}, |
| {"vd", Vd}, |
| {"ve", Ve}, |
| {"vf", Vf}, |
| {"vfl", Vfl}, |
| {"w", W}, |
| {"x", X}, |
| {"xd", Xd}, |
| {"xi", Xi}, |
| {"xid", Xid}, |
| {"xr", Xr}, |
| {"xs", Xs} |
| }; |
| |
| /* Possible tag auxilliary info */ |
| enum { |
| Cols, /* number of columns in a table */ |
| Num, /* letter or number, for a sense */ |
| St, /* status (e.g., obs) */ |
| Naux |
| }; |
| |
| static Assoc auxtab[] = { |
| {"cols", Cols}, |
| {"num", Num}, |
| {"st", St} |
| }; |
| |
| static Assoc spectab[] = { |
| {"3on4", 0xbe}, |
| {"Aacu", 0xc1}, |
| {"Aang", 0xc5}, |
| {"Abarab", 0x100}, |
| {"Acirc", 0xc2}, |
| {"Ae", 0xc6}, |
| {"Agrave", 0xc0}, |
| {"Alpha", 0x391}, |
| {"Amac", 0x100}, |
| {"Asg", 0x1b7}, /* Unicyle. Cf "Sake" */ |
| {"Auml", 0xc4}, |
| {"Beta", 0x392}, |
| {"Cced", 0xc7}, |
| {"Chacek", 0x10c}, |
| {"Chi", 0x3a7}, |
| {"Chirho", 0x2627}, /* Chi Rho U+2627 */ |
| {"Csigma", 0x3da}, |
| {"Delta", 0x394}, |
| {"Eacu", 0xc9}, |
| {"Ecirc", 0xca}, |
| {"Edh", 0xd0}, |
| {"Epsilon", 0x395}, |
| {"Eta", 0x397}, |
| {"Gamma", 0x393}, |
| {"Iacu", 0xcd}, |
| {"Icirc", 0xce}, |
| {"Imac", 0x12a}, |
| {"Integ", 0x222b}, |
| {"Iota", 0x399}, |
| {"Kappa", 0x39a}, |
| {"Koppa", 0x3de}, |
| {"Lambda", 0x39b}, |
| {"Lbar", 0x141}, |
| {"Mu", 0x39c}, |
| {"Naira", 0x4e}, /* should have bar through */ |
| {"Nplus", 0x4e}, /* should have plus above */ |
| {"Ntilde", 0xd1}, |
| {"Nu", 0x39d}, |
| {"Oacu", 0xd3}, |
| {"Obar", 0xd8}, |
| {"Ocirc", 0xd4}, |
| {"Oe", 0x152}, |
| {"Omega", 0x3a9}, |
| {"Omicron", 0x39f}, |
| {"Ouml", 0xd6}, |
| {"Phi", 0x3a6}, |
| {"Pi", 0x3a0}, |
| {"Psi", 0x3a8}, |
| {"Rho", 0x3a1}, |
| {"Sacu", 0x15a}, |
| {"Sigma", 0x3a3}, |
| {"Summ", 0x2211}, |
| {"Tau", 0x3a4}, |
| {"Th", 0xde}, |
| {"Theta", 0x398}, |
| {"Tse", 0x426}, |
| {"Uacu", 0xda}, |
| {"Ucirc", 0xdb}, |
| {"Upsilon", 0x3a5}, |
| {"Uuml", 0xdc}, |
| {"Wyn", 0x1bf}, /* wynn U+01BF */ |
| {"Xi", 0x39e}, |
| {"Ygh", 0x1b7}, /* Yogh U+01B7 */ |
| {"Zeta", 0x396}, |
| {"Zh", 0x1b7}, /* looks like Yogh. Cf "Sake" */ |
| {"a", 0x61}, /* ante */ |
| {"aacu", 0xe1}, |
| {"aang", 0xe5}, |
| {"aasper", MAAS}, |
| {"abreve", 0x103}, |
| {"acirc", 0xe2}, |
| {"acu", LACU}, |
| {"ae", 0xe6}, |
| {"agrave", 0xe0}, |
| {"ahook", 0x105}, |
| {"alenis", MALN}, |
| {"alpha", 0x3b1}, |
| {"amac", 0x101}, |
| {"amp", 0x26}, |
| {"and", MAND}, |
| {"ang", LRNG}, |
| {"angle", 0x2220}, |
| {"ankh", 0x2625}, /* ankh U+2625 */ |
| {"ante", 0x61}, /* before (year) */ |
| {"aonq", MAOQ}, |
| {"appreq", 0x2243}, |
| {"aquar", 0x2652}, |
| {"arDadfull", 0x636}, /* Dad U+0636 */ |
| {"arHa", 0x62d}, /* haa U+062D */ |
| {"arTa", 0x62a}, /* taa U+062A */ |
| {"arain", 0x639}, /* ain U+0639 */ |
| {"arainfull", 0x639}, /* ain U+0639 */ |
| {"aralif", 0x627}, /* alef U+0627 */ |
| {"arba", 0x628}, /* baa U+0628 */ |
| {"arha", 0x647}, /* ha U+0647 */ |
| {"aries", 0x2648}, |
| {"arnun", 0x646}, /* noon U+0646 */ |
| {"arnunfull", 0x646}, /* noon U+0646 */ |
| {"arpa", 0x647}, /* ha U+0647 */ |
| {"arqoph", 0x642}, /* qaf U+0642 */ |
| {"arshinfull", 0x634}, /* sheen U+0634 */ |
| {"arta", 0x62a}, /* taa U+062A */ |
| {"artafull", 0x62a}, /* taa U+062A */ |
| {"artha", 0x62b}, /* thaa U+062B */ |
| {"arwaw", 0x648}, /* waw U+0648 */ |
| {"arya", 0x64a}, /* ya U+064A */ |
| {"aryafull", 0x64a}, /* ya U+064A */ |
| {"arzero", 0x660}, /* indic zero U+0660 */ |
| {"asg", 0x292}, /* unicycle character. Cf "hallow" */ |
| {"asper", LASP}, |
| {"assert", 0x22a2}, |
| {"astm", 0x2042}, /* asterism: should be upside down */ |
| {"at", 0x40}, |
| {"atilde", 0xe3}, |
| {"auml", 0xe4}, |
| {"ayin", 0x639}, /* arabic ain U+0639 */ |
| {"b1", 0x2d}, /* single bond */ |
| {"b2", 0x3d}, /* double bond */ |
| {"b3", 0x2261}, /* triple bond */ |
| {"bbar", 0x180}, /* b with bar U+0180 */ |
| {"beta", 0x3b2}, |
| {"bigobl", 0x2f}, |
| {"blC", 0x43}, /* should be black letter */ |
| {"blJ", 0x4a}, /* should be black letter */ |
| {"blU", 0x55}, /* should be black letter */ |
| {"blb", 0x62}, /* should be black letter */ |
| {"blozenge", 0x25ca}, /* U+25CA; should be black */ |
| {"bly", 0x79}, /* should be black letter */ |
| {"bra", MBRA}, |
| {"brbl", LBRB}, |
| {"breve", LBRV}, |
| {"bslash", '\\'}, |
| {"bsquare", 0x25a0}, /* black square U+25A0 */ |
| {"btril", 0x25c0}, /* U+25C0 */ |
| {"btrir", 0x25b6}, /* U+25B6 */ |
| {"c", 0x63}, /* circa */ |
| {"cab", 0x232a}, |
| {"cacu", 0x107}, |
| {"canc", 0x264b}, |
| {"capr", 0x2651}, |
| {"caret", 0x5e}, |
| {"cb", 0x7d}, |
| {"cbigb", 0x7d}, |
| {"cbigpren", 0x29}, |
| {"cbigsb", 0x5d}, |
| {"cced", 0xe7}, |
| {"cdil", LCED}, |
| {"cdsb", 0x301b}, /* ]] U+301b */ |
| {"cent", 0xa2}, |
| {"chacek", 0x10d}, |
| {"chi", 0x3c7}, |
| {"circ", LRNG}, |
| {"circa", 0x63}, /* about (year) */ |
| {"circbl", 0x325}, /* ring below accent U+0325 */ |
| {"circle", 0x25cb}, /* U+25CB */ |
| {"circledot", 0x2299}, |
| {"click", 0x296}, |
| {"club", 0x2663}, |
| {"comtime", 0x43}, |
| {"conj", 0x260c}, |
| {"cprt", 0xa9}, |
| {"cq", '\''}, |
| {"cqq", 0x201d}, |
| {"cross", 0x2720}, /* maltese cross U+2720 */ |
| {"crotchet", 0x2669}, |
| {"csb", 0x5d}, |
| {"ctilde", 0x63}, /* +tilde */ |
| {"ctlig", MLCT}, |
| {"cyra", 0x430}, |
| {"cyre", 0x435}, |
| {"cyrhard", 0x44a}, |
| {"cyrjat", 0x463}, |
| {"cyrm", 0x43c}, |
| {"cyrn", 0x43d}, |
| {"cyrr", 0x440}, |
| {"cyrsoft", 0x44c}, |
| {"cyrt", 0x442}, |
| {"cyry", 0x44b}, |
| {"dag", 0x2020}, |
| {"dbar", 0x111}, |
| {"dblar", 0x21cb}, |
| {"dblgt", 0x226b}, |
| {"dbllt", 0x226a}, |
| {"dced", 0x64}, /* +cedilla */ |
| {"dd", MDD}, |
| {"ddag", 0x2021}, |
| {"ddd", MDDD}, |
| {"decr", 0x2193}, |
| {"deg", 0xb0}, |
| {"dele", 0x64}, /* should be dele */ |
| {"delta", 0x3b4}, |
| {"descnode", 0x260b}, /* descending node U+260B */ |
| {"diamond", 0x2662}, |
| {"digamma", 0x3dd}, |
| {"div", 0xf7}, |
| {"dlessi", 0x131}, |
| {"dlessj1", 0x6a}, /* should be dotless */ |
| {"dlessj2", 0x6a}, /* should be dotless */ |
| {"dlessj3", 0x6a}, /* should be dotless */ |
| {"dollar", 0x24}, |
| {"dotab", LDOT}, |
| {"dotbl", LDTB}, |
| {"drachm", 0x292}, |
| {"dubh", 0x2d}, |
| {"eacu", 0xe9}, |
| {"earth", 0x2641}, |
| {"easper", MEAS}, |
| {"ebreve", 0x115}, |
| {"ecirc", 0xea}, |
| {"edh", 0xf0}, |
| {"egrave", 0xe8}, |
| {"ehacek", 0x11b}, |
| {"ehook", 0x119}, |
| {"elem", 0x220a}, |
| {"elenis", MELN}, |
| {"em", 0x2014}, |
| {"emac", 0x113}, |
| {"emem", MEMM}, |
| {"en", 0x2013}, |
| {"epsilon", 0x3b5}, |
| {"equil", 0x21cb}, |
| {"ergo", 0x2234}, |
| {"es", MES}, |
| {"eszett", 0xdf}, |
| {"eta", 0x3b7}, |
| {"eth", 0xf0}, |
| {"euml", 0xeb}, |
| {"expon", 0x2191}, |
| {"fact", 0x21}, |
| {"fata", 0x251}, |
| {"fatpara", 0xb6}, /* should have fatter, filled in bowl */ |
| {"female", 0x2640}, |
| {"ffilig", MLFFI}, |
| {"fflig", MLFF}, |
| {"ffllig", MLFFL}, |
| {"filig", MLFI}, |
| {"flat", 0x266d}, |
| {"fllig", MLFL}, |
| {"frE", 0x45}, /* should be curly */ |
| {"frL", 'L'}, /* should be curly */ |
| {"frR", 0x52}, /* should be curly */ |
| {"frakB", 0x42}, /* should have fraktur style */ |
| {"frakG", 0x47}, |
| {"frakH", 0x48}, |
| {"frakI", 0x49}, |
| {"frakM", 0x4d}, |
| {"frakU", 0x55}, |
| {"frakX", 0x58}, |
| {"frakY", 0x59}, |
| {"frakh", 0x68}, |
| {"frbl", LFRB}, |
| {"frown", LFRN}, |
| {"fs", 0x20}, |
| {"fsigma", 0x3c2}, |
| {"gAacu", 0xc1}, /* should be Α+acute */ |
| {"gaacu", 0x3b1}, /* +acute */ |
| {"gabreve", 0x3b1}, /* +breve */ |
| {"gafrown", 0x3b1}, /* +frown */ |
| {"gagrave", 0x3b1}, /* +grave */ |
| {"gamac", 0x3b1}, /* +macron */ |
| {"gamma", 0x3b3}, |
| {"gauml", 0x3b1}, /* +umlaut */ |
| {"ge", 0x2267}, |
| {"geacu", 0x3b5}, /* +acute */ |
| {"gegrave", 0x3b5}, /* +grave */ |
| {"ghacu", 0x3b7}, /* +acute */ |
| {"ghfrown", 0x3b7}, /* +frown */ |
| {"ghgrave", 0x3b7}, /* +grave */ |
| {"ghmac", 0x3b7}, /* +macron */ |
| {"giacu", 0x3b9}, /* +acute */ |
| {"gibreve", 0x3b9}, /* +breve */ |
| {"gifrown", 0x3b9}, /* +frown */ |
| {"gigrave", 0x3b9}, /* +grave */ |
| {"gimac", 0x3b9}, /* +macron */ |
| {"giuml", 0x3b9}, /* +umlaut */ |
| {"glagjat", 0x467}, |
| {"glots", 0x2c0}, |
| {"goacu", 0x3bf}, /* +acute */ |
| {"gobreve", 0x3bf}, /* +breve */ |
| {"grave", LGRV}, |
| {"gt", 0x3e}, |
| {"guacu", 0x3c5}, /* +acute */ |
| {"gufrown", 0x3c5}, /* +frown */ |
| {"gugrave", 0x3c5}, /* +grave */ |
| {"gumac", 0x3c5}, /* +macron */ |
| {"guuml", 0x3c5}, /* +umlaut */ |
| {"gwacu", 0x3c9}, /* +acute */ |
| {"gwfrown", 0x3c9}, /* +frown */ |
| {"gwgrave", 0x3c9}, /* +grave */ |
| {"hacek", LHCK}, |
| {"halft", 0x2308}, |
| {"hash", 0x23}, |
| {"hasper", MHAS}, |
| {"hatpath", 0x5b2}, /* hataf patah U+05B2 */ |
| {"hatqam", 0x5b3}, /* hataf qamats U+05B3 */ |
| {"hatseg", 0x5b1}, /* hataf segol U+05B1 */ |
| {"hbar", 0x127}, |
| {"heart", 0x2661}, |
| {"hebaleph", 0x5d0}, /* aleph U+05D0 */ |
| {"hebayin", 0x5e2}, /* ayin U+05E2 */ |
| {"hebbet", 0x5d1}, /* bet U+05D1 */ |
| {"hebbeth", 0x5d1}, /* bet U+05D1 */ |
| {"hebcheth", 0x5d7}, /* bet U+05D7 */ |
| {"hebdaleth", 0x5d3}, /* dalet U+05D3 */ |
| {"hebgimel", 0x5d2}, /* gimel U+05D2 */ |
| {"hebhe", 0x5d4}, /* he U+05D4 */ |
| {"hebkaph", 0x5db}, /* kaf U+05DB */ |
| {"heblamed", 0x5dc}, /* lamed U+05DC */ |
| {"hebmem", 0x5de}, /* mem U+05DE */ |
| {"hebnun", 0x5e0}, /* nun U+05E0 */ |
| {"hebnunfin", 0x5df}, /* final nun U+05DF */ |
| {"hebpe", 0x5e4}, /* pe U+05E4 */ |
| {"hebpedag", 0x5e3}, /* final pe? U+05E3 */ |
| {"hebqoph", 0x5e7}, /* qof U+05E7 */ |
| {"hebresh", 0x5e8}, /* resh U+05E8 */ |
| {"hebshin", 0x5e9}, /* shin U+05E9 */ |
| {"hebtav", 0x5ea}, /* tav U+05EA */ |
| {"hebtsade", 0x5e6}, /* tsadi U+05E6 */ |
| {"hebwaw", 0x5d5}, /* vav? U+05D5 */ |
| {"hebyod", 0x5d9}, /* yod U+05D9 */ |
| {"hebzayin", 0x5d6}, /* zayin U+05D6 */ |
| {"hgz", 0x292}, /* ??? Cf "alet" */ |
| {"hireq", 0x5b4}, /* U+05B4 */ |
| {"hlenis", MHLN}, |
| {"hook", LOGO}, |
| {"horizE", 0x45}, /* should be on side */ |
| {"horizP", 0x50}, /* should be on side */ |
| {"horizS", 0x223d}, |
| {"horizT", 0x22a3}, |
| {"horizb", 0x7b}, /* should be underbrace */ |
| {"ia", 0x3b1}, |
| {"iacu", 0xed}, |
| {"iasper", MIAS}, |
| {"ib", 0x3b2}, |
| {"ibar", 0x268}, |
| {"ibreve", 0x12d}, |
| {"icirc", 0xee}, |
| {"id", 0x3b4}, |
| {"ident", 0x2261}, |
| {"ie", 0x3b5}, |
| {"ifilig", MLFI}, |
| {"ifflig", MLFF}, |
| {"ig", 0x3b3}, |
| {"igrave", 0xec}, |
| {"ih", 0x3b7}, |
| {"ii", 0x3b9}, |
| {"ik", 0x3ba}, |
| {"ilenis", MILN}, |
| {"imac", 0x12b}, |
| {"implies", 0x21d2}, |
| {"index", 0x261e}, |
| {"infin", 0x221e}, |
| {"integ", 0x222b}, |
| {"intsec", 0x2229}, |
| {"invpri", 0x2cf}, |
| {"iota", 0x3b9}, |
| {"iq", 0x3c8}, |
| {"istlig", MLST}, |
| {"isub", 0x3f5}, /* iota below accent */ |
| {"iuml", 0xef}, |
| {"iz", 0x3b6}, |
| {"jup", 0x2643}, |
| {"kappa", 0x3ba}, |
| {"koppa", 0x3df}, |
| {"lambda", 0x3bb}, |
| {"lar", 0x2190}, |
| {"lbar", 0x142}, |
| {"le", 0x2266}, |
| {"lenis", LLEN}, |
| {"leo", 0x264c}, |
| {"lhalfbr", 0x2308}, |
| {"lhshoe", 0x2283}, |
| {"libra", 0x264e}, |
| {"llswing", MLLS}, |
| {"lm", 0x2d0}, |
| {"logicand", 0x2227}, |
| {"logicor", 0x2228}, |
| {"longs", 0x283}, |
| {"lrar", 0x2194}, |
| {"lt", 0x3c}, |
| {"ltappr", 0x227e}, |
| {"ltflat", 0x2220}, |
| {"lumlbl", 0x6c}, /* +umlaut below */ |
| {"mac", LMAC}, |
| {"male", 0x2642}, |
| {"mc", 0x63}, /* should be raised */ |
| {"merc", 0x263f}, /* mercury U+263F */ |
| {"min", 0x2212}, |
| {"moonfq", 0x263d}, /* first quarter moon U+263D */ |
| {"moonlq", 0x263e}, /* last quarter moon U+263E */ |
| {"msylab", 0x6d}, /* +sylab (ˌ) */ |
| {"mu", 0x3bc}, |
| {"nacu", 0x144}, |
| {"natural", 0x266e}, |
| {"neq", 0x2260}, |
| {"nfacu", 0x2032}, |
| {"nfasper", 0x2bd}, |
| {"nfbreve", 0x2d8}, |
| {"nfced", 0xb8}, |
| {"nfcirc", 0x2c6}, |
| {"nffrown", 0x2322}, |
| {"nfgra", 0x2cb}, |
| {"nfhacek", 0x2c7}, |
| {"nfmac", 0xaf}, |
| {"nftilde", 0x2dc}, |
| {"nfuml", 0xa8}, |
| {"ng", 0x14b}, |
| {"not", 0xac}, |
| {"notelem", 0x2209}, |
| {"ntilde", 0xf1}, |
| {"nu", 0x3bd}, |
| {"oab", 0x2329}, |
| {"oacu", 0xf3}, |
| {"oasper", MOAS}, |
| {"ob", 0x7b}, |
| {"obar", 0xf8}, |
| {"obigb", 0x7b}, /* should be big */ |
| {"obigpren", 0x28}, |
| {"obigsb", 0x5b}, /* should be big */ |
| {"obreve", 0x14f}, |
| {"ocirc", 0xf4}, |
| {"odsb", 0x301a}, /* [[ U+301A */ |
| {"oe", 0x153}, |
| {"oeamp", 0x26}, |
| {"ograve", 0xf2}, |
| {"ohook", 0x6f}, /* +hook */ |
| {"olenis", MOLN}, |
| {"omac", 0x14d}, |
| {"omega", 0x3c9}, |
| {"omicron", 0x3bf}, |
| {"ope", 0x25b}, |
| {"opp", 0x260d}, |
| {"oq", 0x60}, |
| {"oqq", 0x201c}, |
| {"or", MOR}, |
| {"osb", 0x5b}, |
| {"otilde", 0xf5}, |
| {"ouml", 0xf6}, |
| {"ounce", 0x2125}, /* ounce U+2125 */ |
| {"ovparen", 0x2322}, /* should be sideways ( */ |
| {"p", 0x2032}, |
| {"pa", 0x2202}, |
| {"page", 0x50}, |
| {"pall", 0x28e}, |
| {"paln", 0x272}, |
| {"par", PAR}, |
| {"para", 0xb6}, |
| {"pbar", 0x70}, /* +bar */ |
| {"per", 0x2118}, /* per U+2118 */ |
| {"phi", 0x3c6}, |
| {"phi2", 0x3d5}, |
| {"pi", 0x3c0}, |
| {"pisces", 0x2653}, |
| {"planck", 0x127}, |
| {"plantinJ", 0x4a}, /* should be script */ |
| {"pm", 0xb1}, |
| {"pmil", 0x2030}, |
| {"pp", 0x2033}, |
| {"ppp", 0x2034}, |
| {"prop", 0x221d}, |
| {"psi", 0x3c8}, |
| {"pstlg", 0xa3}, |
| {"q", 0x3f}, /* should be raised */ |
| {"qamets", 0x5b3}, /* U+05B3 */ |
| {"quaver", 0x266a}, |
| {"rar", 0x2192}, |
| {"rasper", MRAS}, |
| {"rdot", 0xb7}, |
| {"recipe", 0x211e}, /* U+211E */ |
| {"reg", 0xae}, |
| {"revC", 0x186}, /* open O U+0186 */ |
| {"reva", 0x252}, |
| {"revc", 0x254}, |
| {"revope", 0x25c}, |
| {"revr", 0x279}, |
| {"revsc", 0x2d2}, /* upside-down semicolon */ |
| {"revv", 0x28c}, |
| {"rfa", 0x6f}, /* +hook (Cf "goal") */ |
| {"rhacek", 0x159}, |
| {"rhalfbr", 0x2309}, |
| {"rho", 0x3c1}, |
| {"rhshoe", 0x2282}, |
| {"rlenis", MRLN}, |
| {"rsylab", 0x72}, /* +sylab */ |
| {"runash", 0x46}, /* should be runic 'ash' */ |
| {"rvow", 0x2d4}, |
| {"sacu", 0x15b}, |
| {"sagit", 0x2650}, |
| {"sampi", 0x3e1}, |
| {"saturn", 0x2644}, |
| {"sced", 0x15f}, |
| {"schwa", 0x259}, |
| {"scorpio", 0x264f}, |
| {"scrA", 0x41}, /* should be script */ |
| {"scrC", 0x43}, |
| {"scrE", 0x45}, |
| {"scrF", 0x46}, |
| {"scrI", 0x49}, |
| {"scrJ", 0x4a}, |
| {"scrL", 'L'}, |
| {"scrO", 0x4f}, |
| {"scrP", 0x50}, |
| {"scrQ", 0x51}, |
| {"scrS", 0x53}, |
| {"scrT", 0x54}, |
| {"scrb", 0x62}, |
| {"scrd", 0x64}, |
| {"scrh", 0x68}, |
| {"scrl", 0x6c}, |
| {"scruple", 0x2108}, /* U+2108 */ |
| {"sdd", 0x2d0}, |
| {"sect", 0xa7}, |
| {"semE", 0x2203}, |
| {"sh", 0x283}, |
| {"shacek", 0x161}, |
| {"sharp", 0x266f}, |
| {"sheva", 0x5b0}, /* U+05B0 */ |
| {"shti", 0x26a}, |
| {"shtsyll", 0x222a}, |
| {"shtu", 0x28a}, |
| {"sidetri", 0x22b2}, |
| {"sigma", 0x3c3}, |
| {"since", 0x2235}, |
| {"slge", 0x2265}, /* should have slanted line under */ |
| {"slle", 0x2264}, /* should have slanted line under */ |
| {"sm", 0x2c8}, |
| {"smm", 0x2cc}, |
| {"spade", 0x2660}, |
| {"sqrt", 0x221a}, |
| {"square", 0x25a1}, /* U+25A1 */ |
| {"ssChi", 0x3a7}, /* should be sans serif */ |
| {"ssIota", 0x399}, |
| {"ssOmicron", 0x39f}, |
| {"ssPi", 0x3a0}, |
| {"ssRho", 0x3a1}, |
| {"ssSigma", 0x3a3}, |
| {"ssTau", 0x3a4}, |
| {"star", 0x2a}, |
| {"stlig", MLST}, |
| {"sup2", 0x2072}, |
| {"supgt", 0x2c3}, |
| {"suplt", 0x2c2}, |
| {"sur", 0x2b3}, |
| {"swing", 0x223c}, |
| {"tau", 0x3c4}, |
| {"taur", 0x2649}, |
| {"th", 0xfe}, |
| {"thbar", 0xfe}, /* +bar */ |
| {"theta", 0x3b8}, |
| {"thinqm", 0x3f}, /* should be thinner */ |
| {"tilde", LTIL}, |
| {"times", 0xd7}, |
| {"tri", 0x2206}, |
| {"trli", 0x2016}, |
| {"ts", 0x2009}, |
| {"uacu", 0xfa}, |
| {"uasper", MUAS}, |
| {"ubar", 0x75}, /* +bar */ |
| {"ubreve", 0x16d}, |
| {"ucirc", 0xfb}, |
| {"udA", 0x2200}, |
| {"udT", 0x22a5}, |
| {"uda", 0x250}, |
| {"udh", 0x265}, |
| {"udqm", 0xbf}, |
| {"udpsi", 0x22d4}, |
| {"udtr", 0x2207}, |
| {"ugrave", 0xf9}, |
| {"ulenis", MULN}, |
| {"umac", 0x16b}, |
| {"uml", LUML}, |
| {"undl", 0x2cd}, /* underline accent */ |
| {"union", 0x222a}, |
| {"upsilon", 0x3c5}, |
| {"uuml", 0xfc}, |
| {"vavpath", 0x5d5}, /* vav U+05D5 (+patah) */ |
| {"vavsheva", 0x5d5}, /* vav U+05D5 (+sheva) */ |
| {"vb", 0x7c}, |
| {"vddd", 0x22ee}, |
| {"versicle2", 0x2123}, /* U+2123 */ |
| {"vinc", 0xaf}, |
| {"virgo", 0x264d}, |
| {"vpal", 0x25f}, |
| {"vvf", 0x263}, |
| {"wasper", MWAS}, |
| {"wavyeq", 0x2248}, |
| {"wlenis", MWLN}, |
| {"wyn", 0x1bf}, /* wynn U+01BF */ |
| {"xi", 0x3be}, |
| {"yacu", 0xfd}, |
| {"ycirc", 0x177}, |
| {"ygh", 0x292}, |
| {"ymac", 0x79}, /* +macron */ |
| {"yuml", 0xff}, |
| {"zced", 0x7a}, /* +cedilla */ |
| {"zeta", 0x3b6}, |
| {"zh", 0x292}, |
| {"zhacek", 0x17e} |
| }; |
| /* |
| The following special characters don't have close enough |
| equivalents in Unicode, so aren't in the above table. |
| 22n 2^(2^n) Cf Fermat |
| 2on4 2/4 |
| 3on8 3/8 |
| Bantuo Bantu O. Cf Otshi-herero |
| Car C with circular arrow on top |
| albrtime cut-time: C with vertical line |
| ardal Cf dental |
| bantuo Bantu o. Cf Otshi-herero |
| bbc1 single chem bond below |
| bbc2 double chem bond below |
| bbl1 chem bond like / |
| bbl2 chem bond like // |
| bbr1 chem bond like \ |
| bbr2 chem bond \\ |
| bcop1 copper symbol. Cf copper |
| bcop2 copper symbol. Cf copper |
| benchm Cf benchmark |
| btc1 single chem bond above |
| btc2 double chem bond above |
| btl1 chem bond like \ |
| btl2 chem bond like \\ |
| btr1 chem bond like / |
| btr2 chem bond line // |
| burman Cf Burman |
| devph sanskrit letter. Cf ph |
| devrfls sanskrit letter. Cf cerebral |
| duplong[12] musical note |
| egchi early form of chi |
| eggamma[12] early form of gamma |
| egiota early form of iota |
| egkappa early form of kappa |
| eglambda early form of lambda |
| egmu[12] early form of mu |
| egnu[12] early form of nu |
| egpi[123] early form of pi |
| egrho[12] early form of rho |
| egsampi early form of sampi |
| egsan early form of san |
| egsigma[12] early form of sigma |
| egxi[123] early form of xi |
| elatS early form of S |
| elatc[12] early form of C |
| elatg[12] early form of G |
| glagjeri Slavonic Glagolitic jeri |
| glagjeru Slavonic Glagolitic jeru |
| hypolem hypolemisk (line with underdot) |
| lhrbr lower half } |
| longmord long mordent |
| mbwvow backwards scretched C. Cf retract. |
| mord music symbol. Cf mordent |
| mostra Cf direct |
| ohgcirc old form of circumflex |
| oldbeta old form of β. Cf perturbate |
| oldsemibr[12] old forms of semibreve. Cf prolation |
| ormg old form of g. Cf G |
| para[12345] form of ¶ |
| pauseo musical pause sign |
| pauseu musical pause sign |
| pharyng Cf pharyngal |
| ragr Black letter ragged r |
| repetn musical repeat. Cf retort |
| segno musical segno sign |
| semain[12] semitic ain |
| semhe semitic he |
| semheth semitic heth |
| semkaph semitic kaph |
| semlamed[12] semitic lamed |
| semmem semitic mem |
| semnum semitic nun |
| sempe semitic pe |
| semqoph[123] semitic qoph |
| semresh semitic resh |
| semtav[1234] semitic tav |
| semyod semitic yod |
| semzayin[123] semitic zayin |
| shtlong[12] U with underbar. Cf glyconic |
| sigmatau σ,τ combination |
| squaver sixteenth note |
| sqbreve square musical breve note |
| swast swastika |
| uhrbr upper half of big } |
| versicle1 Cf versicle |
| */ |
| |
| |
| static Rune normtab[128] = { |
| /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ |
| /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'', |
| 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, |
| /*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, |
| 0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f, |
| /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, |
| 0x48, 0x49, 0x4a, 0x4b, 'L', 0x4d, 0x4e, 0x4f, |
| /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, |
| 0x58, 0x59, 0x5a, 0x5b, '\\', 0x5d, 0x5e, 0x5f, |
| /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, |
| 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, |
| /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, |
| 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE |
| }; |
| static Rune phtab[128] = { |
| /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ |
| /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| /*20*/ 0x20, 0x21, 0x2c8, 0x23, 0x24, 0x2cc, 0xe6, '\'', |
| 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, |
| /*30*/ 0x30, 0x31, 0x32, 0x25c, 0x34, 0x35, 0x36, 0x37, |
| 0x38, 0xf8, 0x2d0, 0x3b, TAGS, 0x3d, TAGE, 0x3f, |
| /*40*/ 0x259, 0x251, 0x42, 0x43, 0xf0, 0x25b, 0x46, 0x47, |
| 0x48, 0x26a, 0x4a, 0x4b, 'L', 0x4d, 0x14b, 0x254, |
| /*50*/ 0x50, 0x252, 0x52, 0x283, 0x3b8, 0x28a, 0x28c, 0x57, |
| 0x58, 0x59, 0x292, 0x5b, '\\', 0x5d, 0x5e, 0x5f, |
| /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, |
| 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, |
| /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, |
| 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE |
| }; |
| static Rune grtab[128] = { |
| /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ |
| /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'', |
| 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, |
| /*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, |
| 0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f, |
| /*40*/ 0x40, 0x391, 0x392, 0x39e, 0x394, 0x395, 0x3a6, 0x393, |
| 0x397, 0x399, 0x3da, 0x39a, 0x39b, 0x39c, 0x39d, 0x39f, |
| /*50*/ 0x3a0, 0x398, 0x3a1, 0x3a3, 0x3a4, 0x3a5, 0x56, 0x3a9, |
| 0x3a7, 0x3a8, 0x396, 0x5b, '\\', 0x5d, 0x5e, 0x5f, |
| /*60*/ 0x60, 0x3b1, 0x3b2, 0x3be, 0x3b4, 0x3b5, 0x3c6, 0x3b3, |
| 0x3b7, 0x3b9, 0x3c2, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3bf, |
| /*70*/ 0x3c0, 0x3b8, 0x3c1, 0x3c3, 0x3c4, 0x3c5, 0x76, 0x3c9, |
| 0x3c7, 0x3c8, 0x3b6, 0x7b, 0x7c, 0x7d, 0x7e, NONE |
| }; |
| static Rune subtab[128] = { |
| /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ |
| /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'', |
| 0x208d, 0x208e, 0x2a, 0x208a, 0x2c, 0x208b, 0x2e, 0x2f, |
| /*30*/ 0x2080, 0x2081, 0x2082, 0x2083, 0x2084, 0x2085, 0x2086, 0x2087, |
| 0x2088, 0x2089, 0x3a, 0x3b, TAGS, 0x208c, TAGE, 0x3f, |
| /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, |
| 0x48, 0x49, 0x4a, 0x4b, 'L', 0x4d, 0x4e, 0x4f, |
| /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, |
| 0x58, 0x59, 0x5a, 0x5b, '\\', 0x5d, 0x5e, 0x5f, |
| /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, |
| 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, |
| /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, |
| 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE |
| }; |
| static Rune suptab[128] = { |
| /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ |
| /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'', |
| 0x207d, 0x207e, 0x2a, 0x207a, 0x2c, 0x207b, 0x2e, 0x2f, |
| /*30*/ 0x2070, 0x2071, 0x2072, 0x2073, 0x2074, 0x2075, 0x2076, 0x2077, |
| 0x2078, 0x2079, 0x3a, 0x3b, TAGS, 0x207c, TAGE, 0x3f, |
| /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, |
| 0x48, 0x49, 0x4a, 0x4b, 'L', 0x4d, 0x4e, 0x4f, |
| /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, |
| 0x58, 0x59, 0x5a, 0x5b, '\\', 0x5d, 0x5e, 0x5f, |
| /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, |
| 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, |
| /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, |
| 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE |
| }; |
| |
| static int tagstarts; |
| static char tag[Buflen]; |
| static int naux; |
| static char auxname[Maxaux][Buflen]; |
| static char auxval[Maxaux][Buflen]; |
| static char spec[Buflen]; |
| static char *auxstate[Naux]; /* vals for most recent tag */ |
| static Entry curentry; |
| #define cursize (curentry.end-curentry.start) |
| |
| static char *getspec(char *, char *); |
| static char *gettag(char *, char *); |
| static void dostatus(void); |
| |
| /* |
| * cmd is one of: |
| * 'p': normal print |
| * 'h': just print headwords |
| * 'P': print raw |
| */ |
| void |
| oedprintentry(Entry e, int cmd) |
| { |
| char *p, *pe; |
| int t, a, i; |
| long r, rprev, rlig; |
| Rune *transtab; |
| |
| p = e.start; |
| pe = e.end; |
| transtab = normtab; |
| rprev = NONE; |
| changett(0, 0, 0); |
| curentry = e; |
| if(cmd == 'h') |
| outinhibit = 1; |
| while(p < pe) { |
| if(cmd == 'r') { |
| outchar(*p++); |
| continue; |
| } |
| r = transtab[(*p++)&0x7F]; |
| if(r < NONE) { |
| /* Emit the rune, but buffer in case of ligature */ |
| if(rprev != NONE) |
| outrune(rprev); |
| rprev = r; |
| } else if(r == SPCS) { |
| /* Start of special character name */ |
| p = getspec(p, pe); |
| r = lookassoc(spectab, asize(spectab), spec); |
| if(r == -1) { |
| if(debug) |
| err("spec %ld %d %s", |
| e.doff, cursize, spec); |
| r = 0xfffd; |
| } |
| if(r >= LIGS && r < LIGE) { |
| /* handle possible ligature */ |
| rlig = liglookup(r, rprev); |
| if(rlig != NONE) |
| rprev = rlig; /* overwrite rprev */ |
| else { |
| /* could print accent, but let's not */ |
| if(rprev != NONE) outrune(rprev); |
| rprev = NONE; |
| } |
| } else if(r >= MULTI && r < MULTIE) { |
| if(rprev != NONE) { |
| outrune(rprev); |
| rprev = NONE; |
| } |
| outrunes(multitab[r-MULTI]); |
| } else if(r == PAR) { |
| if(rprev != NONE) { |
| outrune(rprev); |
| rprev = NONE; |
| } |
| outnl(1); |
| } else { |
| if(rprev != NONE) outrune(rprev); |
| rprev = r; |
| } |
| } else if(r == TAGS) { |
| /* Start of tag name */ |
| if(rprev != NONE) { |
| outrune(rprev); |
| rprev = NONE; |
| } |
| p = gettag(p, pe); |
| t = lookassoc(tagtab, asize(tagtab), tag); |
| if(t == -1) { |
| if(debug) |
| err("tag %ld %d %s", |
| e.doff, cursize, tag); |
| continue; |
| } |
| for(i = 0; i < Naux; i++) |
| auxstate[i] = 0; |
| for(i = 0; i < naux; i++) { |
| a = lookassoc(auxtab, asize(auxtab), auxname[i]); |
| if(a == -1) { |
| if(debug) |
| err("aux %ld %d %s", |
| e.doff, cursize, auxname[i]); |
| } else |
| auxstate[a] = auxval[i]; |
| } |
| switch(t){ |
| case E: |
| case Ve: |
| outnl(0); |
| if(tagstarts) |
| dostatus(); |
| break; |
| case Ed: |
| case Etym: |
| outchar(tagstarts? '[' : ']'); |
| break; |
| case Pr: |
| outchar(tagstarts? '(' : ')'); |
| break; |
| case In: |
| transtab = changett(transtab, subtab, tagstarts); |
| break; |
| case Hm: |
| case Su: |
| case Fq: |
| transtab = changett(transtab, suptab, tagstarts); |
| break; |
| case Gk: |
| transtab = changett(transtab, grtab, tagstarts); |
| break; |
| case Ph: |
| transtab = changett(transtab, phtab, tagstarts); |
| break; |
| case Hw: |
| if(cmd == 'h') { |
| if(!tagstarts) |
| outchar(' '); |
| outinhibit = !tagstarts; |
| } |
| break; |
| case S0: |
| case S1: |
| case S2: |
| case S3: |
| case S4: |
| case S5: |
| case S6: |
| case S7a: |
| case S7n: |
| case Sn: |
| case Sgk: |
| if(tagstarts) { |
| outnl(2); |
| dostatus(); |
| if(auxstate[Num]) { |
| if(t == S3 || t == S5) { |
| i = atoi(auxstate[Num]); |
| while(i--) |
| outchar('*'); |
| outchars(" "); |
| } else if(t == S7a || t == S7n || t == Sn) { |
| outchar('('); |
| outchars(auxstate[Num]); |
| outchars(") "); |
| } else if(t == Sgk) { |
| i = grtab[(uchar)auxstate[Num][0]]; |
| if(i != NONE) |
| outrune(i); |
| outchars(". "); |
| } else { |
| outchars(auxstate[Num]); |
| outchars(". "); |
| } |
| } |
| } |
| break; |
| case Cb: |
| case Db: |
| case Qp: |
| case P: |
| if(tagstarts) |
| outnl(1); |
| break; |
| case Table: |
| /* |
| * Todo: gather columns, justify them, etc. |
| * For now, just let colums come out as rows |
| */ |
| if(!tagstarts) |
| outnl(0); |
| break; |
| case Col: |
| if(tagstarts) |
| outnl(0); |
| break; |
| case Dn: |
| if(tagstarts) |
| outchar('/'); |
| break; |
| } |
| } |
| } |
| if(cmd == 'h') { |
| outinhibit = 0; |
| outnl(0); |
| } |
| } |
| |
| /* |
| * Return offset into bdict where next oed entry after fromoff starts. |
| * Oed entries start with <e>, <ve>, <e st=...>, or <ve st=...> |
| */ |
| long |
| oednextoff(long fromoff) |
| { |
| long a, n; |
| int c; |
| |
| a = Bseek(bdict, fromoff, 0); |
| if(a < 0) |
| return -1; |
| n = 0; |
| for(;;) { |
| c = Bgetc(bdict); |
| if(c < 0) |
| break; |
| if(c == '<') { |
| c = Bgetc(bdict); |
| if(c == 'e') { |
| c = Bgetc(bdict); |
| if(c == '>' || c == ' ') |
| n = 3; |
| } else if(c == 'v' && Bgetc(bdict) == 'e') { |
| c = Bgetc(bdict); |
| if(c == '>' || c == ' ') |
| n = 4; |
| } |
| if(n) |
| break; |
| } |
| } |
| return (Boffset(bdict)-n); |
| } |
| |
| static char *prkey1 = |
| "KEY TO THE PRONUNCIATION\n" |
| "\n" |
| "I. CONSONANTS\n" |
| "b, d, f, k, l, m, n, p, t, v, z: usual English values\n" |
| "\n" |
| "g as in go (gəʊ)\n" |
| "h ... ho! (həʊ)\n" |
| "r ... run (rʌn), terrier (ˈtɛriə(r))\n" |
| "(r)... her (hɜː(r))\n" |
| "s ... see (siː), success (səkˈsɜs)\n" |
| "w ... wear (wɛə(r))\n" |
| "hw ... when (hwɛn)\n" |
| "j ... yes (jɛs)\n" |
| "θ ... thin (θin), bath (bɑːθ)\n" |
| "ð ... then (ðɛn), bathe (beɪð)\n" |
| "ʃ ... shop (ʃɒp), dish (dɪʃ)\n" |
| "tʃ ... chop (tʃɒp), ditch (dɪtʃ)\n" |
| "ʒ ... vision (ˈvɪʒən), déjeuner (deʒøne)\n" |
| ; |
| static char *prkey2 = |
| "dʒ ... judge (dʒʌdʒ)\n" |
| "ŋ ... singing (ˈsɪŋɪŋ), think (θiŋk)\n" |
| "ŋg ... finger (ˈfiŋgə(r))\n" |
| "\n" |
| "Foreign\n" |
| "ʎ as in It. seraglio (serˈraʎo)\n" |
| "ɲ ... Fr. cognac (kɔɲak)\n" |
| "x ... Ger. ach (ax), Sc. loch (lɒx)\n" |
| "ç ... Ger. ich (ɪç), Sc. nicht (nɪçt)\n" |
| "ɣ ... North Ger. sagen (ˈzaːɣən)\n" |
| "c ... Afrikaans baardmannetjie (ˈbaːrtmanəci)\n" |
| "ɥ ... Fr. cuisine (kɥizin)\n" |
| "\n" |
| ; |
| static char *prkey3 = |
| "II. VOWELS AND DIPTHONGS\n" |
| "\n" |
| "Short\n" |
| "ɪ as in pit (pɪt), -ness (-nɪs)\n" |
| "ɛ ... pet (pɛt), Fr. sept (sɛt)\n" |
| "æ ... pat (pæt)\n" |
| "ʌ ... putt (pʌt)\n" |
| "ɒ ... pot (pɒt)\n" |
| "ʊ ... put (pʊt)\n" |
| "ə ... another (əˈnʌðə(r))\n" |
| "(ə)... beaten (ˈbiːt(ə)n)\n" |
| "i ... Fr. si (si)\n" |
| "e ... Fr. bébé (bebe)\n" |
| "a ... Fr. mari (mari)\n" |
| "ɑ ... Fr. bâtiment (bɑtimã)\n" |
| "ɔ ... Fr. homme (ɔm)\n" |
| "o ... Fr. eau (o)\n" |
| "ø ... Fr. peu (pø)\n" |
| ; |
| static char *prkey4 = |
| "œ ... Fr. boeuf (bœf), coeur (kœr)\n" |
| "u ... Fr. douce (dus)\n" |
| "ʏ ... Ger. Müller (ˈmʏlər)\n" |
| "y ... Fr. du (dy)\n" |
| "\n" |
| "Long\n" |
| "iː as in bean (biːn)\n" |
| "ɑː ... barn (bɑːn)\n" |
| "ɔː ... born (bɔːn)\n" |
| "uː ... boon (buːn)\n" |
| "ɜː ... burn (bɜːn)\n" |
| "eː ... Ger. Schnee (ʃneː)\n" |
| "ɛː ... Ger. Fähre (ˈfɛːrə)\n" |
| "aː ... Ger. Tag (taːk)\n" |
| "oː ... Ger. Sohn (zoːn)\n" |
| "øː ... Ger. Goethe (gøːtə)\n" |
| "yː ... Ger. grün (gryːn)\n" |
| "\n" |
| ; |
| static char *prkey5 = |
| "Nasal\n" |
| "ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n" |
| "ã ... Fr. franc (frã)\n" |
| "ɔ˜ ... Fr. bon (bɔ˜n)\n" |
| "œ˜ ... Fr. un (œ˜)\n" |
| "\n" |
| "Dipthongs, etc.\n" |
| "eɪ as in bay (beɪ)\n" |
| "aɪ ... buy (baɪ)\n" |
| "ɔɪ ... boy (bɔɪ)\n" |
| "əʊ ... no (nəʊ)\n" |
| "aʊ ... now (naʊ)\n" |
| "ɪə ... peer (pɪə(r))\n" |
| "ɛə ... pair (pɛə(r))\n" |
| "ʊə ... tour (tʊə(r))\n" |
| "ɔə ... boar (bɔə(r))\n" |
| "\n" |
| ; |
| static char *prkey6 = |
| "III. STRESS\n" |
| "\n" |
| "Main stress: ˈ preceding stressed syllable\n" |
| "Secondary stress: ˌ preceding stressed syllable\n" |
| "\n" |
| "E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n"; |
| /* TODO: find transcriptions of foreign consonents, œ, ʏ, nasals */ |
| |
| void |
| oedprintkey(void) |
| { |
| Bprint(bout, "%s%s%s%s%s%s", |
| prkey1, prkey2, prkey3, prkey4, prkey5, prkey6); |
| } |
| |
| /* |
| * f points just after a '&', fe points at end of entry. |
| * Accumulate the special name, starting after the & |
| * and continuing until the next '.', in spec[]. |
| * Return pointer to char after '.'. |
| */ |
| static char * |
| getspec(char *f, char *fe) |
| { |
| char *t; |
| int c, i; |
| |
| t = spec; |
| i = sizeof spec; |
| while(--i > 0) { |
| c = *f++; |
| if(c == '.' || f == fe) |
| break; |
| *t++ = c; |
| } |
| *t = 0; |
| return f; |
| } |
| |
| /* |
| * f points just after '<'; fe points at end of entry. |
| * Expect next characters from bin to match: |
| * [/][^ >]+( [^>=]+=[^ >]+)*> |
| * tag auxname auxval |
| * Accumulate the tag and its auxilliary information in |
| * tag[], auxname[][] and auxval[][]. |
| * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0. |
| * Set naux to the number of aux pairs found. |
| * Return pointer to after final '>'. |
| */ |
| static char * |
| gettag(char *f, char *fe) |
| { |
| char *t; |
| int c, i; |
| |
| t = tag; |
| c = *f++; |
| if(c == '/') |
| tagstarts = 0; |
| else { |
| tagstarts = 1; |
| *t++ = c; |
| } |
| i = Buflen; |
| naux = 0; |
| while(--i > 0) { |
| c = *f++; |
| if(c == '>' || f == fe) |
| break; |
| if(c == ' ') { |
| *t = 0; |
| t = auxname[naux]; |
| i = Buflen; |
| if(naux < Maxaux-1) |
| naux++; |
| } else if(naux && c == '=') { |
| *t = 0; |
| t = auxval[naux-1]; |
| i = Buflen; |
| } else |
| *t++ = c; |
| } |
| *t = 0; |
| return f; |
| } |
| |
| static void |
| dostatus(void) |
| { |
| char *s; |
| |
| s = auxstate[St]; |
| if(s) { |
| if(strcmp(s, "obs") == 0) |
| outrune(0x2020); |
| else if(strcmp(s, "ali") == 0) |
| outrune(0x2016); |
| else if(strcmp(s, "err") == 0 || strcmp(s, "spu") == 0) |
| outrune(0xb6); |
| else if(strcmp(s, "xref") == 0) |
| {/* nothing */} |
| else if(debug) |
| err("status %ld %d %s", curentry.doff, cursize, s); |
| } |
| } |