| #include <u.h> |
| #include <libc.h> |
| #include <bio.h> |
| #include "dict.h" |
| |
| /* |
| * Routines for handling dictionaries in the "Paperback Collins" |
| * format (with tags surrounded by >....<) |
| */ |
/* Capacity in bytes of the static tag[] buffer, terminating NUL included. */
enum {
	Buflen = 1000
};
| |
| /* More special runes */ |
| enum { |
| B = MULTIE+1, /* bold */ |
| H, /* headword start */ |
| I, /* italics */ |
| Ps, /* pronunciation start */ |
| Pe, /* pronunciation end */ |
| R, /* roman */ |
| X /* headword end */ |
| }; |
| |
| /* Assoc tables must be sorted on first field */ |
| |
| static Assoc tagtab[] = { |
| {"AA", 0xc5}, |
| {"AC", LACU}, |
| {"B", B}, |
| {"CE", LCED}, |
| {"CI", LFRN}, |
| {"Di", 0x131}, |
| {"EL", 0x2d}, |
| {"GR", LGRV}, |
| {"H", H}, |
| {"I", I}, |
| {"OE", 0x152}, |
| {"R", R}, |
| {"TI", LTIL}, |
| {"UM", LUML}, |
| {"X", X}, |
| {"[", Ps}, |
| {"]", Pe}, |
| {"ac", LACU}, |
| {"ce", LCED}, |
| {"ci", LFRN}, |
| {"gr", LGRV}, |
| {"oe", 0x153}, |
| {"supe", 0x65}, /* should be raised */ |
| {"supo", 0x6f}, /* should be raised */ |
| {"ti", LTIL}, |
| {"um", LUML}, |
| {"{", Ps}, |
| {"~", 0x7e}, |
| {"~~", MTT} |
| }; |
| |
| static Rune normtab[128] = { |
| /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ |
| /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| NONE, NONE, 0x20, NONE, NONE, NONE, NONE, NONE, |
| /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, |
| /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, '\'', |
| 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, |
| /*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, |
| 0x38, 0x39, 0x3a, 0x3b, TAGE, 0x3d, TAGS, 0x3f, |
| /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, |
| 0x48, 0x49, 0x4a, 0x4b, 'L', 0x4d, 0x4e, 0x4f, |
| /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, |
| 0x58, 0x59, 0x5a, 0x5b, '\\', 0x5d, 0x5e, 0x5f, |
| /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, |
| 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, |
| /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, |
| 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE |
| }; |
| |
| static char *gettag(char *, char *); |
| |
| static Entry curentry; |
| static char tag[Buflen]; |
| #define cursize (curentry.end-curentry.start) |
| |
| void |
| pcollprintentry(Entry e, int cmd) |
| { |
| char *p, *pe; |
| long r, rprev, t, rlig; |
| int saveoi; |
| Rune *transtab; |
| |
| p = e.start; |
| pe = e.end; |
| transtab = normtab; |
| rprev = NONE; |
| changett(0, 0, 0); |
| curentry = e; |
| saveoi = 0; |
| if(cmd == 'h') |
| outinhibit = 1; |
| while(p < pe) { |
| if(cmd == 'r') { |
| outchar(*p++); |
| continue; |
| } |
| r = transtab[(*p++)&0x7F]; |
| if(r < NONE) { |
| /* Emit the rune, but buffer in case of ligature */ |
| if(rprev != NONE) |
| outrune(rprev); |
| rprev = r; |
| } else if(r == TAGS) { |
| p = gettag(p, pe); |
| t = lookassoc(tagtab, asize(tagtab), tag); |
| if(t == -1) { |
| if(debug && !outinhibit) |
| err("tag %ld %d %s", |
| e.doff, cursize, tag); |
| continue; |
| } |
| if(t < NONE) { |
| if(rprev != NONE) |
| outrune(rprev); |
| rprev = t; |
| } else if(t >= LIGS && t < LIGE) { |
| /* handle possible ligature */ |
| rlig = liglookup(t, rprev); |
| if(rlig != NONE) |
| rprev = rlig; /* overwrite rprev */ |
| else { |
| /* could print accent, but let's not */ |
| if(rprev != NONE) outrune(rprev); |
| rprev = NONE; |
| } |
| } else if(t >= MULTI && t < MULTIE) { |
| if(rprev != NONE) { |
| outrune(rprev); |
| rprev = NONE; |
| } |
| outrunes(multitab[t-MULTI]); |
| } else { |
| if(rprev != NONE) { |
| outrune(rprev); |
| rprev = NONE; |
| } |
| switch(t){ |
| case H: |
| if(cmd == 'h') |
| outinhibit = 0; |
| else |
| outnl(0); |
| break; |
| case X: |
| if(cmd == 'h') |
| outinhibit = 1; |
| else |
| outchars(". "); |
| break; |
| case Ps: |
| /* don't know enough of pron. key yet */ |
| saveoi = outinhibit; |
| outinhibit = 1; |
| break; |
| case Pe: |
| outinhibit = saveoi; |
| break; |
| } |
| } |
| } |
| } |
| if(cmd == 'h') |
| outinhibit = 0; |
| outnl(0); |
| } |
| |
| long |
| pcollnextoff(long fromoff) |
| { |
| long a; |
| char *p; |
| |
| a = Bseek(bdict, fromoff, 0); |
| if(a < 0) |
| return -1; |
| for(;;) { |
| p = Brdline(bdict, '\n'); |
| if(!p) |
| break; |
| if(p[0] == '>' && p[1] == 'H' && p[2] == '<') |
| return (Boffset(bdict)-Blinelen(bdict)); |
| } |
| return -1; |
| } |
| |
| void |
| pcollprintkey(void) |
| { |
| Bprint(bout, "No pronunciation key yet\n"); |
| } |
| |
| /* |
| * f points just after '>'; fe points at end of entry. |
| * Expect next characters from bin to match: |
| * [^ <]+< |
| * tag |
| * Accumulate the tag in tag[]. |
| * Return pointer to after final '<'. |
| */ |
| static char * |
| gettag(char *f, char *fe) |
| { |
| char *t; |
| int c, i; |
| |
| t = tag; |
| i = Buflen; |
| while(--i > 0) { |
| c = *f++; |
| if(c == '<' || f == fe) |
| break; |
| *t++ = c; |
| } |
| *t = 0; |
| return f; |
| } |