| #include <u.h> |
| #include <libc.h> |
| #include <bio.h> |
| #include "dict.h" |
| |
| /* |
| * Use this to start making an index for a new dictionary. |
| * Get the dictionary-specific nextoff and printentry(_,'h') |
| * commands working, add a record to the dicts[] array below, |
| * and run this program to get a list of offset,headword |
| * pairs |
| */ |
| Biobuf boutbuf; |
| Biobuf *bdict; |
| Biobuf *bout = &boutbuf; |
| int linelen; |
| int breaklen = 2000; |
| int outinhibit; |
| int debug; |
| |
| Dict *dict; /* current dictionary */ |
| |
| Entry getentry(long); |
| |
| void |
| main(int argc, char **argv) |
| { |
| int i; |
| long a, ae; |
| char *p; |
| Entry e; |
| |
| Binit(&boutbuf, 1, OWRITE); |
| dict = &dicts[0]; |
| ARGBEGIN { |
| case 'd': |
| p = ARGF(); |
| dict = 0; |
| if(p) { |
| for(i=0; dicts[i].name; i++) |
| if(strcmp(p, dicts[i].name)==0) { |
| dict = &dicts[i]; |
| break; |
| } |
| } |
| if(!dict) { |
| err("unknown dictionary: %s", p); |
| exits("nodict"); |
| } |
| break; |
| case 'D': |
| debug++; |
| break; |
| ARGEND } |
| USED(argc,argv); |
| bdict = Bopen(dict->path, OREAD); |
| ae = Bseek(bdict, 0, 2); |
| if(!bdict) { |
| err("can't open dictionary %s", dict->path); |
| exits("nodict"); |
| } |
| for(a = 0; a < ae; a = (*dict->nextoff)(a+1)) { |
| linelen = 0; |
| e = getentry(a); |
| Bprint(bout, "%ld\t", a); |
| linelen = 4; /* only has to be approx right */ |
| (*dict->printentry)(e, 'h'); |
| } |
| exits(0); |
| } |
| |
| Entry |
| getentry(long b) |
| { |
| long e, n, dtop; |
| static Entry ans; |
| static int anslen = 0; |
| |
| e = (*dict->nextoff)(b+1); |
| ans.doff = b; |
| if(e < 0) { |
| dtop = Bseek(bdict, 0L, 2); |
| if(b < dtop) { |
| e = dtop; |
| } else { |
| err("couldn't seek to entry"); |
| ans.start = 0; |
| ans.end = 0; |
| } |
| } |
| n = e-b; |
| if(n) { |
| if(n > anslen) { |
| ans.start = realloc(ans.start, n); |
| if(!ans.start) { |
| err("out of memory"); |
| exits("nomem"); |
| } |
| anslen = n; |
| } |
| Bseek(bdict, b, 0); |
| n = Bread(bdict, ans.start, n); |
| ans.end = ans.start + n; |
| } |
| return ans; |
| } |