|  | #include <u.h> | 
|  | #include <libc.h> | 
|  | #include <bio.h> | 
|  | /* Macros for Rune support of ctype.h-like functions */ | 
|  |  | 
|  | #undef isupper | 
|  | #undef islower | 
|  | #undef isalpha | 
|  | #undef isdigit | 
|  | #undef isalnum | 
|  | #undef isspace | 
|  | #undef tolower | 
|  | #define	isupper(r)	('A' <= (r) && (r) <= 'Z') | 
|  | #define	islower(r)	('a' <= (r) && (r) <= 'z') | 
|  | #define	isalpha(r)	(isupper(r) || islower(r)) | 
|  | #define	islatin1(r)	(0xC0 <= (r) && (r) <= 0xFF) | 
|  |  | 
|  | #define	isdigit(r)	('0' <= (r) && (r) <= '9') | 
|  |  | 
|  | #define	isalnum(r)	(isalpha(r) || isdigit(r)) | 
|  |  | 
|  | #define	isspace(r)	((r) == ' ' || (r) == '\t' \ | 
|  | || (0x0A <= (r) && (r) <= 0x0D)) | 
|  |  | 
|  | #define	tolower(r)	((r)-'A'+'a') | 
|  |  | 
|  | #define	sgn(v)		((v) < 0 ? -1 : ((v) > 0 ? 1 : 0)) | 
|  |  | 
|  | #define	WORDSIZ	4000 | 
|  | char	*filename = "#9/lib/words"; | 
|  | Biobuf	*dfile; | 
|  | Biobuf	bout; | 
|  | Biobuf	bin; | 
|  |  | 
|  | int	fold; | 
|  | int	direc; | 
|  | int	exact; | 
|  | int	iflag; | 
|  | int	rev = 1;	/*-1 for reverse-ordered file, not implemented*/ | 
|  | int	(*compare)(Rune*, Rune*); | 
|  | Rune	tab = '\t'; | 
|  | Rune	entry[WORDSIZ]; | 
|  | Rune	word[WORDSIZ]; | 
|  | Rune	key[50], orig[50]; | 
|  | Rune	latin_fold_tab[] = | 
|  | { | 
|  | /*	Table to fold latin 1 characters to ASCII equivalents | 
|  | based at Rune value 0xc0 | 
|  |  | 
|  | À    Á    Â    Ã    Ä    Å    Æ    Ç | 
|  | È    É    Ê    Ë    Ì    Í    Î    Ï | 
|  | Ð    Ñ    Ò    Ó    Ô    Õ    Ö    × | 
|  | Ø    Ù    Ú    Û    Ü    Ý    Þ    ß | 
|  | à    á    â    ã    ä    å    æ    ç | 
|  | è    é    ê    ë    ì    í    î    ï | 
|  | ð    ñ    ò    ó    ô    õ    ö    ÷ | 
|  | ø    ù    ú    û    ü    ý    þ    ÿ | 
|  | */ | 
|  | 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', | 
|  | 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', | 
|  | 'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 , | 
|  | 'o', 'u', 'u', 'u', 'u', 'y',  0 ,  0 , | 
|  | 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', | 
|  | 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', | 
|  | 'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 , | 
|  | 'o', 'u', 'u', 'u', 'u', 'y',  0 , 'y', | 
|  | }; | 
|  |  | 
|  | int	locate(void); | 
|  | int	acomp(Rune*, Rune*); | 
|  | int	getword(Biobuf*, Rune *rp, int n); | 
|  | void	torune(char*, Rune*); | 
|  | void	rcanon(Rune*, Rune*); | 
|  | int	ncomp(Rune*, Rune*); | 
|  |  | 
|  | void | 
|  | main(int argc, char *argv[]) | 
|  | { | 
|  | int n; | 
|  |  | 
|  | filename = unsharp(filename); | 
|  |  | 
|  | Binit(&bin, 0, OREAD); | 
|  | Binit(&bout, 1, OWRITE); | 
|  | compare = acomp; | 
|  | ARGBEGIN{ | 
|  | case 'd': | 
|  | direc++; | 
|  | break; | 
|  | case 'f': | 
|  | fold++; | 
|  | break; | 
|  | case 'i': | 
|  | iflag++; | 
|  | break; | 
|  | case 'n': | 
|  | compare = ncomp; | 
|  | break; | 
|  | case 't': | 
|  | chartorune(&tab,ARGF()); | 
|  | break; | 
|  | case 'x': | 
|  | exact++; | 
|  | break; | 
|  | default: | 
|  | fprint(2, "%s: bad option %c\n", argv0, ARGC()); | 
|  | fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0); | 
|  | exits("usage"); | 
|  | } ARGEND | 
|  | if(!iflag){ | 
|  | if(argc >= 1) { | 
|  | torune(argv[0], orig); | 
|  | argv++; | 
|  | argc--; | 
|  | } else | 
|  | iflag++; | 
|  | } | 
|  | if(argc < 1) { | 
|  | direc++; | 
|  | fold++; | 
|  | } else | 
|  | filename = argv[0]; | 
|  | if (!iflag) | 
|  | rcanon(orig, key); | 
|  | dfile = Bopen(filename, OREAD); | 
|  | if(dfile == 0) { | 
|  | fprint(2, "look: can't open %s\n", filename); | 
|  | exits("no dictionary"); | 
|  | } | 
|  | if(!iflag) | 
|  | if(!locate()) | 
|  | exits("not found"); | 
|  | do { | 
|  | if(iflag) { | 
|  | Bflush(&bout); | 
|  | if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0]))) | 
|  | exits(0); | 
|  | rcanon(orig, key); | 
|  | if(!locate()) | 
|  | continue; | 
|  | } | 
|  | if (!exact || !acomp(word, key)) | 
|  | Bprint(&bout, "%S\n", entry); | 
|  | while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) { | 
|  | rcanon(entry, word); | 
|  | n = compare(key, word); | 
|  | switch(n) { | 
|  | case -1: | 
|  | if(exact) | 
|  | break; | 
|  | case 0: | 
|  | if (!exact || !acomp(word, orig)) | 
|  | Bprint(&bout, "%S\n", entry); | 
|  | continue; | 
|  | } | 
|  | break; | 
|  | } | 
|  | } while(iflag); | 
|  | exits(0); | 
|  | } | 
|  |  | 
|  | int | 
|  | locate(void) | 
|  | { | 
|  | vlong top, bot, mid; | 
|  | int c; | 
|  | int n; | 
|  |  | 
|  | bot = 0; | 
|  | top = Bseek(dfile, 0L, 2); | 
|  | for(;;) { | 
|  | mid = (top+bot) / 2; | 
|  | Bseek(dfile, mid, 0); | 
|  | do | 
|  | c = Bgetrune(dfile); | 
|  | while(c>=0 && c!='\n'); | 
|  | mid = Boffset(dfile); | 
|  | if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) | 
|  | break; | 
|  | rcanon(entry, word); | 
|  | n = compare(key, word); | 
|  | switch(n) { | 
|  | case -2: | 
|  | case -1: | 
|  | case 0: | 
|  | if(top <= mid) | 
|  | break; | 
|  | top = mid; | 
|  | continue; | 
|  | case 1: | 
|  | case 2: | 
|  | bot = mid; | 
|  | continue; | 
|  | } | 
|  | break; | 
|  | } | 
|  | Bseek(dfile, bot, 0); | 
|  | while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) { | 
|  | rcanon(entry, word); | 
|  | n = compare(key, word); | 
|  | switch(n) { | 
|  | case -2: | 
|  | return 0; | 
|  | case -1: | 
|  | if(exact) | 
|  | return 0; | 
|  | case 0: | 
|  | return 1; | 
|  | case 1: | 
|  | case 2: | 
|  | continue; | 
|  | } | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* | 
|  | *	acomp(s, t) returns: | 
|  | *		-2 if s strictly precedes t | 
|  | *		-1 if s is a prefix of t | 
|  | *		0 if s is the same as t | 
|  | *		1 if t is a prefix of s | 
|  | *		2 if t strictly precedes s | 
|  | */ | 
|  |  | 
|  | int | 
|  | acomp(Rune *s, Rune *t) | 
|  | { | 
|  | int cs, ct; | 
|  |  | 
|  | for(;;) { | 
|  | cs = *s; | 
|  | ct = *t; | 
|  | if(cs != ct) | 
|  | break; | 
|  | if(cs == 0) | 
|  | return 0; | 
|  | s++; | 
|  | t++; | 
|  | } | 
|  | if(cs == 0) | 
|  | return -1; | 
|  | if(ct == 0) | 
|  | return 1; | 
|  | if(cs < ct) | 
|  | return -2; | 
|  | return 2; | 
|  | } | 
|  |  | 
|  | void | 
|  | torune(char *old, Rune *new) | 
|  | { | 
|  | do old += chartorune(new, old); | 
|  | while(*new++); | 
|  | } | 
|  |  | 
|  | void | 
|  | rcanon(Rune *old, Rune *new) | 
|  | { | 
|  | Rune r; | 
|  |  | 
|  | while((r = *old++) && r != tab) { | 
|  | if (islatin1(r) && latin_fold_tab[r-0xc0]) | 
|  | r = latin_fold_tab[r-0xc0]; | 
|  | if(direc) | 
|  | if(!(isalnum(r) || r == ' ' || r == '\t')) | 
|  | continue; | 
|  | if(fold) | 
|  | if(isupper(r)) | 
|  | r = tolower(r); | 
|  | *new++ = r; | 
|  | } | 
|  | *new = 0; | 
|  | } | 
|  |  | 
|  | int | 
|  | ncomp(Rune *s, Rune *t) | 
|  | { | 
|  | Rune *is, *it, *js, *jt; | 
|  | int a, b; | 
|  | int ssgn, tsgn; | 
|  |  | 
|  | while(isspace(*s)) | 
|  | s++; | 
|  | while(isspace(*t)) | 
|  | t++; | 
|  | ssgn = tsgn = -2*rev; | 
|  | if(*s == '-') { | 
|  | s++; | 
|  | ssgn = -ssgn; | 
|  | } | 
|  | if(*t == '-') { | 
|  | t++; | 
|  | tsgn = -tsgn; | 
|  | } | 
|  | for(is = s; isdigit(*is); is++) | 
|  | ; | 
|  | for(it = t; isdigit(*it); it++) | 
|  | ; | 
|  | js = is; | 
|  | jt = it; | 
|  | a = 0; | 
|  | if(ssgn == tsgn) | 
|  | while(it>t && is>s) | 
|  | if(b = *--it - *--is) | 
|  | a = b; | 
|  | while(is > s) | 
|  | if(*--is != '0') | 
|  | return -ssgn; | 
|  | while(it > t) | 
|  | if(*--it != '0') | 
|  | return tsgn; | 
|  | if(a) | 
|  | return sgn(a)*ssgn; | 
|  | if(*(s=js) == '.') | 
|  | s++; | 
|  | if(*(t=jt) == '.') | 
|  | t++; | 
|  | if(ssgn == tsgn) | 
|  | while(isdigit(*s) && isdigit(*t)) | 
|  | if(a = *t++ - *s++) | 
|  | return sgn(a)*ssgn; | 
|  | while(isdigit(*s)) | 
|  | if(*s++ != '0') | 
|  | return -ssgn; | 
|  | while(isdigit(*t)) | 
|  | if(*t++ != '0') | 
|  | return tsgn; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int | 
|  | getword(Biobuf *f, Rune *rp, int n) | 
|  | { | 
|  | long c; | 
|  |  | 
|  | while(n-- > 0) { | 
|  | c = Bgetrune(f); | 
|  | if(c < 0) | 
|  | return 0; | 
|  | if(c == '\n') { | 
|  | *rp = '\0'; | 
|  | return 1; | 
|  | } | 
|  | *rp++ = c; | 
|  | } | 
|  | fprint(2, "Look: word too long.  Bailing out.\n"); | 
|  | return 0; | 
|  | } |