src/cmd/dict/dict.c - plan9 - Git at Google

 #include <u.h>
 #include <libc.h>
 #include <bio.h>
 #include <regexp.h>
 #include <ctype.h>
 #include "dict.h"

 /*
  * Assumed index file structure: lines of form
  * 	[^\t]+\t[0-9]+
  * First field is key, second is byte offset into dictionary.
  * Should be sorted with args -u -t'	' +0f -1 +0 -1 +1n -2
  */
 typedef struct Addr Addr;

 struct Addr {
 	int	n;		/* number of offsets */
 	int	cur;		/* current position within doff array */
 	int	maxn;		/* actual current size of doff array */
 	ulong	doff[1];	/* doff[maxn], with 0..n-1 significant */
 };

 Biobuf	binbuf;
 Biobuf	boutbuf;
 Biobuf	*bin = &binbuf;		/* user cmd input */
 Biobuf	*bout = &boutbuf;	/* output */
 Biobuf	*bdict;			/* dictionary */
 Biobuf	*bindex;		/* index file */
 long	indextop;		/* index offset at end of file */
 int	lastcmd;		/* last executed command */
 Addr	*dot;			/* "current" address */
 Dict	*dict;			/* current dictionary */
 int	linelen;
 int	breaklen = 60;
 int	outinhibit;
 int	debug;

 void	execcmd(int);
 int	getpref(char*, Rune*);
 Entry	getentry(int);
 int	getfield(Rune*);
 long	locate(Rune*);
 int	parseaddr(char*, char**);
 int	parsecmd(char*);
 int	search(char*, int);
 long	seeknextline(Biobuf*, long);
 void	setdotnext(void);
 void	setdotprev(void);
 void	sortaddr(Addr*);
 void	usage(void);

 enum {
 	Plen=300,	/* max length of a search pattern */
 	Fieldlen=200,	/* max length of an index field */
 	Aslots=10	/* initial number of slots in an address */
 };

 void
 main(int argc, char **argv)
 {
 	int i, cmd, kflag;
 	char *line, *p;

 	Binit(&binbuf, 0, OREAD);
 	Binit(&boutbuf, 1, OWRITE);
 	kflag = 0;
 	line = 0;
 	dict = 0;

 	for(i=0; dicts[i].name; i++){
 		if(access(dictfile(dicts[i].path), 0)>=0 && access(dictfile(dicts[i].indexpath), 0)>=0){
 			dict = &dicts[i];
 			break;
 		}
 	}
 	ARGBEGIN {
 		case 'd':
 			p = ARGF();
 			dict = 0;
 			if(p) {
 				for(i=0; dicts[i].name; i++)
 					if(strcmp(p, dicts[i].name)==0) {
 						dict = &dicts[i];
 						break;
 					}
 			}
 			if(!dict)
 				usage();
 			break;
 		case 'c':
 			line = ARGF();
 			if(!line)
 				usage();
 			break;
 		case 'k':
 			kflag++;
 			break;
 		case 'D':
 			debug++;
 			break;
 		default:
 			usage();
 	ARGEND }
 	if(dict == 0){
 		err("no dictionaries present on this system");
 		exits("nodict");
 	}

 	if(kflag) {
 		(*dict->printkey)();
 		exits(0);
 	}
 	if(argc > 1)
 		usage();
 	else if(argc == 1) {
 		if(line)
 			usage();
 		p = argv[0];
 		line = malloc(strlen(p)+5);
 		sprint(line, "/%s/P\n", p);
 	}
 	dict->path = dictfile(dict->path);
 	dict->indexpath = dictfile(dict->indexpath);
 	bdict = Bopen(dict->path, OREAD);
 	if(!bdict) {
 		err("can't open dictionary %s", dict->path);
 		exits("nodict");
 	}
 	bindex = Bopen(dict->indexpath, OREAD);
 	if(!bindex) {
 		err("can't open index %s", dict->indexpath);
 		exits("noindex");
 	}
 	indextop = Bseek(bindex, 0L, 2);

 	dot = malloc(sizeof(Addr)+(Aslots-1)*sizeof(ulong));
 	dot->n = 0;
 	dot->cur = 0;
 	dot->maxn = Aslots;
 	lastcmd = 0;

 	if(line) {
 		cmd = parsecmd(line);
 		if(cmd)
 			execcmd(cmd);
 	} else {
 		for(;;) {
 			Bprint(bout, "*");
 			Bflush(bout);
 			line = Brdline(bin, '\n');
 			linelen = 0;
 			if(!line)
 				break;
 			cmd = parsecmd(line);
 			if(cmd) {
 				execcmd(cmd);
 				lastcmd = cmd;
 			}
 		}
 	}
 	exits(0);
 }

 void
 usage(void)
 {
 	int i;
 	char *a, *b;

 	Bprint(bout, "Usage: %s [-d dict] [-k] [-c cmd] [word]\n", argv0);
 	Bprint(bout, "dictionaries (brackets mark dictionaries not present on this system):\n");
 	for(i = 0; dicts[i].name; i++){
 		a = b = "";
 		if(access(dictfile(dicts[i].path), 0)<0 || access(dictfile(dicts[i].indexpath), 0)<0){
 			a = "[";
 			b = "]";
 		}
 		Bprint(bout, "   %s%s\t%s%s\n", a, dicts[i].name, dicts[i].desc, b);
 	}
 	exits("usage");
 }

 int
 parsecmd(char *line)
 {
 	char *e;
 	int cmd, ans;

 	if(parseaddr(line, &e) >= 0)
 		line = e;
 	else
 		return 0;
 	cmd = *line;
 	ans = cmd;
 	if(isupper(cmd))
 		cmd = tolower(cmd);
 	if(!(cmd == 'a' || cmd == 'h' || cmd == 'p' || cmd == 'r' ||
 	     cmd == '\n')) {
 		err("unknown command %c", cmd);
 		return 0;
 	}
 	if(cmd == '\n')
 		switch(lastcmd) {
 			case 0:	ans = 'H'; break;
 			case 'H':	ans = 'p'; break;
 			default :	ans = lastcmd; break;
 		}
 	else if(line[1] != '\n' && line[1] != 0)
 		err("extra stuff after command %c ignored", cmd);
 	return ans;
 }

 void
 execcmd(int cmd)
 {
 	Entry e;
 	int cur, doall;

 	if(isupper(cmd)) {
 		doall = 1;
 		cmd = tolower(cmd);
 		cur = 0;
 	} else {
 		doall = 0;
 		cur = dot->cur;
 	}
 	if(debug && doall && cmd == 'a')
 		Bprint(bout, "%d entries, cur=%d\n", dot->n, cur+1);
 	for(;;){
 		if(cur >= dot->n)
 			break;
 		if(doall) {
 			Bprint(bout, "%d\t", cur+1);
 			linelen += 4 + (cur >= 10);
 		}
 		switch(cmd) {
 		case 'a':
 			Bprint(bout, "#%lud\n", dot->doff[cur]);
 			break;
 		case 'h':
 		case 'p':
 		case 'r':
 			e = getentry(cur);
 			(*dict->printentry)(e, cmd);
 			break;
 		}
 		cur++;
 		if(doall) {
 			if(cmd == 'p' || cmd == 'r') {
 				Bputc(bout, '\n');
 				linelen = 0;
 			}
 		} else
 			break;
 	}
 	if(cur >= dot->n)
 		cur = 0;
 	dot->cur = cur;
 }

 /*
  * Address syntax: ('.' | '/' re '/' | '!' re '!' | number | '#' number) ('+' | '-')*
  * Answer goes in dot.
  * Return -1 if address starts, but get error.
  * Return 0 if no address.
  */
 int
 parseaddr(char *line, char **eptr)
 {
 	int delim, plen;
 	ulong v;
 	char *e;
 	char pat[Plen];

 	if(*line == '/' || *line == '!') {
 		/* anchored regular expression match; '!' means no folding */
 		if(*line == '/') {
 			delim = '/';
 			e = strpbrk(line+1, "/\n");
 		} else {
 			delim = '!';
 			e = strpbrk(line+1, "!\n");
 		}
 		plen = e-line-1;
 		if(plen >= Plen-3) {
 			err("pattern too big");
 			return -1;
 		}
 		pat[0] = '^';
 		memcpy(pat+1, line+1, plen);
 		pat[plen+1] = '$';
 		pat[plen+2] = 0;
 		if(*e == '\n')
 			line = e;
 		else
 			line = e+1;
 		if(!search(pat, delim == '/')) {
 			err("pattern not found");
 			return -1;
 		}
 	} else if(*line == '#') {
 		/* absolute byte offset into dictionary */
 		line++;
 		if(!isdigit((uchar)*line))
 			return -1;
 		v = strtoul(line, &e, 10);
 		line = e;
 		dot->doff[0] = v;
 		dot->n = 1;
 		dot->cur = 0;
 	} else if(isdigit((uchar)*line)) {
 		v = strtoul(line, &e, 10);
 		line = e;
 		if(v < 1 || v > dot->n)
 			err(".%d not in range [1,%d], ignored",
 				v, dot->n);
 		else
 			dot->cur = v-1;
 	} else if(*line == '.') {
 		line++;
 	} else {
 		*eptr = line;
 		return 0;
 	}
 	while(*line == '+' || *line == '-') {
 		if(*line == '+')
 			setdotnext();
 		else
 			setdotprev();
 		line++;
 	}
 	*eptr = line;
 	return 1;
 }

 /*
  * Index file is sorted by folded field1.
  * Method: find pre, a folded prefix of r.e. pat,
  * and then low = offset to beginning of
  * line in index file where first match of prefix occurs.
  * Then go through index until prefix no longer matches,
  * adding each line that matches real pattern to dot.
  * Finally, sort dot offsets (uniquing).
  * We know pat len < Plen, and that it is surrounded by ^..$
  */
 int
 search(char *pat, int dofold)
 {
 	int needre, prelen, match, n;
 	Reprog *re;
 	long ioff, v;
 	Rune pre[Plen];
 	Rune lit[Plen];
 	Rune entry[Fieldlen];
 	char fpat[Plen];

 	prelen = getpref(pat+1, pre);
 	if(pat[prelen+1] == 0 || pat[prelen+1] == '$') {
 		runescpy(lit, pre);
 		if(dofold)
 			fold(lit);
 		needre = 0;
 		SET(re);
 	} else {
 		needre = 1;
 		if(dofold) {
 			foldre(fpat, pat);
 			re = regcomp(fpat);
 		} else
 			re = regcomp(pat);
 	}
 	fold(pre);
 	ioff = locate(pre);
 	if(ioff < 0)
 		return 0;
 	dot->n = 0;
 	Bseek(bindex, ioff, 0);
 	for(;;) {
 		if(!getfield(entry))
 			break;
 		if(dofold)
 			fold(entry);
 		if(needre)
 			match = rregexec(re, entry, 0, 0);
 		else
 			match = (acomp(lit, entry) == 0);
 		if(match) {
 			if(!getfield(entry))
 				break;
 			v = runetol(entry);
 			if(dot->n >= dot->maxn) {
 				n = 2*dot->maxn;
 				dot = realloc(dot,
 					sizeof(Addr)+(n-1)*sizeof(long));
 				if(!dot) {
 					err("out of memory");
 					exits("nomem");
 				}
 				dot->maxn = n;
 			}
 			dot->doff[dot->n++] = v;
 		} else {
 			if(!dofold)
 				fold(entry);
 			if(*pre) {
 				n = acomp(pre, entry);
 				if(n < -1 || (!needre && n < 0))
 					break;
 			}
 			/* get to next index entry */
 			if(!getfield(entry))
 				break;
 		}
 	}
 	sortaddr(dot);
 	dot->cur = 0;
 	return dot->n;
 }

 /*
  * Return offset in index file of first line whose folded
  * first field has pre as a prefix.  -1 if none found.
  */
 long
 locate(Rune *pre)
 {
 	long top, bot, mid;
 	Rune entry[Fieldlen];

 	if(*pre == 0)
 		return 0;
 	bot = 0;
 	top = indextop;
 	if(debug>1)
 		fprint(2, "locate looking for prefix %S\n", pre);
 	for(;;) {
 		/*
 		 * Loop invariant: foldkey(bot) < pre <= foldkey(top)
 		 * and bot < top, and bot,top point at beginning of lines
 		 */
 		mid = (top+bot) / 2;
 		mid = seeknextline(bindex, mid);
 		if(debug > 1)
 			fprint(2, "bot=%ld, mid=%ld->%ld, top=%ld\n",
 				bot, (top+bot) / 2, mid, top);
 		if(mid == top || !getfield(entry))
 			break;
 		if(debug > 1)
 			fprint(2, "key=%S\n", entry);
 		/*
 		 * here mid is strictly between bot and top
 		 */
 		fold(entry);
 		if(acomp(pre, entry) <= 0)
 			top = mid;
 		else
 			bot = mid;
 	}
 	/*
 	 * bot < top, but they don't necessarily point at successive lines
 	 * Use linear search from bot to find first line that pre is a
 	 * prefix of
 	 */
 	while((bot = seeknextline(bindex, bot)) <= top) {
 		if(!getfield(entry))
 			return -1;
 		if(debug > 1)
 			fprint(2, "key=%S\n", entry);
 		fold(entry);
 		switch(acomp(pre, entry)) {
 		case -2:
 			return -1;
 		case -1:
 		case 0:
 			return bot;
 		case 1:
 		case 2:
 			continue;
 		}
 	}
 	return -1;

 }

 /*
  * Get prefix of non re-metacharacters, runified, into pre,
  * and return length
  */
 int
 getpref(char *pat, Rune *pre)
 {
 	int n, r;
 	char *p;

 	p = pat;
 	while(*p) {
 		n = chartorune(pre, p);
 		r = *pre;
 		switch(r) {
 		case 0x2e: case 0x2a: case 0x2b: case 0x3f:
 		case 0x5b: case 0x5d: case 0x28: case ')':
 		case 0x7c: case 0x5e: case 0x24:
 			*pre = 0;
 			return p-pat;
 		case '\\':
 			p += n;
 			p += chartorune(++pre, p);
 			pre++;
 			break;
 		default:
 			p += n;
 			pre++;
 		}
 	}
 	return p-pat;
 }

 long
 seeknextline(Biobuf *b, long off)
 {
 	long c;

 	Bseek(b, off, 0);
 	do {
 		c = Bgetrune(b);
 	} while(c>=0 && c!='\n');
 	return Boffset(b);
 }

 /*
  * Get next field out of index file (either tab- or nl- terminated)
  * Answer in *rp, assumed to be Fieldlen long.
  * Return 0 if read error first.
  */
 int
 getfield(Rune *rp)
 {
 	long c;
 	int n;

 	for(n=Fieldlen; n-- > 0; ) {
 		if ((c = Bgetrune(bindex)) < 0)
 			return 0;
 		if(c == '\t' || c == '\n') {
 			*rp = '\0';
 			return 1;
 		}
 		*rp++ = c;
 	}
 	err("word too long");
 	return 0;
 }

 /*
  * A compare longs function suitable for qsort
  */
 static int
 longcmp(const void *av, const void *bv)
 {
 	long v;
 	long *a, *b;

 	a = (long*)av;
 	b = (long*)bv;

 	v = *a - *b;
 	if(v < 0)
 		return -1;
 	else if(v == 0)
 		return 0;
 	else
 		return 1;
 }

 void
 sortaddr(Addr *a)
 {
 	int i, j;
 	long v;

 	if(a->n <= 1)
 		return;

 	qsort(a->doff, a->n, sizeof(long), longcmp);

 	/* remove duplicates */
 	for(i=0, j=0; j < a->n; j++) {
 		v = a->doff[j];
 		if(i > 0 && v == a->doff[i-1])
 			continue;
 		a->doff[i++] = v;
 	}
 	a->n = i;
 }

 Entry
 getentry(int i)
 {
 	long b, e, n;
 	static Entry ans;
 	static int anslen = 0;

 	b = dot->doff[i];
 	e = (*dict->nextoff)(b+1);
 	ans.doff = b;
 	if(e < 0) {
 		err("couldn't seek to entry");
 		ans.start = 0;
 		ans.end = 0;
 	} else {
 		n = e-b;
 		if(n+1 > anslen) {
 			ans.start = realloc(ans.start, n+1);
 			if(!ans.start) {
 				err("out of memory");
 				exits("nomem");
 			}
 			anslen = n+1;
 		}
 		Bseek(bdict, b, 0);
 		n = Bread(bdict, ans.start, n);
 		ans.end = ans.start + n;
 		*ans.end = 0;
 	}
 	return ans;
 }

 void
 setdotnext(void)
 {
 	long b;

 	b = (*dict->nextoff)(dot->doff[dot->cur]+1);
 	if(b < 0) {
 		err("couldn't find a next entry");
 		return;
 	}
 	dot->doff[0] = b;
 	dot->n = 1;
 	dot->cur = 0;
 }

 void
 setdotprev(void)
 {
 	int tryback;
 	long here, last, p;

 	if(dot->cur < 0 || dot->cur >= dot->n)
 		return;
 	tryback = 2000;
 	here = dot->doff[dot->cur];
 	last = 0;
 	while(last == 0) {
 		p = here - tryback;
 		if(p < 0)
 			p = 0;
 		for(;;) {
 			p = (*dict->nextoff)(p+1);
 			if(p < 0)
 				return; /* shouldn't happen */
 			if(p >= here)
 				break;
 			last = p;
 		}
 		if(!last) {
 			if(here - tryback < 0) {
 				err("can't find a previous entry");
 				return;
 			}
 			tryback = 2*tryback;
 		}
 	}
 	dot->doff[0] = last;
 	dot->n = 1;
 	dot->cur = 0;
 }

 /*
  * find the specified file and return a path.
  * default location is #9/dict, but can be
  * in $dictdir instead.
  */
 char*
 dictfile(char *f)
 {
 	static char *dict;
 	static int did;

 	if(!did){
 		dict = getenv("dictpath");
 		did = 1;
 	}

 	if(dict)
 		return smprint("%s/%s", dict, f);
 	return unsharp(smprint("#9/dict/%s", f));
 }
	#include <u.h>
	#include <libc.h>
	#include <bio.h>
	#include <regexp.h>
	#include <ctype.h>
	#include "dict.h"

	/*
	* Assumed index file structure: lines of form
	* [^\t]+\t[0-9]+
	* First field is key, second is byte offset into dictionary.
	* Should be sorted with args -u -t' ' +0f -1 +0 -1 +1n -2
	*/
	typedef struct Addr Addr;

	struct Addr {
	int n; /* number of offsets */
	int cur; /* current position within doff array */
	int maxn; /* actual current size of doff array */
	ulong doff[1]; /* doff[maxn], with 0..n-1 significant */
	};

	Biobuf binbuf;
	Biobuf boutbuf;
	Biobuf bin = &binbuf; / user cmd input */
	Biobuf bout = &boutbuf; / output */
	Biobuf bdict; / dictionary */
	Biobuf bindex; / index file */
	long indextop; /* index offset at end of file */
	int lastcmd; /* last executed command */
	Addr dot; / "current" address */
	Dict dict; / current dictionary */
	int linelen;
	int breaklen = 60;
	int outinhibit;
	int debug;

	void execcmd(int);
	int getpref(char, Rune);
	Entry getentry(int);
	int getfield(Rune*);
	long locate(Rune*);
	int parseaddr(char, char*);
	int parsecmd(char*);
	int search(char*, int);
	long seeknextline(Biobuf*, long);
	void setdotnext(void);
	void setdotprev(void);
	void sortaddr(Addr*);
	void usage(void);

	enum {
	Plen=300, /* max length of a search pattern */
	Fieldlen=200, /* max length of an index field */
	Aslots=10 /* initial number of slots in an address */
	};

	void
	main(int argc, char **argv)
	{
	int i, cmd, kflag;
	char line, p;

	Binit(&binbuf, 0, OREAD);
	Binit(&boutbuf, 1, OWRITE);
	kflag = 0;
	line = 0;
	dict = 0;

	for(i=0; dicts[i].name; i++){
	if(access(dictfile(dicts[i].path), 0)>=0 && access(dictfile(dicts[i].indexpath), 0)>=0){
	dict = &dicts[i];
	break;
	}
	}
	ARGBEGIN {
	case 'd':
	p = ARGF();
	dict = 0;
	if(p) {
	for(i=0; dicts[i].name; i++)
	if(strcmp(p, dicts[i].name)==0) {
	dict = &dicts[i];
	break;
	}
	}
	if(!dict)
	usage();
	break;
	case 'c':
	line = ARGF();
	if(!line)
	usage();
	break;
	case 'k':
	kflag++;
	break;
	case 'D':
	debug++;
	break;
	default:
	usage();
	ARGEND }
	if(dict == 0){
	err("no dictionaries present on this system");
	exits("nodict");
	}

	if(kflag) {
	(*dict->printkey)();
	exits(0);
	}
	if(argc > 1)
	usage();
	else if(argc == 1) {
	if(line)
	usage();
	p = argv[0];
	line = malloc(strlen(p)+5);
	sprint(line, "/%s/P\n", p);
	}
	dict->path = dictfile(dict->path);
	dict->indexpath = dictfile(dict->indexpath);
	bdict = Bopen(dict->path, OREAD);
	if(!bdict) {
	err("can't open dictionary %s", dict->path);
	exits("nodict");
	}
	bindex = Bopen(dict->indexpath, OREAD);
	if(!bindex) {
	err("can't open index %s", dict->indexpath);
	exits("noindex");
	}
	indextop = Bseek(bindex, 0L, 2);

	dot = malloc(sizeof(Addr)+(Aslots-1)*sizeof(ulong));
	dot->n = 0;
	dot->cur = 0;
	dot->maxn = Aslots;
	lastcmd = 0;

	if(line) {
	cmd = parsecmd(line);
	if(cmd)
	execcmd(cmd);
	} else {
	for(;;) {
	Bprint(bout, "*");
	Bflush(bout);
	line = Brdline(bin, '\n');
	linelen = 0;
	if(!line)
	break;
	cmd = parsecmd(line);
	if(cmd) {
	execcmd(cmd);
	lastcmd = cmd;
	}
	}
	}
	exits(0);
	}

	void
	usage(void)
	{
	int i;
	char a, b;

	Bprint(bout, "Usage: %s [-d dict] [-k] [-c cmd] [word]\n", argv0);
	Bprint(bout, "dictionaries (brackets mark dictionaries not present on this system):\n");
	for(i = 0; dicts[i].name; i++){
	a = b = "";
	if(access(dictfile(dicts[i].path), 0)<0 \|\| access(dictfile(dicts[i].indexpath), 0)<0){
	a = "[";
	b = "]";
	}
	Bprint(bout, " %s%s\t%s%s\n", a, dicts[i].name, dicts[i].desc, b);
	}
	exits("usage");
	}

	int
	parsecmd(char *line)
	{
	char *e;
	int cmd, ans;

	if(parseaddr(line, &e) >= 0)
	line = e;
	else
	return 0;
	cmd = *line;
	ans = cmd;
	if(isupper(cmd))
	cmd = tolower(cmd);
	if(!(cmd == 'a' \|\| cmd == 'h' \|\| cmd == 'p' \|\| cmd == 'r' \|\|
	cmd == '\n')) {
	err("unknown command %c", cmd);
	return 0;
	}
	if(cmd == '\n')
	switch(lastcmd) {
	case 0: ans = 'H'; break;
	case 'H': ans = 'p'; break;
	default : ans = lastcmd; break;
	}
	else if(line[1] != '\n' && line[1] != 0)
	err("extra stuff after command %c ignored", cmd);
	return ans;
	}

	void
	execcmd(int cmd)
	{
	Entry e;
	int cur, doall;

	if(isupper(cmd)) {
	doall = 1;
	cmd = tolower(cmd);
	cur = 0;
	} else {
	doall = 0;
	cur = dot->cur;
	}
	if(debug && doall && cmd == 'a')
	Bprint(bout, "%d entries, cur=%d\n", dot->n, cur+1);
	for(;;){
	if(cur >= dot->n)
	break;
	if(doall) {
	Bprint(bout, "%d\t", cur+1);
	linelen += 4 + (cur >= 10);
	}
	switch(cmd) {
	case 'a':
	Bprint(bout, "#%lud\n", dot->doff[cur]);
	break;
	case 'h':
	case 'p':
	case 'r':
	e = getentry(cur);
	(*dict->printentry)(e, cmd);
	break;
	}
	cur++;
	if(doall) {
	if(cmd == 'p' \|\| cmd == 'r') {
	Bputc(bout, '\n');
	linelen = 0;
	}
	} else
	break;
	}
	if(cur >= dot->n)
	cur = 0;
	dot->cur = cur;
	}

	/*
	* Address syntax: ('.' \| '/' re '/' \| '!' re '!' \| number \| '#' number) ('+' \| '-')*
	* Answer goes in dot.
	* Return -1 if address starts, but get error.
	* Return 0 if no address.
	*/
	int
	parseaddr(char line, char *eptr)
	{
	int delim, plen;
	ulong v;
	char *e;
	char pat[Plen];

	if(line == '/' \|\| line == '!') {
	/* anchored regular expression match; '!' means no folding */
	if(*line == '/') {
	delim = '/';
	e = strpbrk(line+1, "/\n");
	} else {
	delim = '!';
	e = strpbrk(line+1, "!\n");
	}
	plen = e-line-1;
	if(plen >= Plen-3) {
	err("pattern too big");
	return -1;
	}
	pat[0] = '^';
	memcpy(pat+1, line+1, plen);
	pat[plen+1] = '$';
	pat[plen+2] = 0;
	if(*e == '\n')
	line = e;
	else
	line = e+1;
	if(!search(pat, delim == '/')) {
	err("pattern not found");
	return -1;
	}
	} else if(*line == '#') {
	/* absolute byte offset into dictionary */
	line++;
	if(!isdigit((uchar)*line))
	return -1;
	v = strtoul(line, &e, 10);
	line = e;
	dot->doff[0] = v;
	dot->n = 1;
	dot->cur = 0;
	} else if(isdigit((uchar)*line)) {
	v = strtoul(line, &e, 10);
	line = e;
	if(v < 1 \|\| v > dot->n)
	err(".%d not in range [1,%d], ignored",
	v, dot->n);
	else
	dot->cur = v-1;
	} else if(*line == '.') {
	line++;
	} else {
	*eptr = line;
	return 0;
	}
	while(line == '+' \|\| line == '-') {
	if(*line == '+')
	setdotnext();
	else
	setdotprev();
	line++;
	}
	*eptr = line;
	return 1;
	}

	/*
	* Index file is sorted by folded field1.
	* Method: find pre, a folded prefix of r.e. pat,
	* and then low = offset to beginning of
	* line in index file where first match of prefix occurs.
	* Then go through index until prefix no longer matches,
	* adding each line that matches real pattern to dot.
	* Finally, sort dot offsets (uniquing).
	* We know pat len < Plen, and that it is surrounded by ^..$
	*/
	int
	search(char *pat, int dofold)
	{
	int needre, prelen, match, n;
	Reprog *re;
	long ioff, v;
	Rune pre[Plen];
	Rune lit[Plen];
	Rune entry[Fieldlen];
	char fpat[Plen];

	prelen = getpref(pat+1, pre);
	if(pat[prelen+1] == 0 \|\| pat[prelen+1] == '$') {
	runescpy(lit, pre);
	if(dofold)
	fold(lit);
	needre = 0;
	SET(re);
	} else {
	needre = 1;
	if(dofold) {
	foldre(fpat, pat);
	re = regcomp(fpat);
	} else
	re = regcomp(pat);
	}
	fold(pre);
	ioff = locate(pre);
	if(ioff < 0)
	return 0;
	dot->n = 0;
	Bseek(bindex, ioff, 0);
	for(;;) {
	if(!getfield(entry))
	break;
	if(dofold)
	fold(entry);
	if(needre)
	match = rregexec(re, entry, 0, 0);
	else
	match = (acomp(lit, entry) == 0);
	if(match) {
	if(!getfield(entry))
	break;
	v = runetol(entry);
	if(dot->n >= dot->maxn) {
	n = 2*dot->maxn;
	dot = realloc(dot,
	sizeof(Addr)+(n-1)*sizeof(long));
	if(!dot) {
	err("out of memory");
	exits("nomem");
	}
	dot->maxn = n;
	}
	dot->doff[dot->n++] = v;
	} else {
	if(!dofold)
	fold(entry);
	if(*pre) {
	n = acomp(pre, entry);
	if(n < -1 \|\| (!needre && n < 0))
	break;
	}
	/* get to next index entry */
	if(!getfield(entry))
	break;
	}
	}
	sortaddr(dot);
	dot->cur = 0;
	return dot->n;
	}

	/*
	* Return offset in index file of first line whose folded
	* first field has pre as a prefix. -1 if none found.
	*/
	long
	locate(Rune *pre)
	{
	long top, bot, mid;
	Rune entry[Fieldlen];

	if(*pre == 0)
	return 0;
	bot = 0;
	top = indextop;
	if(debug>1)
	fprint(2, "locate looking for prefix %S\n", pre);
	for(;;) {
	/*
	* Loop invariant: foldkey(bot) < pre <= foldkey(top)
	* and bot < top, and bot,top point at beginning of lines
	*/
	mid = (top+bot) / 2;
	mid = seeknextline(bindex, mid);
	if(debug > 1)
	fprint(2, "bot=%ld, mid=%ld->%ld, top=%ld\n",
	bot, (top+bot) / 2, mid, top);
	if(mid == top \|\| !getfield(entry))
	break;
	if(debug > 1)
	fprint(2, "key=%S\n", entry);
	/*
	* here mid is strictly between bot and top
	*/
	fold(entry);
	if(acomp(pre, entry) <= 0)
	top = mid;
	else
	bot = mid;
	}
	/*
	* bot < top, but they don't necessarily point at successive lines
	* Use linear search from bot to find first line that pre is a
	* prefix of
	*/
	while((bot = seeknextline(bindex, bot)) <= top) {
	if(!getfield(entry))
	return -1;
	if(debug > 1)
	fprint(2, "key=%S\n", entry);
	fold(entry);
	switch(acomp(pre, entry)) {
	case -2:
	return -1;
	case -1:
	case 0:
	return bot;
	case 1:
	case 2:
	continue;
	}
	}
	return -1;

	}

	/*
	* Get prefix of non re-metacharacters, runified, into pre,
	* and return length
	*/
	int
	getpref(char pat, Rune pre)
	{
	int n, r;
	char *p;

	p = pat;
	while(*p) {
	n = chartorune(pre, p);
	r = *pre;
	switch(r) {
	case 0x2e: case 0x2a: case 0x2b: case 0x3f:
	case 0x5b: case 0x5d: case 0x28: case ')':
	case 0x7c: case 0x5e: case 0x24:
	*pre = 0;
	return p-pat;
	case '\\':
	p += n;
	p += chartorune(++pre, p);
	pre++;
	break;
	default:
	p += n;
	pre++;
	}
	}
	return p-pat;
	}

	long
	seeknextline(Biobuf *b, long off)
	{
	long c;

	Bseek(b, off, 0);
	do {
	c = Bgetrune(b);
	} while(c>=0 && c!='\n');
	return Boffset(b);
	}

	/*
	* Get next field out of index file (either tab- or nl- terminated)
	* Answer in *rp, assumed to be Fieldlen long.
	* Return 0 if read error first.
	*/
	int
	getfield(Rune *rp)
	{
	long c;
	int n;

	for(n=Fieldlen; n-- > 0; ) {
	if ((c = Bgetrune(bindex)) < 0)
	return 0;
	if(c == '\t' \|\| c == '\n') {
	*rp = '\0';
	return 1;
	}
	*rp++ = c;
	}
	err("word too long");
	return 0;
	}

	/*
	* A compare longs function suitable for qsort
	*/
	static int
	longcmp(const void av, const void bv)
	{
	long v;
	long a, b;

	a = (long*)av;
	b = (long*)bv;

	v = a - b;
	if(v < 0)
	return -1;
	else if(v == 0)
	return 0;
	else
	return 1;
	}

	void
	sortaddr(Addr *a)
	{
	int i, j;
	long v;

	if(a->n <= 1)
	return;

	qsort(a->doff, a->n, sizeof(long), longcmp);

	/* remove duplicates */
	for(i=0, j=0; j < a->n; j++) {
	v = a->doff[j];
	if(i > 0 && v == a->doff[i-1])
	continue;
	a->doff[i++] = v;
	}
	a->n = i;
	}

	Entry
	getentry(int i)
	{
	long b, e, n;
	static Entry ans;
	static int anslen = 0;

	b = dot->doff[i];
	e = (*dict->nextoff)(b+1);
	ans.doff = b;
	if(e < 0) {
	err("couldn't seek to entry");
	ans.start = 0;
	ans.end = 0;
	} else {
	n = e-b;
	if(n+1 > anslen) {
	ans.start = realloc(ans.start, n+1);
	if(!ans.start) {
	err("out of memory");
	exits("nomem");
	}
	anslen = n+1;
	}
	Bseek(bdict, b, 0);
	n = Bread(bdict, ans.start, n);
	ans.end = ans.start + n;
	*ans.end = 0;
	}
	return ans;
	}

	void
	setdotnext(void)
	{
	long b;

	b = (*dict->nextoff)(dot->doff[dot->cur]+1);
	if(b < 0) {
	err("couldn't find a next entry");
	return;
	}
	dot->doff[0] = b;
	dot->n = 1;
	dot->cur = 0;
	}

	void
	setdotprev(void)
	{
	int tryback;
	long here, last, p;

	if(dot->cur < 0 \|\| dot->cur >= dot->n)
	return;
	tryback = 2000;
	here = dot->doff[dot->cur];
	last = 0;
	while(last == 0) {
	p = here - tryback;
	if(p < 0)
	p = 0;
	for(;;) {
	p = (*dict->nextoff)(p+1);
	if(p < 0)
	return; /* shouldn't happen */
	if(p >= here)
	break;
	last = p;
	}
	if(!last) {
	if(here - tryback < 0) {
	err("can't find a previous entry");
	return;
	}
	tryback = 2*tryback;
	}
	}
	dot->doff[0] = last;
	dot->n = 1;
	dot->cur = 0;
	}

	/*
	* find the specified file and return a path.
	* default location is #9/dict, but can be
	* in $dictdir instead.
	*/
	char*
	dictfile(char *f)
	{
	static char *dict;
	static int did;

	if(!did){
	dict = getenv("dictpath");
	did = 1;
	}

	if(dict)
	return smprint("%s/%s", dict, f);
	return unsharp(smprint("#9/dict/%s", f));
	}