src/cmd/spell/sprog.c - plan9 - Git at Google

 #include <u.h>
 #include <libc.h>
 #include <bio.h>
 #include <ctype.h>
 #include "code.h"

 /* fig leaves for possibly signed char quantities */
 #define ISUPPER(c)	isupper((c)&0xff)
 #define ISLOWER(c)	islower((c)&0xff)
 #define	ISALPHA(c)	isalpha((c)&0xff)
 #define	ISDIGIT(c)	isdigit((c)&0xff)
 #define ISVOWEL(c)	voweltab[(c)&0xff]
 #define Tolower(c)	(ISUPPER(c)? (c)-'A'+'a': (c))
 #define pair(a,b)	(((a)<<8) | (b))
 #define DLEV		2
 #define DSIZ		40

 typedef	long	Bits;
 #define	Set(h, f)	((long)(h) & (f))

 /*
  * Suffix rules.  All share the signature stored in Suftab.p1/p2:
  *	(ep, d, a, lev, flag)
  * where ep points into word[] just past the candidate stem, d and a
  * are the two derivation messages from the table entry, lev is the
  * derivation level and flag the set of acceptable classes.
  */
 Bits 	nop(char*, char*, char*, int, int);
 Bits 	strip(char*, char*, char*, int, int);
 Bits 	ize(char*, char*, char*, int, int);
 Bits 	i_to_y(char*, char*, char*, int, int);
 Bits 	ily(char*, char*, char*, int, int);
 Bits 	subst(char*, char*, char*, int, int);
 Bits 	CCe(char*, char*, char*, int, int);
 Bits 	tion(char*, char*, char*, int, int);
 Bits 	an(char*, char*, char*, int, int);
 Bits 	s(char*, char*, char*, int, int);
 Bits 	es(char*, char*, char*, int, int);
 Bits 	bility(char*, char*, char*, int, int);
 Bits 	y_to_e(char*, char*, char*, int, int);
 Bits 	VCe(char*, char*, char*, int, int);

 /* lookup drivers: prefix stripping, single dictionary probe, suffix stripping */
 Bits 	trypref(char*, char*, int, int);
 Bits	tryword(char*, char*, int, int);
 Bits 	trysuff(char*, int, int);
 Bits	dict(char*, char*);
 /* debugging output of a Bits value on file descriptor 2 */
 void	typeprint(Bits);
 void	pcomma(char*);

 /* helpers */
 void	ise(void);
 int	ordinal(void);
 char*	skipv(char*);
 int	inun(char*, Bits);
 char*	ztos(char*);
 void	readdict(char*);

 /* one prefix that trypref may strip from the front of a word */
 typedef	struct	Ptab	Ptab;
 struct	Ptab
 {
 	char*	s;	/* prefix text; a zero pointer ends each table */
 	int	flag;	/* 0 or IN; IN prefixes are additionally checked by inun() */
 };

 /*
  * one suffix rule.  trysuff matches suf against the end of the word,
  * calls p1 and, if that returns 0 and p2 is set, calls p2.
  */
 typedef	struct	Suftab	Suftab;
 struct	Suftab
 {
 	char	*suf;		/* suffix spelled backwards; zero ends a table */
 	Bits	(*p1)(char*, char*, char*, int, int);	/* first rule to try */
 	int	n1;		/* p1 is called with ep-n1 */
 	char	*d1;		/* passed to p1 as d */
 	char	*a1;		/* passed to p1 as a */
 	int	flag;		/* passed (with STOP added) to p1 and p2 as flag */
 	int	affixable;	/* must share a bit with the caller's flag, else trysuff gives up */
 	Bits	(*p2)(char*, char*, char*, int, int);	/* fallback rule, may be 0 */
 	int	n2;		/* p2 is called with ep-n2 */
 	char	*d2;		/* passed to p2 as d */
 	char	*a2;		/* passed to p2 as a */
 };

 /*
  * Suffix tables, one per final letter of the word (see suftab[]).
  * Suffixes are spelled backwards because trysuff compares them with
  * the word from its last character toward the front.  Entries are
  * tried in order and the first one that matches ends the search, so
  * longer suffixes come before the shorter ones they contain; nop
  * entries exist only to stop a later, shorter entry from matching.
  * Each table ends with a zero entry.
  */
 Suftab	staba[] = {
 	{"aibohp",subst,1,"-e+ia","",NOUN, NOUN},
 	0
 };

 Suftab	stabc[] =
 {
 	{"cai",strip,1,"","+c",N_AFFIX, ADJ|NOUN},
 	{"citsi",strip,2,"","+ic",N_AFFIX, ADJ | N_AFFIX | NOUN},
 	{"citi",ize,1,"-e+ic","",N_AFFIX, ADJ },
 	{"cihparg",i_to_y,1,"-y+ic","",NOUN, ADJ|NOUN },
 	{"cipocs",ize,1,"-e+ic","",NOUN, ADJ },
 	{"cirtem",i_to_y,1,"-y+ic","",NOUN, ADJ },
 	{"cigol",i_to_y,1,"-y+ic","",NOUN, ADJ },
 	{"cimono",i_to_y,1,"-y+ic","",NOUN, ADJ },
 	{"cibohp",subst,1,"-e+ic","",NOUN, ADJ },
 	0
 };
 Suftab	stabd[] =
 {
 	{"de",strip,1,"","+d",ED,ADJ |COMP,i_to_y,2,"-y+ied","+ed"},
 	{"dooh",ily,4,"-y+ihood","+hood",NOUN | ADV, NOUN},
 	0
 };
 Suftab	stabe[] =
 {
 	/*
 	 * V_affix for comment ->commence->commentment??
 	 */
 	{"ecna",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX},
 	{"ecne",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX},
 	{"elbaif",i_to_y,4,"-y+iable","",V_IRREG,ADJ},
 	{"elba",CCe,4,"-e+able","+able",V_AFFIX,ADJ},
 	{"evi",subst,0,"-ion+ive","",N_AFFIX | V_AFFIX,NOUN | N_AFFIX| ADJ},
 	{"ezi",CCe,3,"-e+ize","+ize",N_AFFIX|ADJ ,V_AFFIX | VERB |ION | COMP},
 	{"ekil",strip,4,"","+like",N_AFFIX ,ADJ},
 	0
 };
 Suftab	stabg[] =
 {
 	{"gniee",strip,3,"","+ing",V_IRREG ,ADJ|NOUN},
 	{"gnikam",strip,6,"","+making",NOUN,NOUN},
 	{"gnipeek",strip,7,"","+keeping",NOUN,NOUN},
 	{"gni",CCe,3,"-e+ing","+ing",V_IRREG ,ADJ|ED|NOUN},
 	0
 };
 Suftab	stabl[] =
 {
 	{"ladio",strip,2,"","+al",NOUN |ADJ,ADJ},
 	{"laci",strip,2,"","+al",NOUN |ADJ,ADJ |NOUN|N_AFFIX},
 	{"latnem",strip,2,"","+al",N_AFFIX,ADJ},
 	{"lanoi",strip,2,"","+al",N_AFFIX,ADJ|NOUN},
 	{"luf",ily,3,"-y+iful","+ful",N_AFFIX,ADJ | NOUN},
 	0
 };
 Suftab	stabm[] =
 {
 		/* congregational + ism */
 	{"msi",CCe,3,"-e+ism","ism",N_AFFIX|ADJ,NOUN},
 	{"margo",subst,-1,"-ph+m","",NOUN,NOUN},
 	0
 };
 Suftab	stabn[] =
 {
 	{"noitacifi",i_to_y,6,"-y+ication","",ION,NOUN | N_AFFIX},
 	{"noitazi",ize,4,"-e+ation","",ION,NOUN| N_AFFIX},
 	{"noit",tion,3,"-e+ion","+ion",ION,NOUN| N_AFFIX | V_AFFIX |VERB|ACTOR},
 	{"naino",an,3,"","+ian",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
 	{"namow",strip,5,"","+woman",MAN,PROP_COLLECT|N_AFFIX},
 	{"nam",strip,3,"","+man",MAN,PROP_COLLECT | N_AFFIX | VERB},
 	{"na",an,1,"","+n",NOUN|PROP_COLLECT,NOUN | N_AFFIX},
 	{"nemow",strip,5,"","+women",MAN,PROP_COLLECT},
 	{"nem",strip,3,"","+man",MAN,PROP_COLLECT},
 	{"nosrep",strip,6,"","+person",MAN,PROP_COLLECT},
 	0
 };
 Suftab	stabp[] =
 {
 	{"pihs",strip,4,"","+ship",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
 	0
 };
 Suftab	stabr[] =
 {
 	{"rehparg",subst,1,"-y+er","",ACTOR,NOUN,strip,2,"","+er"},
 	{"reyhparg",nop,0,"","",0,NOUN},
 	{"reyl",nop,0,"","",0,NOUN},
 	{"rekam",strip,5,"","+maker",NOUN,NOUN},
 	{"repeek",strip,6,"","+keeper",NOUN,NOUN},
 	{"re",strip,1,"","+r",ACTOR,NOUN | N_AFFIX|VERB|ADJ,	i_to_y,2,"-y+ier","+er"},
 	{"rota",tion,2,"-e+or","",ION,NOUN| N_AFFIX|_Y},
 	{"rotc",tion,2,"","+or",ION,NOUN| N_AFFIX},
 	{"rotp",tion,2,"","+or",ION,NOUN| N_AFFIX},
 	0
 };
 Suftab	stabs[] =
 {
 	{"ssen",ily,4,"-y+iness","+ness",ADJ|ADV,NOUN| N_AFFIX},
 	{"ssel",ily,4,"-y+iless","+less",NOUN | PROP_COLLECT,ADJ },
 	{"se",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH ,	es,2,"-y+ies","+es"},
 	{"s'",s,2,"","+'s",PROP_COLLECT | NOUN,DONT_TOUCH },
 	{"s",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH  },
 	0
 };
 Suftab	stabt[] =
 {
 	{"tnem",strip,4,"","+ment",V_AFFIX,NOUN | N_AFFIX | ADJ|VERB},
 	{"tse",strip,2,"","+st",EST,DONT_TOUCH,	i_to_y,3,"-y+iest","+est" },
 	{"tsigol",i_to_y,2,"-y+ist","",N_AFFIX,NOUN | N_AFFIX},
 	{"tsi",CCe,3,"-e+ist","+ist",N_AFFIX|ADJ,NOUN | N_AFFIX|COMP},
 	0
 };
 Suftab	staby[] =
 {
 	{"ycna",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX},
 	{"ycne",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX},
 	{"ytilib",bility,5,"-le+ility","",ADJ | V_AFFIX,NOUN | N_AFFIX},
 	{"ytisuo",nop,0,"","",NOUN},
 	{"ytilb",nop,0,"","",0,NOUN},
 	{"yti",CCe,3,"-e+ity","+ity",ADJ ,NOUN | N_AFFIX },
 	{"ylb",y_to_e,1,"-e+y","",ADJ,ADV},
 	{"ylc",nop,0,"","",0},
 	{"ylelb",nop,0,"","",0},
 	{"ylelp",nop,0,"","",0},
 	{"yl",ily,2,"-y+ily","+ly",ADJ,ADV|COMP},
 	{"yrtem",subst,0,"-er+ry","",NOUN,NOUN | N_AFFIX},
 	{"y",CCe,1,"-e+y","+y",_Y,ADJ|COMP},
 	0
 };
 /* empty table shared by every letter that ends no known suffix */
 Suftab	stabz[] =
 {
 	0
 };
 /*
  * suffix table for each final letter, indexed by letter-'a';
  * stabz is the empty table
  */
 Suftab*	suftab[] =
 {
 	staba,	/* a */
 	stabz,	/* b */
 	stabc,	/* c */
 	stabd,	/* d */
 	stabe,	/* e */
 	stabz,	/* f */
 	stabg,	/* g */
 	stabz,	/* h */
 	stabz,	/* i */
 	stabz,	/* j */
 	stabz,	/* k */
 	stabl,	/* l */
 	stabm,	/* m */
 	stabn,	/* n */
 	stabz,	/* o */
 	stabp,	/* p */
 	stabz,	/* q */
 	stabr,	/* r */
 	stabs,	/* s */
 	stabt,	/* t */
 	stabz,	/* u */
 	stabz,	/* v */
 	stabz,	/* w */
 	stabz,	/* x */
 	staby,	/* y */
 	stabz	/* z */
 };

 /*
  * Prefix tables, one per initial letter (see preftab[]).  Each entry
  * is a prefix string followed by its flag; a lone 0 ends the table.
  * lookuppref takes the first entry that matches, so a longer prefix
  * must be listed before a shorter one it begins with.
  */
 Ptab	ptaba[] =
 {
 	"anti", 0,
 	"auto", 0,
 	0
 };
 Ptab	ptabb[] =
 {
 	"bio", 0,
 	0
 };
 Ptab	ptabc[] =
 {
 	"counter", 0,
 	0
 };
 Ptab	ptabd[] =
 {
 	"dis", 0,
 	0
 };
 Ptab	ptabe[] =
 {
 	"electro", 0,
 	0
 };
 Ptab	ptabf[] =
 {
 	"femto", 0,
 	0
 };
 Ptab	ptabg[] =
 {
 	"geo", 0,
 	"giga", 0,
 	0
 };
 Ptab	ptabh[] =
 {
 	"hyper", 0,
 	0
 };
 Ptab	ptabi[] =
 {
 	"immuno", 0,
 	"im", IN,
 	"intra", 0,
 	"inter", 0,
 	"in", IN,
 	"ir", IN,
 	"iso", 0,
 	0
 };
 Ptab	ptabj[] =
 {
 	0
 };
 Ptab	ptabk[] =
 {
 	"kilo", 0,
 	0
 };
 Ptab	ptabl[] =
 {
 	0
 };
 Ptab	ptabm[] =
 {
 	"magneto", 0,
 	"mega", 0,
 	"meta", 0,
 	"micro", 0,
 	"mid", 0,
 	"milli", 0,
 	"mini", 0,
 	"mis", 0,
 	"mono", 0,
 	"multi", 0,
 	0
 };
 Ptab	ptabn[] =
 {
 	"nano", 0,
 	"neuro", 0,
 	"non", 0,
 	0
 };
 Ptab	ptabo[] =
 {
 	"out", 0,
 	"over", 0,
 	0
 };
 Ptab	ptabp[] =
 {
 	"para", 0,
 	"photo", 0,
 	"pico", 0,
 	"poly", 0,
 	"pre", 0,
 	"pseudo", 0,
 	"psycho", 0,
 	0
 };
 Ptab	ptabq[] =
 {
 	"quasi", 0,
 	0
 };
 Ptab	ptabr[] =
 {
 	"radio", 0,
 	"re", 0,
 	0
 };
 Ptab	ptabs[] =
 {
 	"semi", 0,
 	"stereo", 0,
 	"sub", 0,
 	"super", 0,
 	0
 };
 Ptab	ptabt[] =
 {
 	"tele", 0,
 	"tera", 0,
 	"thermo", 0,
 	0
 };
 Ptab	ptabu[] =
 {
 	"ultra", 0,
 	"under", 0,	/*must precede un*/
 	"un", IN,
 	0
 };
 Ptab	ptabv[] =
 {
 	0
 };
 Ptab	ptabw[] =
 {
 	0
 };
 Ptab	ptabx[] =
 {
 	0
 };
 Ptab	ptaby[] =
 {
 	0
 };
 Ptab	ptabz[] =
 {
 	0
 };

 /* prefix table for each initial letter, indexed by letter-'a' */
 Ptab*	preftab[] =
 {
 	ptaba,
 	ptabb,
 	ptabc,
 	ptabd,
 	ptabe,
 	ptabf,
 	ptabg,
 	ptabh,
 	ptabi,
 	ptabj,
 	ptabk,
 	ptabl,
 	ptabm,
 	ptabn,
 	ptabo,
 	ptabp,
 	ptabq,
 	ptabr,
 	ptabs,
 	ptabt,
 	ptabu,
 	ptabv,
 	ptabw,
 	ptabx,
 	ptaby,
 	ptabz
 };

 typedef struct {
 	char *mesg;
 	enum { NONE, SUFF, PREF} type;
 } Deriv;

 /* option flags, set in main */
 int	aflag;		/* -a: input lines carry an id ending at the second ':' */
 int	cflag;		/* -c or -C: print one status character per word */
 int	fflag;		/* -f: an explicit dictionary file was given */
 int	vflag;		/* -v or -C: collect and report derivations */
 int	xflag;		/* -x: trace every dictionary probe on fd 2 */
 int 	nflag;		/* not referenced in the code shown here */
 char	word[500];	/* working copy of the current word; the rules edit it in place */
 char*	original;	/* the word as read from the input line */
 Deriv	emptyderiv;	/* zero value used to reset deriv[] slots */
 Deriv	deriv[DSIZ+3];	/* derivation steps, indexed by level */
 char	affix[DSIZ*10];	/* 10 is longest affix message */
 int	prefcount;	/* prefixes in the derivation last recorded by tryword */
 int 	suffcount;	/* suffixes in the derivation last recorded by tryword */
 char*	acmeid;		/* with -a: the text before the word on the input line */
 char	space[300000];	/* must be as large as "words"+"space" in pcode run */
 Bits	encode[2048];	/* must be as long as "codes" in pcode run */
 int	nencode;	/* number of entries read into encode[] */
 char	voweltab[256];	/* nonzero for aeiouy in either case */
 char*	spacep[128*128+1];	/* pointer to words starting with 'xx' */
 Biobuf	bin;		/* buffered input on fd 0 */
 Biobuf	bout;		/* buffered output on fd 1 */

 /* dictionary paths; main passes both through unsharp() before use */
 char*	codefile = "#9/lib/amspell";
 char*	brfile = "#9/lib/brspell";
 char*	Usage = "usage";

 /*
  * Read one word per line from standard input and report on each:
  * by default words not found (or found with STOP set) are echoed;
  * with -c/-C a single status character is printed per word; with -v
  * the derivation of accepted words is printed as "affixes<TAB>word".
  */
 void
 main(int argc, char *argv[])
 {
 	char *ep, *cp;
 	char *dp;
 	int j, i, c;
 	int low;
 	Bits h;

 	/* NOTE(review): unsharp presumably expands the "#9" path prefix -- confirm */
 	codefile = unsharp(codefile);
 	brfile = unsharp(brfile);

 	Binit(&bin, 0, OREAD);
 	Binit(&bout, 1, OWRITE);
 	for(i=0; c = "aeiouyAEIOUY"[i]; i++)
 		voweltab[c] = 1;
 	/* option parsing: several letters may share one argument */
 	while(argc > 1) {
 		if(argv[1][0] != '-')
 			break;
 		for(i=1; c = argv[1][i]; i++)
 		switch(c) {
 		default:
 			fprint(2, "usage: spell [-bcCvx] [-f file]\n");
 			exits(Usage);

 		case 'a':
 			aflag++;
 			continue;

 		case 'b':
 			/* British: rewrite the suffix tables and, unless -f was seen, switch dictionary */
 			ise();
 			if(!fflag)
 				codefile = brfile;
 			continue;

 		case 'C':		/* for "correct" */
 			vflag++;
 			/* falls through: -C implies -c */
 		case 'c':		/* for ocr */
 			cflag++;
 			continue;

 		case 'v':
 			vflag++;
 			continue;

 		case 'x':
 			xflag++;
 			continue;

 		case 'f':
 			/* the file name is the next argument; stop scanning this one */
 			if(argc <= 2) {
 				fprint(2, "spell: -f requires another argument\n");
 				exits(Usage);
 			}
 			argv++;
 			argc--;
 			codefile = argv[1];
 			fflag++;
 			goto brk;
 		}
 	brk:
 		argv++;
 		argc--;
 	}
 	readdict(codefile);
 	if(argc > 1) {
 		fprint(2, "usage: spell [-bcCvx] [-f file]\n");
 		exits(Usage);
 	}
 	/* -a overrides -c, -C and -v */
 	if(aflag)
 		cflag = vflag = 0;

 	for(;;) {
 		affix[0] = 0;
 		original = Brdline(&bin, '\n');
 		if(original == 0)
 			exits(0);
 		/* replace the line's final byte (the delimiter) with a terminator */
 		original[Blinelen(&bin)-1] = 0;
 		low = 0;

 		if(aflag) {
 			/* split "id:...:word" at the second ':'; exit quietly if it is missing */
 			acmeid = original;
 			while(*original != ':')
 				if(*original++ == 0)
 					exits(0);
 			while(*++original != ':')
 				if(*original == 0)
 					exits(0);
 			*original++ = 0;
 		}
 		/* copy into word[] (truncating to fit) and count lower-case letters */
 		for(ep=word,dp=original; j = *dp; ep++,dp++) {
 			if(ISLOWER(j))
 				low++;
 			if(ep >= word+sizeof(word)-1)
 				break;
 			*ep = j;
 		}
 		*ep = 0;

 		/* well-formed ordinals such as 21st are accepted silently */
 		if(ISDIGIT(word[0]) && ordinal())
 			continue;

 		h = 0;
 		/* no lower-case letters: try as written, else lower-case all but the first */
 		if(!low && !(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH)))
 			for(cp=original+1,dp=word+1; dp<ep; dp++,cp++)
 				*dp = Tolower(*cp);
 		if(!h)
 		for(;;) {	/* at most twice */
 			if(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH))
 				break;
 			if(h = trysuff(ep,0,ALL|STOP|DONT_TOUCH))
 				break;
 			if(!ISUPPER(word[0]))
 				break;
 			/*
 			 * capitalized and not found: restore word[] from the
 			 * input (the rules may have edited it), lower-case the
 			 * first letter and go around once more
 			 */
 			cp = original;
 			dp = word;
 			while(*dp = *cp++) {
 					if(!low)
 						*dp = Tolower(*dp);
 				dp++;
 			}
 			word[0] = Tolower(word[0]);
 		}

 		if(cflag) {
 			/* '-' rejected, '+' accepted, or with -C a digit built from the affix counts */
 			if(!h || Set(h,STOP))
 				print("-");
 			else if(!vflag)
 				print("+");
 			else
 				print("%c",'0' + (suffcount>0) +
 				   (prefcount>4? 8: 2*prefcount));
 		} else if(!h || Set(h,STOP)) {
 			/* rejected word: echo it (with its id under -a) */
 			if(aflag)
 				Bprint(&bout, "%s:%s\n", acmeid, original);
 			else
 				Bprint(&bout, "%s\n", original);
 		} else if(affix[0] != 0 && affix[0] != '.')
 			/* accepted through a derivation that tryword recorded */
 			print("%s\t%s\n", affix, original);
 	}
 }

 /*	strip exactly one suffix and do
  *	indicated routine(s), which may recursively
  *	strip suffixes
  *
  *	ep	one past the last character of the candidate in word[]
  *	lev	derivation level of the caller; advanced by DLEV here
  *	flag	classes acceptable to the caller (MONO is ignored)
  *
  *	returns the Bits of the stem that was found, or 0.
  *	Only the first table entry whose suffix matches and whose stem
  *	contains a vowel is tried.
  */
 Bits
 trysuff(char* ep, int lev, int flag)
 {
 	Suftab *t;
 	char *cp, *sp;
 	Bits h = 0;
 	int initchar;

 	flag &= ~MONO;
 	lev += DLEV;
 	if(lev < DSIZ) {
 		deriv[lev]  = emptyderiv;
 		deriv[lev-1] = emptyderiv;
 	}
 	/* an empty candidate has no last letter; ep[-1] would lie before word[] */
 	if(ep <= word)
 		return h;
 	initchar = ep[-1];
 	if(!ISLOWER(initchar))
 		return h;
 	for(t=suftab[initchar-'a']; sp=t->suf; t++) {
 		/* compare the reversed suffix with the word, last letter first */
 		cp = ep;
 		while(*sp) {
 			/* suffix longer than the candidate: do not read before word[] */
 			if(cp <= word)
 				goto next;
 			if(*--cp != *sp++)
 				goto next;
 		}
 		/* the stem left after stripping must contain a vowel */
 		for(sp=ep-t->n1; --sp >= word && !ISVOWEL(*sp);)
 			;
 		if(sp < word)
 			continue;
 		/* the caller cannot accept what this suffix produces */
 		if(!(t->affixable & flag))
 			return 0;
 		h = (*t->p1)(ep-t->n1, t->d1, t->a1, lev+1, t->flag|STOP);
 		if(!h && t->p2!=0) {
 			/* forget the failed derivation before trying the alternative */
 			if(lev < DSIZ) {
 				deriv[lev] = emptyderiv;
 				deriv[lev+1] = emptyderiv;
 			}
 			h = (*t->p2)(ep-t->n2, t->d2, t->a2, lev, t->flag|STOP);
 		}
 		break;
 	next:;
 	}
 	return h;
 }

 /*
  * rule that never finds a stem.  trysuff stops at the first entry
  * that matches, so a nop entry keeps a shorter suffix listed after
  * it from being tried.  Always returns 0.
  */
 Bits
 nop(char* ep, char* d, char* a, int lev, int flag)
 {
 	/* nothing is examined; each parameter is only marked with USED() */
 	USED(flag);
 	USED(lev);
 	USED(a);
 	USED(d);
 	USED(ep);
 	return 0;
 }

 /*
  * strip() with a check on the letters at the join: refuse when the
  * stem's last letter and the suffix's first letter form one of the
  * listed vowel pairs, or when the join would produce the same letter
  * three times in a row.  Returns 0 on refusal, else strip()'s result.
  */
 Bits
 cstrip(char* ep, char* d, char* a, int lev, int flag)
 {
 	int cur, prev;

 	cur = ep[0];
 	prev = ep[-1];
 	if(ISVOWEL(cur) && ISVOWEL(prev)) {
 		switch(pair(prev, cur)) {
 		case pair('a', 'a'):
 		case pair('a', 'e'):
 		case pair('a', 'i'):
 		case pair('e', 'a'):
 		case pair('e', 'e'):
 		case pair('e', 'i'):
 		case pair('i', 'i'):
 		case pair('o', 'a'):
 			return 0;
 		}
 		return strip(ep,d,a,lev,flag);
 	}
 	/* same letter three times running */
 	if(cur == prev && cur == ep[-2])
 		return 0;
 	return strip(ep,d,a,lev,flag);
 }

 /*
  * Plain suffix removal: look up the stem ending at ep (with prefixes
  * stripped as needed); failing that, if the suffix starts with a
  * vowel and the stem ends in a doubled consonant, look up the stem
  * without the doubling (MONO); failing that, strip another suffix.
  * d is unused.  Returns the Bits found, or 0.
  */
 Bits
 strip(char* ep, char* d, char* a, int lev, int flag)
 {
 	Bits found;

 	USED(d);
 	found = trypref(ep, a, lev, flag);
 	/* a MONO stem is rejected when vowels sit at both ep[0] and ep[-2] */
 	if(found != 0 && !(Set(found,MONO) && ISVOWEL(*ep) && ISVOWEL(ep[-2])))
 		return found;
 	if(ISVOWEL(*ep) && !ISVOWEL(ep[-1]) && ep[-1] == ep[-2]) {
 		found = trypref(ep-1, a, lev, flag|MONO);
 		if(found != 0)
 			return found;
 	}
 	return trysuff(ep, lev, flag);
 }

 /*
  * rule for a final "s".  Only allowed near the top of a derivation
  * (lev <= DLEV+1).  When the character at ep is 's', stems ending in
  * x, z, s, ch or sh are refused, as are stems ending in
  * consonant+y unless the word is capitalized.
  */
 Bits
 s(char* ep, char* d, char* a, int lev, int flag)
 {
 	int last;

 	if(lev > DLEV+1)
 		return 0;
 	if(*ep == 's') {
 		last = ep[-1];
 		if(last == 'x' || last == 'z' || last == 's')
 			return 0;
 		/*says Kennedys*/
 		if(last == 'y' && !ISVOWEL(ep[-2]) && !ISUPPER(*word))
 			return 0;
 		if(last == 'h' && (ep[-2] == 'c' || ep[-2] == 's'))
 			return 0;
 	}
 	return strip(ep,d,a,lev,flag);
 }

 /*
  * rule for the -an/-ian entries: applies only when the word starts
  * with an upper-case letter, and then looks the stem up directly.
  * d is unused.
  */
 Bits
 an(char* ep, char* d, char* a, int lev, int flag)
 {
 	USED(d);
 	if(ISUPPER(*word))
 		return trypref(ep,a,lev,flag);
 	/*must be proper name*/
 	return 0;
 }

 /*
  * temporarily replace the stem's last letter with 'e', try strip()
  * on the result (reporting d as the affix message), then put the
  * letter back.  a is unused.
  */
 Bits
 ize(char* ep, char* d, char* a, int lev, int flag)
 {
 	Bits h;
 	int saved;

 	USED(a);
 	saved = ep[-1];
 	ep[-1] = 'e';
 	h = strip(ep,"",d,lev,flag);
 	ep[-1] = saved;
 	return h;
 }

 /*
  * try the stem with an 'e' appended: the character at ep is
  * overwritten with 'e', strip() is called one position further on
  * (reporting d as the affix message), and the character is restored.
  * Refused when the stem ends in a, e or i.  a is unused.
  */
 Bits
 y_to_e(char* ep, char* d, char* a, int lev, int flag)
 {
 	Bits h;
 	int saved;

 	USED(a);
 	if(ep[-1] == 'a' || ep[-1] == 'e' || ep[-1] == 'i')
 		return 0;
 	saved = ep[0];
 	ep[0] = 'e';
 	h = strip(ep+1,"",d,lev,flag);
 	ep[0] = saved;
 	return h;
 }

 /*
  * rule for suffixes that turn a final y into i (-ly, -ness, ...).
  * Refuses a tripled letter at the join, and refuses a stem ending in
  * consonant+y when a vowel occurs earlier in the word; otherwise
  * hands a stem ending in 'i' to i_to_y and anything else to cstrip.
  */
 Bits
 ily(char* ep, char* d, char* a, int lev, int flag)
 {
 	char *cp;
 	int first = ep[0];

 	/* sillly */
 	if(first == ep[-1] && first == ep[-2])
 		return 0;
 	/* happyly */
 	if(ep[-1] == 'y' && !ISVOWEL(ep[-2])) {
 		/* shyness: allowed only when no vowel precedes the consonant */
 		for(cp = ep-2; cp > word; )
 			if(ISVOWEL(*--cp))
 				return 0;
 	}
 	if(ep[-1] == 'i')
 		return i_to_y(ep,d,a,lev,flag);
 	return cstrip(ep,d,a,lev,flag);
 }

 /*
  * rule for -bility: write 'l' over the character at ep and let
  * y_to_e append the 'e', so that the stem is looked up ending in
  * "le".  The overwritten character is restored before returning so
  * that word[] is left as it was found, as the other rewriting rules
  * (ize, y_to_e, i_to_y) already do.
  * Returns whatever y_to_e returns.
  */
 Bits
 bility(char* ep, char* d, char* a, int lev, int flag)
 {
 	Bits h;
 	int temp;

 	temp = *ep;
 	*ep++ = 'l';
 	h = y_to_e(ep,d,a,lev,flag);
 	ep[-1] = temp;
 	return h;
 }

 /*
  * rule for stems whose final y became i.  Not applied to capitalized
  * words.  If the stem ends in consonant+i the i is turned into y and
  * d becomes the affix message; otherwise a is kept.  The stem is
  * then tried with cstrip and its last letter restored.
  */
 Bits
 i_to_y(char* ep, char* d, char* a, int lev, int flag)
 {
 	Bits h;
 	int saved;

 	if(ISUPPER(*word))
 		return 0;
 	saved = ep[-1];
 	if(saved == 'i' && !ISVOWEL(ep[-2])) {
 		ep[-1] = 'y';
 		a = d;
 	}
 	h = cstrip(ep,"",a,lev,flag);
 	ep[-1] = saved;
 	return h;
 }

 /*
  * rule for "-es".  Only at the top of a derivation (lev <= DLEV).
  * A stem ending in 'i' goes to i_to_y; stems ending in s, z, x, ch
  * or sh go to strip; every other stem is refused.
  */
 Bits
 es(char* ep, char* d, char* a, int lev, int flag)
 {
 	int last;

 	if(lev>DLEV)
 		return 0;
 	last = ep[-1];
 	if(last == 'i')
 		return i_to_y(ep,d,a,lev,flag);
 	if(last == 'h') {
 		if(ep[-2] != 'c' && ep[-2] != 's')
 			return 0;
 		return strip(ep,d,a,lev,flag);
 	}
 	if(last == 's' || last == 'z' || last == 'x')
 		return strip(ep,d,a,lev,flag);
 	return 0;
 }

 /*
  * rule driven by a message of the form "-old+new": the letters just
  * before ep are overwritten with "old", the result is tried with
  * strip() (reporting d as the affix message), and the overwritten
  * letters are put back.  a is unused.
  *
  * The bytes are saved and restored verbatim rather than rewritten
  * from "new": when "old" is longer than "new" (as in "-ph+m")
  * rewriting "new" would leave one overwritten byte unrestored.
  *
  * Returns 0 without trying when skipv() cannot step back over two
  * vowel groups inside word[], or when "old" would not fit.
  */
 Bits
 subst(char* ep, char* d, char* a, int lev, int flag)
 {
 	char *t;
 	char save[8];
 	int n;
 	Bits h;

 	USED(a);
 	if(skipv(skipv(ep-1)) < word)
 		return 0;
 	/* t: the '+' that separates "old" from "new" */
 	for(t=d; *t!='+'; t++)
 		continue;
 	/* n: number of characters between the '-' and the '+' */
 	n = 0;
 	while(t[-1-n] != '-')
 		n++;
 	if(n > (int)sizeof(save) || ep-n < word)
 		return 0;
 	memmove(save, ep-n, n);
 	memmove(ep-n, t-n, n);
 	h = strip(ep,"",d,lev,flag);
 	memmove(ep-n, save, n);
 	return h;
 }

 /*
  * rule for -tion/-tor style entries: when the letter two before ep
  * is one of a, e, i, o, u the stem is tried with an 'e' appended
  * (y_to_e); otherwise it is looked up as it stands.
  */
 Bits
 tion(char* ep, char* d, char* a, int lev, int flag)
 {
 	int c;

 	c = ep[-2];
 	if(c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u')
 		return y_to_e(ep,d,a,lev,flag);
 	return trypref(ep,a,lev,flag);
 }

 /*
  * possible consonant-consonant-e ending
  *
  * Decides from the stem's last two letters whether the stem should
  * be tried with a silent 'e' restored (y_to_e) before or instead of
  * the general vowel-consonant-e handling in VCe.  The cases below
  * fall through on purpose.
  */
 Bits
 CCe(char* ep, char* d, char* a, int lev, int flag)
 {
 	Bits h;

 	switch(ep[-1]) {
 	case 'l':
 		/* vowel+l, ll, rl, wl: leave to VCe; other consonant+l: append 'e' */
 		if(ISVOWEL(ep[-2]))
 			break;
 		switch(ep[-2]) {
 		case 'l':
 		case 'r':
 		case 'w':
 			break;
 		default:
 			return y_to_e(ep,d,a,lev,flag);
 		}
 		break;
 	case 'c':
 	case 'g':
 		if(*ep == 'a')	/* prevent -able for -eable */
 			return 0;
 		/* falls through */
 	case 's':
 	case 'v':
 	case 'z':
 		/* a doubled letter or a preceding vowel: leave to VCe */
 		if(ep[-2]==ep[-1])
 			break;
 		if(ISVOWEL(ep[-2]))
 			break;
 		/* falls through */
 	case 'u':
 		if(h = y_to_e(ep,d,a,lev,flag))
 			return h;
 		/* only stems ending in "ng" get a second chance through VCe */
 		if(!(ep[-2]=='n' && ep[-1]=='g'))
 			return 0;
 	}
 	return VCe(ep,d,a,lev,flag);
 }

 /*
  * possible consonant-vowel-consonant-e ending
  *
  * A stem ending in 'e' is refused.  A stem ending in vowel+consonant
  * is first tried with an 'e' appended (prefix lookup, then further
  * suffix stripping); on success the 'e' is left in word[].  If that
  * fails the character is restored and the stem goes to cstrip.
  */
 Bits
 VCe(char* ep, char* d, char* a, int lev, int flag)
 {
 	Bits h;
 	int last, saved;

 	last = ep[-1];
 	if(last == 'e')
 		return 0;
 	if(!ISVOWEL(last) && ISVOWEL(ep[-2])) {
 		saved = ep[0];
 		ep[0] = 'e';
 		h = trypref(ep+1,d,lev,flag);
 		if(!h)
 			h = trysuff(ep+1,lev,flag);
 		if(h)
 			return h;
 		ep[0] = saved;
 	}
 	return cstrip(ep,d,a,lev,flag);
 }

 /*
  * find the first prefix in the table for the word's initial letter
  * that matches at *wp and is followed, before ep, by at least one
  * vowel.  On success *wp is advanced past the prefix and the table
  * entry is returned; otherwise *wp is unchanged and 0 is returned.
  * The table is chosen by the lower-cased initial; the comparison
  * itself is exact.
  */
 Ptab*
 lookuppref(uchar** wp, char* ep)
 {
 	Ptab *sp;
 	uchar *bp,*cp;
 	unsigned int initchar = Tolower(**wp);

 	if(!ISALPHA(initchar))
 		return 0;
 	for(sp=preftab[initchar-'a']; sp->s; sp++) {
 		/* match the prefix text against the word */
 		bp = *wp;
 		cp = (uchar*)sp->s;
 		while(*cp && *bp == *cp) {
 			bp++;
 			cp++;
 		}
 		if(*cp)
 			continue;
 		/* what remains must contain a vowel */
 		for(cp=bp; cp<(uchar*)ep; cp++) {
 			if(ISVOWEL(*cp)) {
 				*wp = bp;
 				return sp;
 			}
 		}
 	}
 	return 0;
 }

 /*	while word is not in dictionary try stripping
  *	prefixes. Fail if no more prefixes.
  *
  *	ep	one past the end of the candidate in word[]
  *	a	affix message recorded for this level ("." at top level)
  *	lev	derivation level
  *	flag	acceptable classes; with MONO set, the entry found
  *		must have MONO as well
  *
  *	returns the Bits of the accepted entry, or 0
  */
 Bits
 trypref(char* ep, char* a, int lev, int flag)
 {
 	Ptab *tp;
 	char *bp, *cp;
 	char *pp;
 	Bits h;
 	char pmesg[20];		/* "+pre+pre..." message, truncated to fit */

 	if(lev<DSIZ) {
 		deriv[lev].mesg = a;
 		deriv[lev].type = *a=='.'? NONE: SUFF;
 	}
 	/* the candidate as it stands */
 	if(h = tryword(word,ep,lev,flag)) {
 		if(Set(h, flag&~MONO) && (flag&MONO) <= Set(h, MONO))
 			return h;
 		h = 0;
 	}
 	bp = word;
 	pp = pmesg;
 	*pp = 0;
 	if(lev<DSIZ) {
 		deriv[lev+1].mesg = pp;
 		deriv[lev+1].type = 0;
 	}
 	while(tp=lookuppref((uchar**)(void*)&bp,ep)) {
 		/*
 		 * append "+prefix" to the message; a word may stack any
 		 * number of prefixes, so never write past pmesg and
 		 * always leave it terminated
 		 */
 		if(pp < pmesg+sizeof(pmesg)-1)
 			*pp++ = '+';
 		cp = tp->s;
 		while(pp<pmesg+sizeof(pmesg)-1 && (*pp = *cp++))
 			pp++;
 		*pp = 0;
 		/* deriv[] only has room for levels below DSIZ */
 		if(lev<DSIZ)
 			deriv[lev+1].type += PREF;
 		h = tryword(bp,ep,lev+1,flag);
 		/* the entry forbids prefixes, or in-/un- does not suit it */
 		if(Set(h,NOPREF) ||
 		   ((tp->flag&IN) && inun(bp-2,h)==0)) {
 			h = 0;
 			break;
 		}
 		if(Set(h,flag&~MONO) && (flag&MONO) <= Set(h, MONO))
 			break;
 		h = 0;
 	}
 	/* pmesg goes out of scope: drop the references to it */
 	if(lev < DSIZ) {
 		deriv[lev+1] = emptyderiv;
 		deriv[lev+2] = emptyderiv;
 	}
 	return h;
 }

 /*
  * look up the text from bp up to (not including) ep in the
  * dictionary.  Candidates of one character or less are never looked
  * up.  When vflag is set and the lookup succeeds, the messages of
  * the current derivation are appended to affix[] and prefcount and
  * suffcount are recomputed.
  * Returns the entry's Bits, or 0.
  */
 Bits
 tryword(char* bp, char* ep, int lev, int flag)
 {
 	int  j;
 	Bits h = 0;
 	char duple[3];

 	if(ep-bp <= 1)
 		return h;
 	if(flag&MONO) {
 		/*
 		 * the caller dropped a doubled letter: record "+<letter>" as
 		 * an extra suffix step.  duple is a local, so this deriv[]
 		 * slot is only meaningful until this function returns.
 		 */
 		if(lev<DSIZ) {
 			deriv[++lev].mesg = duple;
 			deriv[lev].type = SUFF;
 		}
 		duple[0] = '+';
 		duple[1] = *ep;
 		duple[2] = 0;
 	}
 	h = dict(bp, ep);
 	if(vflag==0 || h==0)
 		return h;
 	/*
 	 * when derivations are wanted, collect them
 	 * for printing
 	 */
 	j = lev;
 	prefcount = suffcount = 0;
 	/* walk from the deepest level back toward level 1 (level 0 too when lev is 0) */
 	do {
 		if(j<DSIZ && deriv[j].type) {
 			strcat(affix, deriv[j].mesg);
 			if(deriv[j].type == SUFF)
 				suffcount++;
 			else if(deriv[j].type != NONE)
 				/* type holds PREF times the number of prefixes */
 				prefcount = deriv[j].type/PREF;
 		}
 	} while(--j > 0);
 	return h;
 }

 /*
  * decide whether the two-letter negative prefix at bp suits an entry
  * with bits h.  "un" is accepted only when the entry lacks IN; the
  * i- prefixes require IN and, in addition, the form that goes with
  * the stem's first letter: "ir" before r, "im" before m or p, "in"
  * otherwise.  Returns nonzero when the prefix is acceptable.
  */
 int
 inun(char* bp, Bits h)
 {
 	int hasin;

 	hasin = Set(h, IN) != 0;
 	if(*bp == 'u')
 		return !hasin;
 	/* *bp == 'i' */
 	if(!hasin)
 		return 0;
 	if(bp[2] == 'r')
 		return bp[1] == 'r';
 	if(bp[2] == 'm' || bp[2] == 'p')
 		return bp[1] == 'm';
 	return bp[1] == 'n';
 }

 /*
  * step backwards from s over at most one vowel and then over any
  * non-vowels.  The result points at the nearest earlier vowel, or
  * just before word[] when there is none.
  */
 char*
 skipv(char *s)
 {
 	char *p;

 	p = s;
 	if(p >= word && ISVOWEL(*p))
 		p--;
 	for(; p >= word; p--)
 		if(ISVOWEL(*p))
 			break;
 	return p;
 }

 /*
  * crummy way to Britishise
  *
  * every 'z' in the suffix text and in the first pair of messages of
  * each suffix table entry is turned into 's' (see ztos)
  */
 void
 ise(void)
 {
 	Suftab **tab, *p;

 	for(tab = suftab; tab < suftab+26; tab++) {
 		for(p = *tab; p->suf != 0; p++) {
 			p->suf = ztos(p->suf);
 			p->d1 = ztos(p->d1);
 			p->a1 = ztos(p->a1);
 		}
 	}
 }

 /*
  * return as with every 'z' replaced by 's'.
  *
  * A string without 'z' is returned as is.  Otherwise the result is a
  * fresh copy obtained from strdup and as itself is left unchanged.
  * If the copy cannot be allocated, as is returned unmodified instead
  * of writing through a null pointer.
  */
 char*
 ztos(char *as)
 {
 	char *s, *ds;

 	if(strchr(as, 'z') == 0)
 		return as;
 	ds = strdup(as);
 	if(ds == 0)
 		return as;	/* out of memory: keep the original spelling */
 	for(s=ds; *s; s++)
 		if(*s == 'z')
 			*s = 's';
 	return ds;
 }

 /*
  * look up the text from bp up to (not including) ep in the
  * in-memory dictionary and return the Bits of its affix code, or 0
  * if it is absent.  Text of one character or less is reported as
  * NOUN without a lookup.
  *
  * spacep[] is indexed by the low 7 bits of the first two characters
  * and brackets the entries that start with that pair; the bracket is
  * then binary-searched.  An entry is two header bytes, the first
  * with its 0x80 bit set, followed by the rest of the word after its
  * first two letters.
  */
 Bits
 dict(char* bp, char* ep)
 {
 	char *cp, *cp1, *w, *wp, *we;
 	int n, f;

 	w = bp;
 	we = ep;
 	n = ep-bp;
 	if(n <= 1)
 		return NOUN;

 	/* from here on bp and ep bracket the dictionary range, not the word */
 	f = w[0] & 0x7f;
 	f *= 128;
 	f += w[1] & 0x7f;
 	bp = spacep[f];
 	ep = spacep[f+1];

 loop:
 	if(bp >= ep) {
 		/* range exhausted: not found */
 		if(xflag)
 			fprint(2, "=%.*s\n", utfnlen(w, n), w);
 		return 0;
 	}
 	/*
 	 * find the beginning of some word in the middle
 	 */
 	cp = bp + (ep-bp)/2;

 	/* back up to a header byte, then to the first byte of that header */
 	while(cp > bp && !(*cp & 0x80))
 		cp--;
 	while(cp > bp && (cp[-1] & 0x80))
 		cp--;

 	wp = w + 2;	/* skip two letters */
 	cp1 = cp + 2;	/* skip affix code */
 	/* compare; f < 0 means the entry sorts before the word, f > 0 after */
 	for(;;) {
 		if(wp >= we) {
 			/* word used up: a match only if the entry ends here too */
 			if(*cp1 & 0x80)
 				goto found;
 			else
 				f = 1;
 			break;
 		}
 		if(*cp1 & 0x80) {
 			/* entry used up first */
 			f = -1;
 			break;
 		}
 		f = *cp1++ - *wp++;
 		if(f != 0)
 			break;
 	}

 	if(f < 0) {
 		/* continue in the upper half, starting at the next entry */
 		while(!(*cp1 & 0x80))
 			cp1++;
 		bp = cp1;
 		goto loop;
 	}
 	/* continue in the lower half */
 	ep = cp;
 	goto loop;

 found:
 	/* the affix code is the low 3 bits of the first header byte and all of the second */
 	f = ((cp[0] & 0x7) << 8) |
 		(cp[1] & 0xff);
 	if(xflag) {
 		fprint(2, "=%.*s ", utfnlen(w, n), w);
 		typeprint(encode[f]);
 	}
 	return encode[f];
 }

 /*
  * write the names of the bits set in h to file descriptor 2 as a
  * comma-separated list followed by a newline
  */
 void
 typeprint(Bits h)
 {
 	static struct {
 		Bits	bit;
 		char	*name;
 	} rest[] = {
 		{ DONT_TOUCH,	"d" },
 		{ N_AFFIX,	"na" },
 		{ ADV,		"adv" },
 		{ ION,		"ion" },
 		{ V_AFFIX,	"va" },
 		{ MAN,		"man" },
 		{ NOPREF,	"nopref" },
 		{ MONO,		"ms" },
 		{ IN,		"in" },
 		{ _Y,		"y" },
 		{ STOP,		"s" },
 	};
 	Bits verb, comp;
 	int i;

 	/* an empty string resets pcomma so the first name gets no comma */
 	pcomma("");
 	if(h & NOUN)
 		pcomma("n");
 	if(h & PROP_COLLECT)
 		pcomma("pc");
 	/* the verb bits form a small field: all of VERB, V_IRREG, or ED */
 	verb = h & VERB;
 	if(verb != 0) {
 		if(verb == VERB)
 			pcomma("v");
 		else if(verb == V_IRREG)
 			pcomma("vi");
 		else if(h & ED)
 			pcomma("ed");
 	}
 	if(h & ADJ)
 		pcomma("a");
 	comp = h & COMP;
 	if(comp != 0) {
 		if(comp == ACTOR)
 			pcomma("er");
 		else
 			pcomma("comp");
 	}
 	/* the remaining bits are independent of one another */
 	for(i = 0; i < (int)(sizeof(rest)/sizeof(rest[0])); i++)
 		if(h & rest[i].bit)
 			pcomma(rest[i].name);
 	fprint(2, "\n");
 }

 /*
  * print s on file descriptor 2, preceded by a comma unless it is the
  * first item since the last reset.  An empty s prints nothing and
  * resets the list.
  */
 void
 pcomma(char *s)
 {
 	static int started;

 	if(*s == 0) {
 		started = 0;
 		return;
 	}
 	fprint(2, started? ",%s": "%s", s);
 	started = 1;
 }

 /*
  * is the word one of the following
  *	12th	teen
  *	21st	end in 1
  *	23rd	end in 3
  *	77th	default
  * called knowing word[0] is a digit
  *
  * returns nonzero when the digits are followed by exactly the
  * two-letter ending that belongs to them; the ending may be written
  * in upper case
  */
 int
 ordinal(void)
 {
 	char *cp = word;
 	char *want;
 	static char sp[4];	/* sp[3] is never written and stays 0 */

 	while(ISDIGIT(*cp))
 		cp++;
 	/* the comparison below covers 3 bytes, so anything after the ending fails */
 	strncpy(sp,cp,3);
 	if(ISUPPER(cp[0]) && ISUPPER(cp[1])) {
 		sp[0] = Tolower(cp[0]);
 		sp[1] = Tolower(cp[1]);
 	}
 	/*
 	 * the tens digit exists only when there are at least two digits;
 	 * looking at cp[-2] for a one-digit number would read before word[]
 	 */
 	if(cp-word >= 2 && cp[-2] == '1')
 		want = "th";
 	else
 		switch(cp[-1]) {
 		case '1':
 			want = "st";
 			break;
 		case '2':
 			want = "nd";
 			break;
 		case '3':
 			want = "rd";
 			break;
 		default:
 			want = "th";
 			break;
 		}
 	return 0 == strncmp(sp, want, 3);
 }

 /*
  * read in the dictionary.
  * format is
  * {
  *	short	nencode;
  *	long	encode[nencode];
  *	char	space[*];
  * };
  *
  * the encodings are a table of all the different
  * affixes.
  * the dictionary proper has 2 bytes
  * that demark and then the rest of the
  * word. the 2 bytes have the following
  *	0x80 0x00	flag
  *	0x78 0x00	count of prefix bytes
  *			common with prev word
  *	0x07 0xff	affix code
  *
  * all ints are big-endian in the file.
  *
  * The words are expanded in place: the raw bytes are moved to the
  * top of space[] and unpacked from the bottom, each entry getting
  * back the leading bytes it shares with the previous one, while
  * spacep[] is filled with the start of each two-letter group.
  * Any problem with the file is fatal: a message is printed and the
  * program exits.
  */
 void
 readdict(char *file)
 {
 	char *s, *is, *lasts, *ls;
 	int c, i, sp, p;
 	int f;
 	long l;

 	lasts = 0;
 	f = open(file, 0);
 	if(f == -1) {
 		fprint(2, "cannot open %s\n", file);
 		exits("open");
 	}
 	if(read(f, space, 2) != 2)
 		goto bad;
 	nencode = ((space[0]&0xff)<<8) | (space[1]&0xff);
 	/* the count comes from the file; it must not run past encode[] */
 	if(nencode > (int)(sizeof(encode)/sizeof(encode[0])))
 		goto noroom;
 	if(read(f, space, 4*nencode) != 4*nencode)
 		goto bad;
 	s = space;
 	for(i=0; i<nencode; i++) {
 		l = (long)(s[0] & 0xff) << 24;
 		l |= (s[1] & 0xff) << 16;
 		l |= (s[2] & 0xff) << 8;
 		l |= s[3] & 0xff;
 		encode[i] = (Bits)l;
 		s += 4;
 	}
 	l = read(f, space, sizeof(space));
 	/* a read error or an empty word list leaves nothing to unpack */
 	if(l <= 0)
 		goto bad;
 	/* a completely full buffer means the file may be longer than space[] */
 	if(l == sizeof(space))
 		goto noroom;
 	/* move the packed bytes to the top; is reads them, s writes from the bottom */
 	is = space + (sizeof(space) - l);
 	memmove(is, space, l);

 	s = space;
 	c = *is++ & 0xff;
 	sp = -1;
 	i = 0;

 loop:
 	/* the expanded text must never catch up with the unread input */
 	if(s > is)
 		goto noroom;
 	if(c < 0) {
 		/* input used up: point every remaining group at the end */
 		close(f);
 		while(sp < 128*128)
 			spacep[++sp] = s;
 		*s = (char)0x80;		/* fence */
 		return;
 	}
 	/* p: number of leading bytes shared with the previous word */
 	p = (c>>3) & 0xf;
 	*s++ = c;
 	*s++ = *is++ & 0xff;
 	/* fewer than two shared bytes: this word starts a new two-letter group */
 	if(p <= 0)
 		i = (*is++ & 0xff)*128;
 	if(p <= 1) {
 		if(!(*is & 0x80))
 			i = i/128*128 + (*is++ & 0xff);
 		if(i <= sp) {
 			fprint(2, "the dict isnt sorted or \n");
 			fprint(2, "memmove didn't work\n");
 			goto bad;
 		}
 		while(sp < i)
 			spacep[++sp] = s-2;
 	}
 	ls = lasts;
 	lasts = s;
 	p -= 2;
 	/* the very first word has no predecessor to share bytes with */
 	if(p > 0 && ls == 0)
 		goto bad;
 	for(; p>0; p--)
 		*s++ = *ls++;
 	/* copy the word's own bytes up to the next header byte */
 	for(;;) {
 		if(is >= space+sizeof(space)) {
 			c = -1;
 			break;
 		}
 		c = *is++ & 0xff;
 		if(c & 0x80)
 			break;
 		*s++ = c;
 	}
 	*s = 0;
 	goto loop;

 bad:
 	fprint(2, "trouble reading %s\n", file);
 	exits("read");
 noroom:
 	fprint(2, "not enough space for dictionary\n");
 	exits("space");
 }