| #include <u.h> | 
 | #include <libc.h> | 
 | #include <bio.h> | 
 | #include <ctype.h> | 
 | #include "code.h" | 
 |  | 
/*
 * Fig leaves for possibly signed char quantities: each character is
 * masked to 0..255 before it reaches a ctype routine or indexes voweltab.
 */
#define ISUPPER(c)	isupper((c)&0xff)
#define ISLOWER(c)	islower((c)&0xff)
#define	ISALPHA(c)	isalpha((c)&0xff)
#define	ISDIGIT(c)	isdigit((c)&0xff)
#define ISVOWEL(c)	voweltab[(c)&0xff]	/* nonzero for aeiouy in either case; table is filled in main */
#define Tolower(c)	(ISUPPER(c)? (c)-'A'+'a': (c))	/* note: evaluates c more than once */
#define pair(a,b)	(((a)<<8) | (b))	/* pack two characters into one integer (used as switch labels in cstrip) */
#define DLEV		2	/* amount trysuff adds to the derivation level on each call */
#define DSIZ		40	/* only derivation levels below this are recorded in deriv[] */

typedef	long	Bits;	/* bit mask of word-class/affix flags; the flag names are not defined in this file */
#define	Set(h, f)	((long)(h) & (f))	/* the bits of f that are present in h */
 |  | 
 | Bits 	nop(char*, char*, char*, int, int); | 
 | Bits 	strip(char*, char*, char*, int, int); | 
 | Bits 	ize(char*, char*, char*, int, int); | 
 | Bits 	i_to_y(char*, char*, char*, int, int); | 
 | Bits 	ily(char*, char*, char*, int, int); | 
 | Bits 	subst(char*, char*, char*, int, int); | 
 | Bits 	CCe(char*, char*, char*, int, int); | 
 | Bits 	tion(char*, char*, char*, int, int); | 
 | Bits 	an(char*, char*, char*, int, int); | 
 | Bits 	s(char*, char*, char*, int, int); | 
 | Bits 	es(char*, char*, char*, int, int); | 
 | Bits 	bility(char*, char*, char*, int, int); | 
 | Bits 	y_to_e(char*, char*, char*, int, int); | 
 | Bits 	VCe(char*, char*, char*, int, int); | 
 |  | 
 | Bits 	trypref(char*, char*, int, int); | 
 | Bits	tryword(char*, char*, int, int); | 
 | Bits 	trysuff(char*, int, int); | 
 | Bits	dict(char*, char*); | 
 | void	typeprint(Bits); | 
 | void	pcomma(char*); | 
 |  | 
 | void	ise(void); | 
 | int	ordinal(void); | 
 | char*	skipv(char*); | 
 | int	inun(char*, Bits); | 
 | char*	ztos(char*); | 
 | void	readdict(char*); | 
 |  | 
 | typedef	struct	Ptab	Ptab; | 
 | struct	Ptab | 
 | { | 
 | 	char*	s; | 
 | 	int	flag; | 
 | }; | 
 |  | 
 | typedef	struct	Suftab	Suftab; | 
 | struct	Suftab | 
 | { | 
 | 	char	*suf; | 
 | 	Bits	(*p1)(char*, char*, char*, int, int); | 
 | 	int	n1; | 
 | 	char	*d1; | 
 | 	char	*a1; | 
 | 	int	flag; | 
 | 	int	affixable; | 
 | 	Bits	(*p2)(char*, char*, char*, int, int); | 
 | 	int	n2; | 
 | 	char	*d2; | 
 | 	char	*a2; | 
 | }; | 
 |  | 
 | Suftab	staba[] = { | 
 | 	{"aibohp",subst,1,"-e+ia","",NOUN, NOUN}, | 
 | 	0 | 
 | }; | 
 |  | 
 | Suftab	stabc[] = | 
 | { | 
 | 	{"cai",strip,1,"","+c",N_AFFIX, ADJ|NOUN}, | 
 | 	{"citsi",strip,2,"","+ic",N_AFFIX, ADJ | N_AFFIX | NOUN}, | 
 | 	{"citi",ize,1,"-e+ic","",N_AFFIX, ADJ }, | 
 | 	{"cihparg",i_to_y,1,"-y+ic","",NOUN, ADJ|NOUN }, | 
 | 	{"cipocs",ize,1,"-e+ic","",NOUN, ADJ }, | 
 | 	{"cirtem",i_to_y,1,"-y+ic","",NOUN, ADJ }, | 
 | 	{"cigol",i_to_y,1,"-y+ic","",NOUN, ADJ }, | 
 | 	{"cimono",i_to_y,1,"-y+ic","",NOUN, ADJ }, | 
 | 	{"cibohp",subst,1,"-e+ic","",NOUN, ADJ }, | 
 | 	0 | 
 | }; | 
 | Suftab	stabd[] = | 
 | { | 
 | 	{"de",strip,1,"","+d",ED,ADJ |COMP,i_to_y,2,"-y+ied","+ed"}, | 
 | 	{"dooh",ily,4,"-y+ihood","+hood",NOUN | ADV, NOUN}, | 
 | 	0 | 
 | }; | 
 | Suftab	stabe[] = | 
 | { | 
 | 	/* | 
 | 	 * V_affix for comment ->commence->commentment?? | 
 | 	 */ | 
 | 	{"ecna",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX}, | 
 | 	{"ecne",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX}, | 
 | 	{"elbaif",i_to_y,4,"-y+iable","",V_IRREG,ADJ}, | 
 | 	{"elba",CCe,4,"-e+able","+able",V_AFFIX,ADJ}, | 
 | 	{"evi",subst,0,"-ion+ive","",N_AFFIX | V_AFFIX,NOUN | N_AFFIX| ADJ}, | 
 | 	{"ezi",CCe,3,"-e+ize","+ize",N_AFFIX|ADJ ,V_AFFIX | VERB |ION | COMP}, | 
 | 	{"ekil",strip,4,"","+like",N_AFFIX ,ADJ}, | 
 | 	0 | 
 | }; | 
 | Suftab	stabg[] = | 
 | { | 
 | 	{"gniee",strip,3,"","+ing",V_IRREG ,ADJ|NOUN}, | 
 | 	{"gnikam",strip,6,"","+making",NOUN,NOUN}, | 
 | 	{"gnipeek",strip,7,"","+keeping",NOUN,NOUN}, | 
 | 	{"gni",CCe,3,"-e+ing","+ing",V_IRREG ,ADJ|ED|NOUN}, | 
 | 	0 | 
 | }; | 
 | Suftab	stabl[] = | 
 | { | 
 | 	{"ladio",strip,2,"","+al",NOUN |ADJ,ADJ}, | 
 | 	{"laci",strip,2,"","+al",NOUN |ADJ,ADJ |NOUN|N_AFFIX}, | 
 | 	{"latnem",strip,2,"","+al",N_AFFIX,ADJ}, | 
 | 	{"lanoi",strip,2,"","+al",N_AFFIX,ADJ|NOUN}, | 
 | 	{"luf",ily,3,"-y+iful","+ful",N_AFFIX,ADJ | NOUN}, | 
 | 	0 | 
 | }; | 
 | Suftab	stabm[] = | 
 | { | 
 | 		/* congregational + ism */ | 
 | 	{"msi",CCe,3,"-e+ism","ism",N_AFFIX|ADJ,NOUN}, | 
 | 	{"margo",subst,-1,"-ph+m","",NOUN,NOUN}, | 
 | 	0 | 
 | }; | 
 | Suftab	stabn[] = | 
 | { | 
 | 	{"noitacifi",i_to_y,6,"-y+ication","",ION,NOUN | N_AFFIX}, | 
 | 	{"noitazi",ize,4,"-e+ation","",ION,NOUN| N_AFFIX}, | 
 | 	{"noit",tion,3,"-e+ion","+ion",ION,NOUN| N_AFFIX | V_AFFIX |VERB|ACTOR}, | 
 | 	{"naino",an,3,"","+ian",NOUN|PROP_COLLECT,NOUN| N_AFFIX}, | 
 | 	{"namow",strip,5,"","+woman",MAN,PROP_COLLECT|N_AFFIX}, | 
 | 	{"nam",strip,3,"","+man",MAN,PROP_COLLECT | N_AFFIX | VERB}, | 
 | 	{"na",an,1,"","+n",NOUN|PROP_COLLECT,NOUN | N_AFFIX}, | 
 | 	{"nemow",strip,5,"","+women",MAN,PROP_COLLECT}, | 
 | 	{"nem",strip,3,"","+man",MAN,PROP_COLLECT}, | 
 | 	{"nosrep",strip,6,"","+person",MAN,PROP_COLLECT}, | 
 | 	0 | 
 | }; | 
 | Suftab	stabp[] = | 
 | { | 
 | 	{"pihs",strip,4,"","+ship",NOUN|PROP_COLLECT,NOUN| N_AFFIX}, | 
 | 	0 | 
 | }; | 
 | Suftab	stabr[] = | 
 | { | 
 | 	{"rehparg",subst,1,"-y+er","",ACTOR,NOUN,strip,2,"","+er"}, | 
 | 	{"reyhparg",nop,0,"","",0,NOUN}, | 
 | 	{"reyl",nop,0,"","",0,NOUN}, | 
 | 	{"rekam",strip,5,"","+maker",NOUN,NOUN}, | 
 | 	{"repeek",strip,6,"","+keeper",NOUN,NOUN}, | 
 | 	{"re",strip,1,"","+r",ACTOR,NOUN | N_AFFIX|VERB|ADJ,	i_to_y,2,"-y+ier","+er"}, | 
 | 	{"rota",tion,2,"-e+or","",ION,NOUN| N_AFFIX|_Y}, | 
 | 	{"rotc",tion,2,"","+or",ION,NOUN| N_AFFIX}, | 
 | 	{"rotp",tion,2,"","+or",ION,NOUN| N_AFFIX}, | 
 | 	0 | 
 | }; | 
 | Suftab	stabs[] = | 
 | { | 
 | 	{"ssen",ily,4,"-y+iness","+ness",ADJ|ADV,NOUN| N_AFFIX}, | 
 | 	{"ssel",ily,4,"-y+iless","+less",NOUN | PROP_COLLECT,ADJ }, | 
 | 	{"se",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH ,	es,2,"-y+ies","+es"}, | 
 | 	{"s'",s,2,"","+'s",PROP_COLLECT | NOUN,DONT_TOUCH }, | 
 | 	{"s",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH  }, | 
 | 	0 | 
 | }; | 
 | Suftab	stabt[] = | 
 | { | 
 | 	{"tnem",strip,4,"","+ment",V_AFFIX,NOUN | N_AFFIX | ADJ|VERB}, | 
 | 	{"tse",strip,2,"","+st",EST,DONT_TOUCH,	i_to_y,3,"-y+iest","+est" }, | 
 | 	{"tsigol",i_to_y,2,"-y+ist","",N_AFFIX,NOUN | N_AFFIX}, | 
 | 	{"tsi",CCe,3,"-e+ist","+ist",N_AFFIX|ADJ,NOUN | N_AFFIX|COMP}, | 
 | 	0 | 
 | }; | 
 | Suftab	staby[] = | 
 | { | 
 | 	{"ycna",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX}, | 
 | 	{"ycne",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX}, | 
 | 	{"ytilib",bility,5,"-le+ility","",ADJ | V_AFFIX,NOUN | N_AFFIX}, | 
 | 	{"ytisuo",nop,0,"","",NOUN}, | 
 | 	{"ytilb",nop,0,"","",0,NOUN}, | 
 | 	{"yti",CCe,3,"-e+ity","+ity",ADJ ,NOUN | N_AFFIX }, | 
 | 	{"ylb",y_to_e,1,"-e+y","",ADJ,ADV}, | 
 | 	{"ylc",nop,0,"","",0}, | 
 | 	{"ylelb",nop,0,"","",0}, | 
 | 	{"ylelp",nop,0,"","",0}, | 
 | 	{"yl",ily,2,"-y+ily","+ly",ADJ,ADV|COMP}, | 
 | 	{"yrtem",subst,0,"-er+ry","",NOUN,NOUN | N_AFFIX}, | 
 | 	{"y",CCe,1,"-e+y","+y",_Y,ADJ|COMP}, | 
 | 	0 | 
 | }; | 
 | Suftab	stabz[] = | 
 | { | 
 | 	0 | 
 | }; | 
 | Suftab*	suftab[] = | 
 | { | 
 | 	staba, | 
 | 	stabz, | 
 | 	stabc, | 
 | 	stabd, | 
 | 	stabe, | 
 | 	stabz, | 
 | 	stabg, | 
 | 	stabz, | 
 | 	stabz, | 
 | 	stabz, | 
 | 	stabz, | 
 | 	stabl, | 
 | 	stabm, | 
 | 	stabn, | 
 | 	stabz, | 
 | 	stabp, | 
 | 	stabz, | 
 | 	stabr, | 
 | 	stabs, | 
 | 	stabt, | 
 | 	stabz, | 
 | 	stabz, | 
 | 	stabz, | 
 | 	stabz, | 
 | 	staby, | 
 | 	stabz | 
 | }; | 
 |  | 
 | Ptab	ptaba[] = | 
 | { | 
 | 	"anti", 0, | 
 | 	"auto", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabb[] = | 
 | { | 
 | 	"bio", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabc[] = | 
 | { | 
 | 	"counter", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabd[] = | 
 | { | 
 | 	"dis", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabe[] = | 
 | { | 
 | 	"electro", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabf[] = | 
 | { | 
 | 	"femto", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabg[] = | 
 | { | 
 | 	"geo", 0, | 
 | 	"giga", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabh[] = | 
 | { | 
 | 	"hyper", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabi[] = | 
 | { | 
 | 	"immuno", 0, | 
 | 	"im", IN, | 
 | 	"intra", 0, | 
 | 	"inter", 0, | 
 | 	"in", IN, | 
 | 	"ir", IN, | 
 | 	"iso", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabj[] = | 
 | { | 
 | 	0 | 
 | }; | 
 | Ptab	ptabk[] = | 
 | { | 
 | 	"kilo", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabl[] = | 
 | { | 
 | 	0 | 
 | }; | 
 | Ptab	ptabm[] = | 
 | { | 
 | 	"magneto", 0, | 
 | 	"mega", 0, | 
 | 	"meta", 0, | 
 | 	"micro", 0, | 
 | 	"mid", 0, | 
 | 	"milli", 0, | 
 | 	"mini", 0, | 
 | 	"mis", 0, | 
 | 	"mono", 0, | 
 | 	"multi", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabn[] = | 
 | { | 
 | 	"nano", 0, | 
 | 	"neuro", 0, | 
 | 	"non", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabo[] = | 
 | { | 
 | 	"out", 0, | 
 | 	"over", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabp[] = | 
 | { | 
 | 	"para", 0, | 
 | 	"photo", 0, | 
 | 	"pico", 0, | 
 | 	"poly", 0, | 
 | 	"pre", 0, | 
 | 	"pseudo", 0, | 
 | 	"psycho", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabq[] = | 
 | { | 
 | 	"quasi", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabr[] = | 
 | { | 
 | 	"radio", 0, | 
 | 	"re", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabs[] = | 
 | { | 
 | 	"semi", 0, | 
 | 	"stereo", 0, | 
 | 	"sub", 0, | 
 | 	"super", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabt[] = | 
 | { | 
 | 	"tele", 0, | 
 | 	"tera", 0, | 
 | 	"thermo", 0, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabu[] = | 
 | { | 
 | 	"ultra", 0, | 
 | 	"under", 0,	/*must precede un*/ | 
 | 	"un", IN, | 
 | 	0 | 
 | }; | 
 | Ptab	ptabv[] = | 
 | { | 
 | 	0 | 
 | }; | 
 | Ptab	ptabw[] = | 
 | { | 
 | 	0 | 
 | }; | 
 | Ptab	ptabx[] = | 
 | { | 
 | 	0 | 
 | }; | 
 | Ptab	ptaby[] = | 
 | { | 
 | 	0 | 
 | }; | 
 | Ptab	ptabz[] = | 
 | { | 
 | 	0 | 
 | }; | 
 |  | 
 | Ptab*	preftab[] = | 
 | { | 
 | 	ptaba, | 
 | 	ptabb, | 
 | 	ptabc, | 
 | 	ptabd, | 
 | 	ptabe, | 
 | 	ptabf, | 
 | 	ptabg, | 
 | 	ptabh, | 
 | 	ptabi, | 
 | 	ptabj, | 
 | 	ptabk, | 
 | 	ptabl, | 
 | 	ptabm, | 
 | 	ptabn, | 
 | 	ptabo, | 
 | 	ptabp, | 
 | 	ptabq, | 
 | 	ptabr, | 
 | 	ptabs, | 
 | 	ptabt, | 
 | 	ptabu, | 
 | 	ptabv, | 
 | 	ptabw, | 
 | 	ptabx, | 
 | 	ptaby, | 
 | 	ptabz | 
 | }; | 
 |  | 
 | typedef struct { | 
 | 	char *mesg; | 
 | 	enum { NONE, SUFF, PREF} type; | 
 | } Deriv; | 
 |  | 
int	aflag;	/* -a: each input line is "x:y:word"; misspellings are echoed with that prefix */
int	cflag;	/* -c or -C: print one character per input word instead of the word */
int	fflag;	/* -f given: an explicit dictionary overrides the -b choice */
int	vflag;	/* -v or -C: collect derivations in affix[] */
int	xflag;	/* -x: dict reports every lookup on fd 2 */
int 	nflag;	/* not referenced anywhere in this file */
char	word[500];	/* working copy of the current word; the affix routines edit it in place */
char*	original;	/* the current input word exactly as read */
Deriv	emptyderiv;	/* all-zero value used to clear deriv[] entries */
Deriv	deriv[DSIZ+3];	/* derivation steps, indexed by level */
char	affix[DSIZ*10];	/* 10 is longest affix message */
int	prefcount;	/* set by tryword: number of prefixes in the last derivation collected */
int 	suffcount;	/* set by tryword: number of suffixes in the last derivation collected */
char*	acmeid;	/* -a mode: start of the input line, cut just before the word */
char	space[300000];	/* must be as large as "words"+"space" in pcode run */
Bits	encode[2048];	/* must be as long as "codes" in pcode run */
int	nencode;	/* number of entries read into encode[] */
char	voweltab[256];	/* nonzero for aeiouy and AEIOUY */
char*	spacep[128*128+1];	/* pointer to words starting with 'xx' */
Biobuf	bin;	/* buffered standard input */
Biobuf	bout;	/* buffered standard output */

char*	codefile = "#9/lib/amspell";	/* default dictionary; passed through unsharp in main */
char*	brfile = "#9/lib/brspell";	/* dictionary selected by -b */
char*	Usage = "usage";	/* exit status for argument errors */
 |  | 
 | void | 
 | main(int argc, char *argv[]) | 
 | { | 
 | 	char *ep, *cp; | 
 | 	char *dp; | 
 | 	int j, i, c; | 
 | 	int low; | 
 | 	Bits h; | 
 |  | 
 | 	codefile = unsharp(codefile); | 
 | 	brfile = unsharp(brfile); | 
 |  | 
 | 	Binit(&bin, 0, OREAD); | 
 | 	Binit(&bout, 1, OWRITE); | 
 | 	for(i=0; c = "aeiouyAEIOUY"[i]; i++) | 
 | 		voweltab[c] = 1; | 
 | 	while(argc > 1) { | 
 | 		if(argv[1][0] != '-') | 
 | 			break; | 
 | 		for(i=1; c = argv[1][i]; i++) | 
 | 		switch(c) { | 
 | 		default: | 
 | 			fprint(2, "usage: spell [-bcCvx] [-f file]\n"); | 
 | 			exits(Usage); | 
 |  | 
 | 		case 'a': | 
 | 			aflag++; | 
 | 			continue; | 
 |  | 
 | 		case 'b': | 
 | 			ise(); | 
 | 			if(!fflag) | 
 | 				codefile = brfile; | 
 | 			continue; | 
 |  | 
 | 		case 'C':		/* for "correct" */ | 
 | 			vflag++; | 
 | 		case 'c':		/* for ocr */ | 
 | 			cflag++; | 
 | 			continue; | 
 |  | 
 | 		case 'v': | 
 | 			vflag++; | 
 | 			continue; | 
 |  | 
 | 		case 'x': | 
 | 			xflag++; | 
 | 			continue; | 
 |  | 
 | 		case 'f': | 
 | 			if(argc <= 2) { | 
 | 				fprint(2, "spell: -f requires another argument\n"); | 
 | 				exits(Usage); | 
 | 			} | 
 | 			argv++; | 
 | 			argc--; | 
 | 			codefile = argv[1]; | 
 | 			fflag++; | 
 | 			goto brk; | 
 | 		} | 
 | 	brk: | 
 | 		argv++; | 
 | 		argc--; | 
 | 	} | 
 | 	readdict(codefile); | 
 | 	if(argc > 1) { | 
 | 		fprint(2, "usage: spell [-bcCvx] [-f file]\n"); | 
 | 		exits(Usage); | 
 | 	} | 
 | 	if(aflag) | 
 | 		cflag = vflag = 0; | 
 |  | 
 | 	for(;;) { | 
 | 		affix[0] = 0; | 
 | 		original = Brdline(&bin, '\n'); | 
 | 		if(original == 0) | 
 | 			exits(0); | 
 | 		original[Blinelen(&bin)-1] = 0; | 
 | 		low = 0; | 
 |  | 
 | 		if(aflag) { | 
 | 			acmeid = original; | 
 | 			while(*original != ':') | 
 | 				if(*original++ == 0) | 
 | 					exits(0); | 
 | 			while(*++original != ':') | 
 | 				if(*original == 0) | 
 | 					exits(0); | 
 | 			*original++ = 0; | 
 | 		} | 
 | 		for(ep=word,dp=original; j = *dp; ep++,dp++) { | 
 | 			if(ISLOWER(j)) | 
 | 				low++; | 
 | 			if(ep >= word+sizeof(word)-1) | 
 | 				break; | 
 | 			*ep = j; | 
 | 		} | 
 | 		*ep = 0; | 
 |  | 
 | 		if(ISDIGIT(word[0]) && ordinal()) | 
 | 			continue; | 
 |  | 
 | 		h = 0; | 
 | 		if(!low && !(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH))) | 
 | 			for(cp=original+1,dp=word+1; dp<ep; dp++,cp++) | 
 | 				*dp = Tolower(*cp); | 
 | 		if(!h) | 
 | 		for(;;) {	/* at most twice */ | 
 | 			if(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH)) | 
 | 				break; | 
 | 			if(h = trysuff(ep,0,ALL|STOP|DONT_TOUCH)) | 
 | 				break; | 
 | 			if(!ISUPPER(word[0])) | 
 | 				break; | 
 | 			cp = original; | 
 | 			dp = word; | 
 | 			while(*dp = *cp++) { | 
 | 					if(!low) | 
 | 						*dp = Tolower(*dp); | 
 | 				dp++; | 
 | 			} | 
 | 			word[0] = Tolower(word[0]); | 
 | 		} | 
 |  | 
 | 		if(cflag) { | 
 | 			if(!h || Set(h,STOP)) | 
 | 				print("-"); | 
 | 			else if(!vflag) | 
 | 				print("+"); | 
 | 			else  | 
 | 				print("%c",'0' + (suffcount>0) + | 
 | 				   (prefcount>4? 8: 2*prefcount)); | 
 | 		} else if(!h || Set(h,STOP)) { | 
 | 			if(aflag) | 
 | 				Bprint(&bout, "%s:%s\n", acmeid, original); | 
 | 			else | 
 | 				Bprint(&bout, "%s\n", original); | 
 | 		} else if(affix[0] != 0 && affix[0] != '.') | 
 | 			print("%s\t%s\n", affix, original); | 
 | 	} | 
 | } | 
 |  | 
 | /*	strip exactly one suffix and do | 
 |  *	indicated routine(s), which may recursively | 
 |  *	strip suffixes | 
 |  */ | 
 | Bits | 
 | trysuff(char* ep, int lev, int flag) | 
 | { | 
 | 	Suftab *t; | 
 | 	char *cp, *sp; | 
 | 	Bits h = 0; | 
 | 	int initchar = ep[-1]; | 
 |  | 
 | 	flag &= ~MONO; | 
 | 	lev += DLEV; | 
 | 	if(lev < DSIZ) { | 
 | 		deriv[lev]  = emptyderiv; | 
 | 		deriv[lev-1] = emptyderiv; | 
 | 	} | 
 | 	if(!ISLOWER(initchar)) | 
 | 		return h; | 
 | 	for(t=suftab[initchar-'a']; sp=t->suf; t++) { | 
 | 		cp = ep; | 
 | 		while(*sp) | 
 | 			if(*--cp != *sp++) | 
 | 				goto next; | 
 | 		for(sp=ep-t->n1; --sp >= word && !ISVOWEL(*sp);) | 
 | 			; | 
 | 		if(sp < word) | 
 | 			continue; | 
 | 		if(!(t->affixable & flag)) | 
 | 			return 0; | 
 | 		h = (*t->p1)(ep-t->n1, t->d1, t->a1, lev+1, t->flag|STOP); | 
 | 		if(!h && t->p2!=0) { | 
 | 			if(lev < DSIZ) { | 
 | 				deriv[lev] = emptyderiv; | 
 | 				deriv[lev+1] = emptyderiv; | 
 | 			} | 
 | 			h = (*t->p2)(ep-t->n2, t->d2, t->a2, lev, t->flag|STOP); | 
 | 		} | 
 | 		break; | 
 | 	next:; | 
 | 	} | 
 | 	return h; | 
 | } | 
 |  | 
 | Bits | 
 | nop(char* ep, char* d, char* a, int lev, int flag) | 
 | { | 
 | 	USED(ep); | 
 | 	USED(d); | 
 | 	USED(a); | 
 | 	USED(lev); | 
 | 	USED(flag); | 
 | 	return 0; | 
 | } | 
 |  | 
 | Bits | 
 | cstrip(char* ep, char* d, char* a, int lev, int flag) | 
 | { | 
 | 	int temp = ep[0]; | 
 |  | 
 | 	if(ISVOWEL(temp) && ISVOWEL(ep[-1])) { | 
 | 		switch(pair(ep[-1],ep[0])) { | 
 | 		case pair('a', 'a'): | 
 | 		case pair('a', 'e'): | 
 | 		case pair('a', 'i'): | 
 | 		case pair('e', 'a'): | 
 | 		case pair('e', 'e'): | 
 | 		case pair('e', 'i'): | 
 | 		case pair('i', 'i'): | 
 | 		case pair('o', 'a'): | 
 | 			return 0; | 
 | 		} | 
 | 	} else | 
 | 	if(temp==ep[-1]&&temp==ep[-2]) | 
 | 		return 0; | 
 | 	return strip(ep,d,a,lev,flag); | 
 | } | 
 |  | 
 | Bits | 
 | strip(char* ep, char* d, char* a, int lev, int flag) | 
 | { | 
 | 	Bits h = trypref(ep, a, lev, flag); | 
 |  | 
 | 	USED(d); | 
 | 	if(Set(h,MONO) && ISVOWEL(*ep) && ISVOWEL(ep[-2])) | 
 | 		h = 0; | 
 | 	if(h) | 
 | 		return h; | 
 | 	if(ISVOWEL(*ep) && !ISVOWEL(ep[-1]) && ep[-1]==ep[-2]) { | 
 | 		h = trypref(ep-1,a,lev,flag|MONO); | 
 | 		if(h) | 
 | 			return h; | 
 | 	} | 
 | 	return trysuff(ep,lev,flag); | 
 | } | 
 |  | 
 | Bits | 
 | s(char* ep, char* d, char* a, int lev, int flag) | 
 | { | 
 | 	if(lev > DLEV+1) | 
 | 		return 0; | 
 | 	if(*ep=='s') { | 
 | 		switch(ep[-1]) { | 
 | 		case 'y': | 
 | 			if(ISVOWEL(ep[-2])||ISUPPER(*word)) | 
 | 				break;	/*says Kennedys*/ | 
 | 		case 'x': | 
 | 		case 'z': | 
 | 		case 's': | 
 | 			return 0; | 
 | 		case 'h': | 
 | 			switch(ep[-2]) { | 
 | 			case 'c': | 
 | 			case 's': | 
 | 				return 0; | 
 | 			} | 
 | 		} | 
 | 	} | 
 | 	return strip(ep,d,a,lev,flag); | 
 | } | 
 |  | 
 | Bits | 
 | an(char* ep, char* d, char* a, int lev, int flag) | 
 | { | 
 | 	USED(d); | 
 | 	if(!ISUPPER(*word))	/*must be proper name*/ | 
 | 		return 0; | 
 | 	return trypref(ep,a,lev,flag); | 
 | } | 
 |  | 
 | Bits | 
 | ize(char* ep, char* d, char* a, int lev, int flag) | 
 | { | 
 | 	int temp = ep[-1]; | 
 | 	Bits h; | 
 |  | 
 | 	USED(a); | 
 | 	ep[-1] = 'e'; | 
 | 	h = strip(ep,"",d,lev,flag); | 
 | 	ep[-1] = temp; | 
 | 	return h; | 
 | } | 
 |  | 
 | Bits | 
 | y_to_e(char* ep, char* d, char* a, int lev, int flag) | 
 | { | 
 | 	Bits h; | 
 | 	int  temp; | 
 |  | 
 | 	USED(a); | 
 | 	switch(ep[-1]) { | 
 | 	case 'a': | 
 | 	case 'e': | 
 | 	case 'i': | 
 | 		return 0; | 
 | 	} | 
 | 	temp = *ep; | 
 | 	*ep++ = 'e'; | 
 | 	h = strip(ep,"",d,lev,flag); | 
 | 	ep[-1] = temp; | 
 | 	return h; | 
 | } | 
 |  | 
 | Bits | 
 | ily(char* ep, char* d, char* a, int lev, int flag) | 
 | { | 
 | 	int temp = ep[0]; | 
 | 	char *cp = ep; | 
 |  | 
 | 	if(temp==ep[-1]&&temp==ep[-2])		/* sillly */ | 
 | 		return 0; | 
 | 	if(*--cp=='y' && !ISVOWEL(*--cp))	/* happyly */ | 
 | 		while(cp>word) | 
 | 			if(ISVOWEL(*--cp))	/* shyness */ | 
 | 				return 0; | 
 | 	if(ep[-1]=='i') | 
 | 		return i_to_y(ep,d,a,lev,flag); | 
 | 	return cstrip(ep,d,a,lev,flag); | 
 | } | 
 |  | 
 | Bits | 
 | bility(char* ep, char* d, char* a, int lev, int flag) | 
 | { | 
 | 	*ep++ = 'l'; | 
 | 	return y_to_e(ep,d,a,lev,flag); | 
 | } | 
 |  | 
 | Bits | 
 | i_to_y(char* ep, char* d, char* a, int lev, int flag) | 
 | { | 
 | 	Bits h; | 
 | 	int temp; | 
 |  | 
 | 	if(ISUPPER(*word)) | 
 | 		return 0; | 
 | 	if((temp=ep[-1])=='i' && !ISVOWEL(ep[-2])) { | 
 | 		ep[-1] = 'y'; | 
 | 		a = d; | 
 | 	} | 
 | 	h = cstrip(ep,"",a,lev,flag); | 
 | 	ep[-1] = temp; | 
 | 	return h; | 
 | } | 
 |  | 
 | Bits | 
 | es(char* ep, char* d, char* a, int lev, int flag) | 
 | { | 
 | 	if(lev>DLEV) | 
 | 		return 0; | 
 | 	switch(ep[-1]) { | 
 | 	default: | 
 | 		return 0; | 
 | 	case 'i': | 
 | 		return i_to_y(ep,d,a,lev,flag); | 
 | 	case 'h': | 
 | 		switch(ep[-2]) { | 
 | 		default: | 
 | 			return 0; | 
 | 		case 'c': | 
 | 		case 's': | 
 | 			break; | 
 | 		} | 
 | 	case 's': | 
 | 	case 'z': | 
 | 	case 'x': | 
 | 		return strip(ep,d,a,lev,flag); | 
 | 	} | 
 | } | 
 |  | 
 | Bits | 
 | subst(char* ep, char* d, char* a, int lev, int flag) | 
 | { | 
 | 	char *u,*t; | 
 | 	Bits h; | 
 |  | 
 | 	USED(a); | 
 | 	if(skipv(skipv(ep-1)) < word) | 
 | 		return 0; | 
 | 	for(t=d; *t!='+'; t++) | 
 | 		continue; | 
 | 	for(u=ep; *--t!='-';) | 
 | 		*--u = *t; | 
 | 	h = strip(ep,"",d,lev,flag); | 
 | 	while(*++t != '+') | 
 | 		continue; | 
 | 	while(*++t) | 
 | 		*u++ = *t; | 
 | 	return h; | 
 | } | 
 |  | 
 | Bits | 
 | tion(char* ep, char* d, char* a, int lev, int flag) | 
 | { | 
 | 	switch(ep[-2]) { | 
 | 	default: | 
 | 		return trypref(ep,a,lev,flag); | 
 | 	case 'a': | 
 | 	case 'e': | 
 | 	case 'i': | 
 | 	case 'o': | 
 | 	case 'u': | 
 | 		return y_to_e(ep,d,a,lev,flag); | 
 | 	} | 
 | } | 
 |  | 
 | /* | 
 |  * possible consonant-consonant-e ending | 
 |  */ | 
 | Bits | 
 | CCe(char* ep, char* d, char* a, int lev, int flag) | 
 | { | 
 | 	Bits h; | 
 |  | 
 | 	switch(ep[-1]) { | 
 | 	case 'l': | 
 | 		if(ISVOWEL(ep[-2])) | 
 | 			break; | 
 | 		switch(ep[-2]) { | 
 | 		case 'l': | 
 | 		case 'r': | 
 | 		case 'w': | 
 | 			break; | 
 | 		default: | 
 | 			return y_to_e(ep,d,a,lev,flag); | 
 | 		} | 
 | 		break; | 
 | 	case 'c': | 
 | 	case 'g': | 
 | 		if(*ep == 'a')	/* prevent -able for -eable */ | 
 | 			return 0; | 
 | 	case 's': | 
 | 	case 'v': | 
 | 	case 'z': | 
 | 		if(ep[-2]==ep[-1]) | 
 | 			break; | 
 | 		if(ISVOWEL(ep[-2])) | 
 | 			break; | 
 | 	case 'u': | 
 | 		if(h = y_to_e(ep,d,a,lev,flag)) | 
 | 			return h; | 
 | 		if(!(ep[-2]=='n' && ep[-1]=='g')) | 
 | 			return 0; | 
 | 	} | 
 | 	return VCe(ep,d,a,lev,flag); | 
 | } | 
 |  | 
 | /* | 
 |  * possible consonant-vowel-consonant-e ending | 
 |  */ | 
 | Bits | 
 | VCe(char* ep, char* d, char* a, int lev, int flag) | 
 | { | 
 | 	int c; | 
 | 	Bits h; | 
 |  | 
 | 	c = ep[-1]; | 
 | 	if(c=='e') | 
 | 		return 0; | 
 | 	if(!ISVOWEL(c) && ISVOWEL(ep[-2])) { | 
 | 		c = *ep; | 
 | 		*ep++ = 'e'; | 
 | 		h = trypref(ep,d,lev,flag); | 
 | 		if(!h) | 
 | 			h = trysuff(ep,lev,flag); | 
 | 		if(h) | 
 | 			return h; | 
 | 		ep--; | 
 | 		*ep = c; | 
 | 	} | 
 | 	return cstrip(ep,d,a,lev,flag); | 
 | } | 
 |  | 
 | Ptab* | 
 | lookuppref(uchar** wp, char* ep) | 
 | { | 
 | 	Ptab *sp; | 
 | 	uchar *bp,*cp; | 
 | 	unsigned int initchar = Tolower(**wp); | 
 |  | 
 | 	if(!ISALPHA(initchar)) | 
 | 		return 0; | 
 | 	for(sp=preftab[initchar-'a'];sp->s;sp++) { | 
 | 		bp = *wp; | 
 | 		for(cp= (uchar*)sp->s;*cp; ) | 
 | 			if(*bp++!=*cp++) | 
 | 				goto next; | 
 | 		for(cp=bp;cp<(uchar*)ep;cp++) | 
 | 			if(ISVOWEL(*cp)) { | 
 | 				*wp = bp; | 
 | 				return sp; | 
 | 			} | 
 | 	next:; | 
 | 	} | 
 | 	return 0; | 
 | } | 
 |  | 
 | /*	while word is not in dictionary try stripping | 
 |  *	prefixes. Fail if no more prefixes. | 
 |  */ | 
 | Bits | 
 | trypref(char* ep, char* a, int lev, int flag) | 
 | { | 
 | 	Ptab *tp; | 
 | 	char *bp, *cp; | 
 | 	char *pp; | 
 | 	Bits h; | 
 | 	char space[20]; | 
 |  | 
 | 	if(lev<DSIZ) { | 
 | 		deriv[lev].mesg = a; | 
 | 		deriv[lev].type = *a=='.'? NONE: SUFF; | 
 | 	} | 
 | 	if(h = tryword(word,ep,lev,flag)) { | 
 | 		if(Set(h, flag&~MONO) && (flag&MONO) <= Set(h, MONO)) | 
 | 			return h; | 
 | 		h = 0; | 
 | 	} | 
 | 	bp = word; | 
 | 	pp = space; | 
 | 	if(lev<DSIZ) { | 
 | 		deriv[lev+1].mesg = pp; | 
 | 		deriv[lev+1].type = 0; | 
 | 	} | 
 | 	while(tp=lookuppref((uchar**)(void*)&bp,ep)) { | 
 | 		*pp++ = '+'; | 
 | 		cp = tp->s; | 
 | 		while(pp<space+sizeof(space) && (*pp = *cp++)) | 
 | 			pp++; | 
 | 		deriv[lev+1].type += PREF; | 
 | 		h = tryword(bp,ep,lev+1,flag); | 
 | 		if(Set(h,NOPREF) || | 
 | 		   ((tp->flag&IN) && inun(bp-2,h)==0)) { | 
 | 			h = 0; | 
 | 			break; | 
 | 		} | 
 | 		if(Set(h,flag&~MONO) && (flag&MONO) <= Set(h, MONO)) | 
 | 			break; | 
 | 		h = 0; | 
 | 	} | 
 | 	if(lev < DSIZ) { | 
 | 		deriv[lev+1] = emptyderiv; | 
 | 		deriv[lev+2] = emptyderiv; | 
 | 	} | 
 | 	return h; | 
 | } | 
 |  | 
 | Bits | 
 | tryword(char* bp, char* ep, int lev, int flag) | 
 | { | 
 | 	int  j; | 
 | 	Bits h = 0; | 
 | 	char duple[3]; | 
 |  | 
 | 	if(ep-bp <= 1) | 
 | 		return h; | 
 | 	if(flag&MONO) { | 
 | 		if(lev<DSIZ) { | 
 | 			deriv[++lev].mesg = duple; | 
 | 			deriv[lev].type = SUFF; | 
 | 		} | 
 | 		duple[0] = '+'; | 
 | 		duple[1] = *ep; | 
 | 		duple[2] = 0; | 
 | 	} | 
 | 	h = dict(bp, ep); | 
 | 	if(vflag==0 || h==0) | 
 | 		return h; | 
 | 	/* | 
 | 	 * when derivations are wanted, collect them | 
 | 	 * for printing | 
 | 	 */ | 
 | 	j = lev; | 
 | 	prefcount = suffcount = 0; | 
 | 	do { | 
 | 		if(j<DSIZ && deriv[j].type) { | 
 | 			strcat(affix, deriv[j].mesg); | 
 | 			if(deriv[j].type == SUFF) | 
 | 				suffcount++; | 
 | 			else if(deriv[j].type != NONE) | 
 | 				prefcount = deriv[j].type/PREF; | 
 | 		} | 
 | 	} while(--j > 0); | 
 | 	return h; | 
 | } | 
 |  | 
 | int | 
 | inun(char* bp, Bits h) | 
 | { | 
 | 	if(*bp == 'u') | 
 | 		return Set(h, IN) == 0; | 
 | 	/* *bp == 'i' */ | 
 | 	if(Set(h, IN) == 0) | 
 | 		return 0; | 
 | 	switch(bp[2]) { | 
 | 	case 'r': | 
 | 		return bp[1] == 'r'; | 
 | 	case 'm': | 
 | 	case 'p': | 
 | 		return bp[1] == 'm'; | 
 | 	} | 
 | 	return bp[1] == 'n'; | 
 | } | 
 |  | 
 | char* | 
 | skipv(char *s) | 
 | { | 
 | 	if(s >= word && ISVOWEL(*s)) | 
 | 		s--; | 
 | 	while(s >= word && !ISVOWEL(*s)) | 
 | 		s--; | 
 | 	return s; | 
 | } | 
 |  | 
 | /* | 
 |  * crummy way to Britishise | 
 |  */ | 
 | void | 
 | ise(void) | 
 | { | 
 | 	Suftab *p; | 
 | 	int i; | 
 |  | 
 | 	for(i=0; i<26; i++) | 
 | 		for(p = suftab[i]; p->suf; p++) { | 
 | 			p->suf = ztos(p->suf); | 
 | 			p->d1 = ztos(p->d1); | 
 | 			p->a1 = ztos(p->a1); | 
 | 		} | 
 | } | 
 |  | 
 | char* | 
 | ztos(char *as) | 
 | { | 
 | 	char *s, *ds; | 
 |  | 
 | 	for(s=as; *s; s++) | 
 | 		if(*s == 'z') | 
 | 			goto copy; | 
 | 	return as; | 
 |  | 
 | copy: | 
 | 	ds = strdup(as); | 
 | 	for(s=ds; *s; s++) | 
 | 		if(*s == 'z') | 
 | 			*s = 's'; | 
 | 	return ds; | 
 | } | 
 |  | 
 | Bits | 
 | dict(char* bp, char* ep) | 
 | { | 
 | 	char *cp, *cp1, *w, *wp, *we; | 
 | 	int n, f; | 
 |  | 
 | 	w = bp; | 
 | 	we = ep; | 
 | 	n = ep-bp; | 
 | 	if(n <= 1) | 
 | 		return NOUN; | 
 |  | 
 | 	f = w[0] & 0x7f; | 
 | 	f *= 128; | 
 | 	f += w[1] & 0x7f; | 
 | 	bp = spacep[f]; | 
 | 	ep = spacep[f+1]; | 
 |  | 
 | loop: | 
 | 	if(bp >= ep) { | 
 | 		if(xflag)  | 
 | 			fprint(2, "=%.*s\n", utfnlen(w, n), w); | 
 | 		return 0; | 
 | 	} | 
 | 	/* | 
 | 	 * find the beginning of some word in the middle | 
 | 	 */ | 
 | 	cp = bp + (ep-bp)/2; | 
 |  | 
 | 	while(cp > bp && !(*cp & 0x80)) | 
 | 		cp--; | 
 | 	while(cp > bp && (cp[-1] & 0x80)) | 
 | 		cp--; | 
 |  | 
 | 	wp = w + 2;	/* skip two letters */ | 
 | 	cp1 = cp + 2;	/* skip affix code */ | 
 | 	for(;;) { | 
 | 		if(wp >= we) { | 
 | 			if(*cp1 & 0x80) | 
 | 				goto found; | 
 | 			else | 
 | 				f = 1; | 
 | 			break; | 
 | 		} | 
 | 		if(*cp1 & 0x80) { | 
 | 			f = -1; | 
 | 			break; | 
 | 		} | 
 | 		f = *cp1++ - *wp++; | 
 | 		if(f != 0) | 
 | 			break; | 
 | 	} | 
 |  | 
 | 	if(f < 0) { | 
 | 		while(!(*cp1 & 0x80)) | 
 | 			cp1++; | 
 | 		bp = cp1; | 
 | 		goto loop; | 
 | 	} | 
 | 	ep = cp; | 
 | 	goto loop; | 
 |  | 
 | found: | 
 | 	f = ((cp[0] & 0x7) << 8) | | 
 | 		(cp[1] & 0xff); | 
 | 	if(xflag) { | 
 | 		fprint(2, "=%.*s ", utfnlen(w, n), w); | 
 | 		typeprint(encode[f]); | 
 | 	} | 
 | 	return encode[f]; | 
 | } | 
 |  | 
 | void | 
 | typeprint(Bits h) | 
 | { | 
 |  | 
 | 	pcomma(""); | 
 | 	if(h & NOUN) | 
 | 		pcomma("n"); | 
 | 	if(h & PROP_COLLECT) | 
 | 		pcomma("pc"); | 
 | 	if(h & VERB) { | 
 | 		if((h & VERB) == VERB) | 
 | 			pcomma("v"); | 
 | 		else | 
 | 		if((h & VERB) == V_IRREG) | 
 | 			pcomma("vi"); | 
 | 		else | 
 | 		if(h & ED) | 
 | 			pcomma("ed"); | 
 | 	} | 
 | 	if(h & ADJ) | 
 | 		pcomma("a"); | 
 | 	if(h & COMP) { | 
 | 		if((h & COMP) == ACTOR) | 
 | 			pcomma("er"); | 
 | 		else | 
 | 			pcomma("comp"); | 
 | 	} | 
 | 	if(h & DONT_TOUCH) | 
 | 		pcomma("d"); | 
 | 	if(h & N_AFFIX) | 
 | 		pcomma("na"); | 
 | 	if(h & ADV) | 
 | 		pcomma("adv"); | 
 | 	if(h & ION) | 
 | 		pcomma("ion"); | 
 | 	if(h & V_AFFIX) | 
 | 		pcomma("va"); | 
 | 	if(h & MAN) | 
 | 		pcomma("man"); | 
 | 	if(h & NOPREF) | 
 | 		pcomma("nopref"); | 
 | 	if(h & MONO) | 
 | 		pcomma("ms"); | 
 | 	if(h & IN) | 
 | 		pcomma("in"); | 
 | 	if(h & _Y) | 
 | 		pcomma("y"); | 
 | 	if(h & STOP) | 
 | 		pcomma("s"); | 
 | 	fprint(2, "\n"); | 
 | } | 
 |  | 
 | void | 
 | pcomma(char *s) | 
 | { | 
 | 	static int flag; | 
 |  | 
 | 	if(*s == 0) { | 
 | 		flag = 0; | 
 | 		return; | 
 | 	} | 
 | 	if(!flag) { | 
 | 		fprint(2, "%s", s); | 
 | 		flag = 1; | 
 | 	} else | 
 | 		fprint(2, ",%s", s); | 
 | } | 
 |  | 
 | /* | 
 |  * is the word on of the following | 
 |  *	12th	teen | 
 |  *	21st	end in 1 | 
 |  *	23rd	end in 3 | 
 |  *	77th	default | 
 |  * called knowing word[0] is a digit | 
 |  */ | 
 | int | 
 | ordinal(void) | 
 | { | 
 | 	char *cp = word; | 
 | 	static char sp[4]; | 
 |  | 
 | 	while(ISDIGIT(*cp)) | 
 | 		cp++; | 
 | 	strncpy(sp,cp,3); | 
 | 	if(ISUPPER(cp[0]) && ISUPPER(cp[1])) { | 
 | 		sp[0] = Tolower(cp[0]); | 
 | 		sp[1] = Tolower(cp[1]); | 
 | 	} | 
 | 	return 0 == strncmp(sp, | 
 | 		cp[-2]=='1'? "th":	/* out of bounds if 1 digit */ | 
 | 		*--cp=='1'? "st":	/* harmless */ | 
 | 		*cp=='2'? "nd": | 
 | 		*cp=='3'? "rd": | 
 | 		"th", 3); | 
 | } | 
 |  | 
 | /* | 
 |  * read in the dictionary. | 
 |  * format is | 
 |  * { | 
 |  *	short	nencode; | 
 |  *	long	encode[nencode]; | 
 |  *	char	space[*]; | 
 |  * }; | 
 |  * | 
 |  * the encodings are a table all different | 
 |  * affixes. | 
 |  * the dictionary proper has 2 bytes | 
 |  * that demark and then the rest of the | 
 |  * word. the 2 bytes have the following | 
 |  *	0x80 0x00	flag | 
 |  *	0x78 0x00	count of prefix bytes | 
 |  *			common with prev word | 
 |  *	0x07 0xff	affix code | 
 |  * | 
 |  * all ints are big endians in the file. | 
 |  */ | 
 | void | 
 | readdict(char *file) | 
 | { | 
 | 	char *s, *is, *lasts, *ls; | 
 | 	int c, i, sp, p; | 
 | 	int f; | 
 | 	long l; | 
 |  | 
 | 	lasts = 0; | 
 | 	f = open(file, 0); | 
 | 	if(f == -1) { | 
 | 		fprint(2, "cannot open %s\n", file); | 
 | 		exits("open"); | 
 | 	} | 
 | 	if(read(f, space, 2) != 2) | 
 | 		goto bad; | 
 | 	nencode = ((space[0]&0xff)<<8) | (space[1]&0xff); | 
 | 	if(read(f, space, 4*nencode) != 4*nencode) | 
 | 		goto bad; | 
 | 	s = space; | 
 | 	for(i=0; i<nencode; i++) { | 
 | 		l = (long)(s[0] & 0xff) << 24; | 
 | 		l |= (s[1] & 0xff) << 16; | 
 | 		l |= (s[2] & 0xff) << 8; | 
 | 		l |= s[3] & 0xff; | 
 | 		encode[i] = (Bits)l; | 
 | 		s += 4; | 
 | 	} | 
 | 	l = read(f, space, sizeof(space)); | 
 | 	if(l == sizeof(space)) | 
 | 		goto noroom; | 
 | 	is = space + (sizeof(space) - l); | 
 | 	memmove(is, space, l); | 
 |  | 
 | 	s = space; | 
 | 	c = *is++ & 0xff; | 
 | 	sp = -1; | 
 | 	i = 0; | 
 |  | 
 | loop: | 
 | 	if(s > is) | 
 | 		goto noroom; | 
 | 	if(c < 0) { | 
 | 		close(f); | 
 | 		while(sp < 128*128) | 
 | 			spacep[++sp] = s; | 
 | 		*s = (char)0x80;		/* fence */ | 
 | 		return; | 
 | 	} | 
 | 	p = (c>>3) & 0xf; | 
 | 	*s++ = c; | 
 | 	*s++ = *is++ & 0xff; | 
 | 	if(p <= 0) | 
 | 		i = (*is++ & 0xff)*128; | 
 | 	if(p <= 1) { | 
 | 		if(!(*is & 0x80)) | 
 | 			i = i/128*128 + (*is++ & 0xff); | 
 | 		if(i <= sp) { | 
 | 			fprint(2, "the dict isnt sorted or \n"); | 
 | 			fprint(2, "memmove didn't work\n"); | 
 | 			goto bad; | 
 | 		} | 
 | 		while(sp < i) | 
 | 			spacep[++sp] = s-2; | 
 | 	} | 
 | 	ls = lasts; | 
 | 	lasts = s; | 
 | 	for(p-=2; p>0; p--) | 
 | 		*s++ = *ls++; | 
 | 	for(;;) { | 
 | 		if(is >= space+sizeof(space)) { | 
 | 			c = -1; | 
 | 			break; | 
 | 		} | 
 | 		c = *is++ & 0xff; | 
 | 		if(c & 0x80) | 
 | 			break; | 
 | 		*s++ = c; | 
 | 	} | 
 | 	*s = 0; | 
 | 	goto loop; | 
 |  | 
 | bad: | 
 | 	fprint(2, "trouble reading %s\n", file); | 
 | 	exits("read"); | 
 | noroom: | 
 | 	fprint(2, "not enough space for dictionary\n"); | 
 | 	exits("space"); | 
 | } |