| #include <u.h> | 
 | #include <libc.h> | 
 | #include <bio.h> | 
 | #include "dict.h" | 
 | #include "kuten.h" | 
 |  | 
 | /* | 
 |  * Routines for handling dictionaries in the "Languages of the World" | 
 |  * format.  worldnextoff *must* be called with <address of valid entry>+1. | 
 |  */ | 
 |  | 
 | #define	GSHORT(p)	(((p)[0]<<8)|(p)[1]) | 
 |  | 
 | #define putchar dictputchar | 
 |  | 
 | static void	putchar(int, int*); | 
 |  | 
 | #define	NONE	0xffff | 
 |  | 
 | /* adapted from jhelling@cs.ruu.nl (Jeroen Hellingman) */ | 
 |  | 
 | static Rune chartab[] = { | 
 |  | 
 | /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
 | 	NONE,	NONE,'\n',	0xe6,	0xf8,	0xe5,	0xe4,	0xf6, | 
 | /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE, | 
 | 	NONE,	NONE,	NONE,	0xc6,	0xd8,	0xc5,	0xc4,	0xd6, | 
 |  | 
 | /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	0x26,	'\'', | 
 | 	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f, | 
 | /*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37, | 
 | 	0x38,	0x39,	0x3a,	0x3b,	0x3c,	0x3d,	0x3e,	0x3f, | 
 | /*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47, | 
 | 	0x48,	0x49,	0x4a,	0x4b,'L',	0x4d,	0x4e,	0x4f, | 
 | /*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57, | 
 | 	0x58,	0x59,	0x5a,	0x5b,'\\',	0x5d,	0x5e,	0x5f, | 
 | /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67, | 
 | 	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f, | 
 | /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77, | 
 | 	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE, | 
 |  | 
 | /*80*/	0xc7,	0xfc,	0xe9,	0xe2,	0xe4,	0xe0,	0xe5,	0xe7, | 
 | 	0xea,	0xeb,	0xe8,	0xef,	0xee,	0xec,	0xc4,	0xc5, | 
 | /*90*/	0xc9,	0xe6,	0xc6,	0xf4,	0xf6,	0xf2,	0xfb,	0xf9, | 
 | 	0xff,	0xd6,	0xdc,	0xa2,	0xa3,	0xa5,	0x20a7,	0x283, | 
 | /*a0*/	0xe1,	0xed,	0xf3,	0xfa,	0xf1,	0xd1,	0xaa,	0xba, | 
 | 	0xbf,	0x2310,	0xac,	0xbd,	0xbc,	0xa1,	0xab,	0xbb, | 
 |  | 
 | /*b0*/	0x254,	0x259,	0xf0,	0x283,	0x292,	0x14b,	0x251,	0x7a, | 
 | 	0x26a,	0xf0,	0x292,	0xe3,	0x153,	0x169,	0x28c,	0x265, | 
 | /*c0*/	0x280,	0xeb,	0x6c,	0x28c,	0xf5,	0xf1,	0x152,	NONE, | 
 | 	NONE,	0x53,	0x73,	0x5a,	0x7a,	NONE,	NONE,	NONE, | 
 | /*d0*/	0xdf,	NONE,	NONE,	0x101,	0x12b,	0x16b,	0x113,	0x14d,	 | 
 | 	NONE,	NONE,	NONE,	0x20,	NONE,	NONE,	NONE,	NONE, | 
 |  | 
 | /*e0*/	0x3b1,	0x3b2,	0x3b3,	0x3c0,	0x3a3,	0x3c3,	0xb5,	0x3c4, | 
 | 	0x3a6,	0x398,	0x3a9,	0x3b4,	0x221e,	0xd8,	0x3b5,	0x2229, | 
 | /*f0*/	0x2261,	0xb1,	0x2265,	0x2264,	0x2320,	0x2321,	0xf7,	0x2248, | 
 | 	0xb0,	0x2219,	0xb7,	NONE,	NONE,	NONE,	NONE,	NONE | 
 | }; | 
 |  | 
 | enum{ Utf, Kanahi, Kanalo=Kanahi+1, GBhi, GBlo=GBhi+1 }; | 
 |  | 
 | void | 
 | worldprintentry(Entry e, int cmd) | 
 | { | 
 | 	int nh, state[3]; | 
 | 	uchar *p, *pe; | 
 |  | 
 | 	p = (uchar *)e.start; | 
 | 	pe = (uchar *)e.end; | 
 | 	nh = GSHORT(p); | 
 | 	p += 6; | 
 | 	if(cmd == 'h') | 
 | 		pe = p+nh; | 
 | 	state[0] = Utf; | 
 | 	state[1] = 0; | 
 | 	state[2] = 0; | 
 | 	while(p < pe){ | 
 | 		if(cmd == 'r') | 
 | 			outchar(*p++); | 
 | 		else | 
 | 			putchar(*p++, state); | 
 | 	} | 
 | 	outnl(0); | 
 | } | 
 |  | 
 | long | 
 | worldnextoff(long fromoff) | 
 | { | 
 | 	int nh, np, nd; | 
 | 	uchar buf[6]; | 
 |  | 
 | 	if(Bseek(bdict, fromoff-1, 0) < 0) | 
 | 		return -1; | 
 | 	if(Bread(bdict, buf, 6) != 6) | 
 | 		return -1; | 
 | 	nh = GSHORT(buf); | 
 | 	np = GSHORT(buf+2); | 
 | 	nd = GSHORT(buf+4); | 
 | 	return fromoff-1 + 6 + nh + np + nd; | 
 | } | 
 |  | 
 | static void | 
 | putchar(int c, int *state) | 
 | { | 
 | 	int xflag = 0; | 
 | 	Rune r; | 
 | 	int hi, lo; | 
 |  | 
 | 	switch(state[0]){ | 
 | 	case Kanahi: | 
 | 	case GBhi: | 
 | 		if(CANS2JH(c) || c == 0xff){ | 
 | 			state[0]++; | 
 | 			state[1] = c; | 
 | 			break; | 
 | 		} | 
 | 		/* fall through */ | 
 | 	case Utf: | 
 | 		if(c == 0xfe){ | 
 | 			state[0] = Kanahi; | 
 | 			break; | 
 | 		}else if(c == 0xff){ | 
 | 			state[0] = GBhi; | 
 | 			break; | 
 | 		} | 
 | 		r = chartab[c]; | 
 | 		if(r < 0x80 && state[2] == 0) | 
 | 			outchar(r); | 
 | 		else if(r == NONE){ | 
 | 			switch(c){ | 
 | 			case 0xfb: | 
 | 				if(!xflag){ | 
 | 					state[2] = 1; | 
 | 					break; | 
 | 				} | 
 | 			case 0xfc: | 
 | 				if(!xflag){ | 
 | 					state[2] = 0; | 
 | 					break; | 
 | 				} | 
 | 			case 0x10: | 
 | 			case 0xc7: case 0xc8: | 
 | 			case 0xd8: case 0xd9: case 0xda: | 
 | 			case 0xdc: case 0xdd: case 0xde: case 0xdf: | 
 | 			case 0xfd: | 
 | 				if(!xflag) | 
 | 					break; | 
 | 				/* fall through */ | 
 | 			default: | 
 | 				outprint("\\%.2ux", c); | 
 | 			} | 
 | 		}else if(state[2] == 0) | 
 | 			outrune(r); | 
 | 		break; | 
 | 	case Kanalo: | 
 | 	case GBlo: | 
 | 		if(state[1] == 0xff && c == 0xff){ | 
 | 			state[0] = Utf; | 
 | 			break; | 
 | 		} | 
 | 		state[0]--; | 
 | 		hi = state[1]; | 
 | 		lo = c; | 
 | 		S2J(hi, lo);		/* convert to JIS */ | 
 | 		r = hi*100 + lo - 3232;	/* convert to jis208 */ | 
 | 		if(state[0] == Kanahi && r < JIS208MAX) | 
 | 			r = tabjis208[r]; | 
 | 		else if(state[0] == GBhi && r < GB2312MAX) | 
 | 			r = tabgb2312[r]; | 
 | 		else | 
 | 			r = NONE; | 
 | 		if(r == NONE) | 
 | 			outprint("\\%.2ux\\%.2ux", state[1], c); | 
 | 		else | 
 | 			outrune(r); | 
 | 		break; | 
 | 	} | 
 | } | 
 |  | 
 | void | 
 | worldprintkey(void) | 
 | { | 
 | 	Bprint(bout, "No pronunciation key.\n"); | 
 | } |