| #ifdef	PLAN9 | 
 | #include	<u.h> | 
 | #include	<libc.h> | 
 | #include	<bio.h> | 
 | #else | 
 | #include	<stdio.h> | 
 | #include	<unistd.h> | 
 | #include	"plan9.h" | 
 | #endif | 
 | #include	"hdr.h" | 
 | #include	"conv.h" | 
 | #include	"kuten208.h" | 
 | #include	"jis.h" | 
 |  | 
 | /* | 
 | 	a state machine for interpreting all sorts of encodings | 
 | */ | 
 | static void | 
 | alljis(int c, Rune **r, long input_loc) | 
 | { | 
 | 	static enum { state0, state1, state2, state3, state4 } state = state0; | 
 | 	static int set8 = 0; | 
 | 	static int japan646 = 0; | 
 | 	static int lastc; | 
 | 	int n; | 
 | 	long l; | 
 |  | 
 | again: | 
 | 	switch(state) | 
 | 	{ | 
 | 	case state0:	/* idle state */ | 
 | 		if(c == ESC){ state = state1; return; } | 
 | 		if(c < 0) return; | 
 | 		if(!set8 && (c < 128)){ | 
 | 			if(japan646){ | 
 | 				switch(c) | 
 | 				{ | 
 | 				case '\\':	emit(0xA5); return;	/* yen */ | 
 | 				case '~':	emit(0xAF); return;	/* spacing macron */ | 
 | 				default:	emit(c); return; | 
 | 				} | 
 | 			} else { | 
 | 				emit(c); | 
 | 				return; | 
 | 			} | 
 | 		} | 
 | 		if(c < 0x21){	/* guard against bogus characters in JIS mode */ | 
 | 			if(squawk) | 
 | 				EPR "%s: non-JIS character %02x in %s near byte %ld\n", argv0, c, file, input_loc); | 
 | 			emit(c); | 
 | 			return; | 
 | 		} | 
 | 		lastc = c; state = state4; return; | 
 |  | 
 | 	case state1:	/* seen an escape */ | 
 | 		if(c == '$'){ state = state2; return; } | 
 | 		if(c == '('){ state = state3; return; } | 
 | 		emit(ESC); state = state0; goto again; | 
 |  | 
 | 	case state2:	/* may be shifting into JIS */ | 
 | 		if((c == '@') || (c == 'B')){ | 
 | 			set8 = 1; state = state0; return; | 
 | 		} | 
 | 		emit(ESC); emit('$'); state = state0; goto again; | 
 |  | 
 | 	case state3:	/* may be shifting out of JIS */ | 
 | 		if((c == 'J') || (c == 'H') || (c == 'B')){ | 
 | 			japan646 = (c == 'J'); | 
 | 			set8 = 0; state = state0; return; | 
 | 		} | 
 | 		emit(ESC); emit('('); state = state0; goto again; | 
 |  | 
 | 	case state4:	/* two part char */ | 
 | 		if(c < 0){ | 
 | 			if(squawk) | 
 | 				EPR "%s: unexpected EOF in %s\n", argv0, file); | 
 | 			c = 0x21 | (lastc&0x80); | 
 | 		} | 
 | 		if(CANS2J(lastc, c)){	/* ms dos sjis */ | 
 | 			int hi = lastc, lo = c; | 
 | 			S2J(hi, lo);			/* convert to 208 */ | 
 | 			n = hi*100 + lo - 3232;		/* convert to kuten208 */ | 
 | 		} else | 
 | 			n = (lastc&0x7F)*100 + (c&0x7f) - 3232;	/* kuten208 */ | 
 | 		if((n >= KUTEN208MAX) || ((l = tabkuten208[n]) == -1)){ | 
 | 			nerrors++; | 
 | 			if(squawk) | 
 | 				EPR "%s: unknown kuten208 %d (from 0x%x,0x%x) near byte %ld in %s\n", argv0, n, lastc, c, input_loc, file); | 
 | 			if(!clean) | 
 | 				emit(BADMAP); | 
 | 		} else { | 
 | 			if(l < 0){ | 
 | 				l = -l; | 
 | 				if(squawk) | 
 | 					EPR "%s: ambiguous kuten208 %d (mapped to 0x%lx) near byte %ld in %s\n", argv0, n, l, input_loc, file); | 
 | 			} | 
 | 			emit(l); | 
 | 		} | 
 | 		state = state0; | 
 | 	} | 
 | } | 
 |  | 
 | /* | 
 | 	a state machine for interpreting ms-kanji == shift-jis. | 
 | */ | 
 | static void | 
 | ms(int c, Rune **r, long input_loc) | 
 | { | 
 | 	static enum { state0, state1, state2, state3, state4 } state = state0; | 
 | 	static int set8 = 0; | 
 | 	static int japan646 = 0; | 
 | 	static int lastc; | 
 | 	int n; | 
 | 	long l; | 
 |  | 
 | again: | 
 | 	switch(state) | 
 | 	{ | 
 | 	case state0:	/* idle state */ | 
 | 		if(c == ESC){ state = state1; return; } | 
 | 		if(c < 0) return; | 
 | 		if(!set8 && (c < 128)){ | 
 | 			if(japan646){ | 
 | 				switch(c) | 
 | 				{ | 
 | 				case '\\':	emit(0xA5); return;	/* yen */ | 
 | 				case '~':	emit(0xAF); return;	/* spacing macron */ | 
 | 				default:	emit(c); return; | 
 | 				} | 
 | 			} else { | 
 | 				emit(c); | 
 | 				return; | 
 | 			} | 
 | 		} | 
 | 		lastc = c; state = state4; return; | 
 |  | 
 | 	case state1:	/* seen an escape */ | 
 | 		if(c == '$'){ state = state2; return; } | 
 | 		if(c == '('){ state = state3; return; } | 
 | 		emit(ESC); state = state0; goto again; | 
 |  | 
 | 	case state2:	/* may be shifting into JIS */ | 
 | 		if((c == '@') || (c == 'B')){ | 
 | 			set8 = 1; state = state0; return; | 
 | 		} | 
 | 		emit(ESC); emit('$'); state = state0; goto again; | 
 |  | 
 | 	case state3:	/* may be shifting out of JIS */ | 
 | 		if((c == 'J') || (c == 'H') || (c == 'B')){ | 
 | 			japan646 = (c == 'J'); | 
 | 			set8 = 0; state = state0; return; | 
 | 		} | 
 | 		emit(ESC); emit('('); state = state0; goto again; | 
 |  | 
 | 	case state4:	/* two part char */ | 
 | 		if(c < 0){ | 
 | 			if(squawk) | 
 | 				EPR "%s: unexpected EOF in %s\n", argv0, file); | 
 | 			c = 0x21 | (lastc&0x80); | 
 | 		} | 
 | 		if(CANS2J(lastc, c)){	/* ms dos sjis */ | 
 | 			int hi = lastc, lo = c; | 
 | 			S2J(hi, lo);			/* convert to 208 */ | 
 | 			n = hi*100 + lo - 3232;		/* convert to kuten208 */ | 
 | 		} else { | 
 | 			nerrors++; | 
 | 			if(squawk) | 
 | 				EPR "%s: illegal byte pair (0x%x,0x%x) near byte %ld in %s\n", argv0, lastc, c, input_loc, file); | 
 | 			if(!clean) | 
 | 				emit(BADMAP); | 
 | 			state = state0; | 
 | 			goto again; | 
 | 		} | 
 | 		if((n >= KUTEN208MAX) || ((l = tabkuten208[n]) == -1)){ | 
 | 			nerrors++; | 
 | 			if(squawk) | 
 | 				EPR "%s: unknown kuten208 %d (from 0x%x,0x%x) near byte %ld in %s\n", argv0, n, lastc, c, input_loc, file); | 
 | 			if(!clean) | 
 | 				emit(BADMAP); | 
 | 		} else { | 
 | 			if(l < 0){ | 
 | 				l = -l; | 
 | 				if(squawk) | 
 | 					EPR "%s: ambiguous kuten208 %d (mapped to 0x%lx) near byte %ld in %s\n", argv0, n, l, input_loc, file); | 
 | 			} | 
 | 			emit(l); | 
 | 		} | 
 | 		state = state0; | 
 | 	} | 
 | } | 
 |  | 
 | /* | 
 | 	a state machine for interpreting ujis == EUC | 
 | */ | 
 | static void | 
 | ujis(int c, Rune **r, long input_loc) | 
 | { | 
 | 	static enum { state0, state1 } state = state0; | 
 | 	static int lastc; | 
 | 	int n; | 
 | 	long l; | 
 |  | 
 | 	switch(state) | 
 | 	{ | 
 | 	case state0:	/* idle state */ | 
 | 		if(c < 0) return; | 
 | 		if(c < 128){ | 
 | 			emit(c); | 
 | 			return; | 
 | 		} | 
 | 		if(c == 0x8e){	/* codeset 2 */ | 
 | 			nerrors++; | 
 | 			if(squawk) | 
 | 				EPR "%s: unknown codeset 2 near byte %ld in %s\n", argv0, input_loc, file); | 
 | 			if(!clean) | 
 | 				emit(BADMAP); | 
 | 			return; | 
 | 		} | 
 | 		if(c == 0x8f){	/* codeset 3 */ | 
 | 			nerrors++; | 
 | 			if(squawk) | 
 | 				EPR "%s: unknown codeset 3 near byte %ld in %s\n", argv0, input_loc, file); | 
 | 			if(!clean) | 
 | 				emit(BADMAP); | 
 | 			return; | 
 | 		} | 
 | 		lastc = c; | 
 | 		state = state1; | 
 | 		return; | 
 |  | 
 | 	case state1:	/* two part char */ | 
 | 		if(c < 0){ | 
 | 			if(squawk) | 
 | 				EPR "%s: unexpected EOF in %s\n", argv0, file); | 
 | 			c = 0xA1; | 
 | 		} | 
 | 		n = (lastc&0x7F)*100 + (c&0x7F) - 3232;	/* kuten208 */ | 
 | 		if((n >= KUTEN208MAX) || ((l = tabkuten208[n]) == -1)){ | 
 | 			nerrors++; | 
 | 			if(squawk) | 
 | 				EPR "%s: unknown kuten208 %d (from 0x%x,0x%x) near byte %ld in %s\n", argv0, n, lastc, c, input_loc, file); | 
 | 			if(!clean) | 
 | 				emit(BADMAP); | 
 | 		} else { | 
 | 			if(l < 0){ | 
 | 				l = -l; | 
 | 				if(squawk) | 
 | 					EPR "%s: ambiguous kuten208 %d (mapped to 0x%lx) near byte %ld in %s\n", argv0, n, l, input_loc, file); | 
 | 			} | 
 | 			emit(l); | 
 | 		} | 
 | 		state = state0; | 
 | 	} | 
 | } | 
 |  | 
 | /* | 
 | 	a state machine for interpreting jis-kanji == 2022-JP | 
 | */ | 
 | static void | 
 | jis(int c, Rune **r, long input_loc) | 
 | { | 
 | 	static enum { state0, state1, state2, state3, state4 } state = state0; | 
 | 	static int set8 = 0; | 
 | 	static int japan646 = 0; | 
 | 	static int lastc; | 
 | 	int n; | 
 | 	long l; | 
 |  | 
 | again: | 
 | 	switch(state) | 
 | 	{ | 
 | 	case state0:	/* idle state */ | 
 | 		if(c == ESC){ state = state1; return; } | 
 | 		if(c < 0) return; | 
 | 		if(!set8 && (c < 128)){ | 
 | 			if(japan646){ | 
 | 				switch(c) | 
 | 				{ | 
 | 				case '\\':	emit(0xA5); return;	/* yen */ | 
 | 				case '~':	emit(0xAF); return;	/* spacing macron */ | 
 | 				default:	emit(c); return; | 
 | 				} | 
 | 			} else { | 
 | 				emit(c); | 
 | 				return; | 
 | 			} | 
 | 		} | 
 | 		lastc = c; state = state4; return; | 
 |  | 
 | 	case state1:	/* seen an escape */ | 
 | 		if(c == '$'){ state = state2; return; } | 
 | 		if(c == '('){ state = state3; return; } | 
 | 		emit(ESC); state = state0; goto again; | 
 |  | 
 | 	case state2:	/* may be shifting into JIS */ | 
 | 		if((c == '@') || (c == 'B')){ | 
 | 			set8 = 1; state = state0; return; | 
 | 		} | 
 | 		emit(ESC); emit('$'); state = state0; goto again; | 
 |  | 
 | 	case state3:	/* may be shifting out of JIS */ | 
 | 		if((c == 'J') || (c == 'H') || (c == 'B')){ | 
 | 			japan646 = (c == 'J'); | 
 | 			set8 = 0; state = state0; return; | 
 | 		} | 
 | 		emit(ESC); emit('('); state = state0; goto again; | 
 |  | 
 | 	case state4:	/* two part char */ | 
 | 		if(c < 0){ | 
 | 			if(squawk) | 
 | 				EPR "%s: unexpected EOF in %s\n", argv0, file); | 
 | 			c = 0x21 | (lastc&0x80); | 
 | 		} | 
 | 		if((lastc&0x80) != (c&0x80)){	/* guard against latin1 in jis */ | 
 | 			emit(lastc); | 
 | 			state = state0; | 
 | 			goto again; | 
 | 		} | 
 | 		n = (lastc&0x7F)*100 + (c&0x7f) - 3232;	/* kuten208 */ | 
 | 		if((n >= KUTEN208MAX) || ((l = tabkuten208[n]) == -1)){ | 
 | 			nerrors++; | 
 | 			if(squawk) | 
 | 				EPR "%s: unknown kuten208 %d (from 0x%x,0x%x) near byte %ld in %s\n", argv0, n, lastc, c, input_loc, file); | 
 | 			if(!clean) | 
 | 				emit(BADMAP); | 
 | 		} else { | 
 | 			if(l < 0){ | 
 | 				l = -l; | 
 | 				if(squawk) | 
 | 					EPR "%s: ambiguous kuten208 %d (mapped to 0x%lx) near byte %ld in %s\n", argv0, n, l, input_loc, file); | 
 | 			} | 
 | 			emit(l); | 
 | 		} | 
 | 		state = state0; | 
 | 	} | 
 | } | 
 |  | 
 | static void | 
 | do_in(int fd, void (*procfn)(int, Rune **, long), struct convert *out) | 
 | { | 
 | 	Rune ob[N]; | 
 | 	Rune *r, *re; | 
 | 	uchar ibuf[N]; | 
 | 	int n, i; | 
 | 	long nin; | 
 |  | 
 | 	r = ob; | 
 | 	re = ob+N-3; | 
 | 	nin = 0; | 
 | 	while((n = read(fd, ibuf, sizeof ibuf)) > 0){ | 
 | 		for(i = 0; i < n; i++){ | 
 | 			(*procfn)(ibuf[i], &r, nin++); | 
 | 			if(r >= re){ | 
 | 				OUT(out, ob, r-ob); | 
 | 				r = ob; | 
 | 			} | 
 | 		} | 
 | 		if(r > ob){ | 
 | 			OUT(out, ob, r-ob); | 
 | 			r = ob; | 
 | 		} | 
 | 	} | 
 | 	(*procfn)(-1, &r, nin); | 
 | 	if(r > ob) | 
 | 		OUT(out, ob, r-ob); | 
 | 	OUT(out, ob, 0); | 
 | } | 
 |  | 
 | void | 
 | jis_in(int fd, long *notused, struct convert *out) | 
 | { | 
 | 	USED(notused); | 
 | 	do_in(fd, alljis, out); | 
 | } | 
 |  | 
 | void | 
 | ujis_in(int fd, long *notused, struct convert *out) | 
 | { | 
 | 	USED(notused); | 
 | 	do_in(fd, ujis, out); | 
 | } | 
 |  | 
 | void | 
 | msjis_in(int fd, long *notused, struct convert *out) | 
 | { | 
 | 	USED(notused); | 
 | 	do_in(fd, ms, out); | 
 | } | 
 |  | 
 | void | 
 | jisjis_in(int fd, long *notused, struct convert *out) | 
 | { | 
 | 	USED(notused); | 
 | 	do_in(fd, jis, out); | 
 | } | 
 |  | 
 | static int first = 1; | 
 |  | 
 | static void | 
 | tab_init(void) | 
 | { | 
 | 	int i; | 
 | 	long l; | 
 |  | 
 | 	first = 0; | 
 | 	for(i = 0; i < NRUNE; i++) | 
 | 		tab[i] = -1; | 
 | 	for(i = 0; i < KUTEN208MAX; i++) | 
 | 		if((l = tabkuten208[i]) != -1){ | 
 | 			if(l < 0) | 
 | 				tab[-l] = i; | 
 | 			else | 
 | 				tab[l] = i; | 
 | 		} | 
 | } | 
 |  | 
 |  | 
 | /*	jis-kanji, or ISO 2022-JP	*/ | 
 | void | 
 | jisjis_out(Rune *base, int n, long *notused) | 
 | { | 
 | 	char *p; | 
 | 	int i; | 
 | 	Rune r; | 
 | 	static enum { ascii, japan646, jp2022 } state = ascii; | 
 |  | 
 | 	USED(notused); | 
 | 	if(first) | 
 | 		tab_init(); | 
 | 	nrunes += n; | 
 | 	p = obuf; | 
 | 	for(i = 0; i < n; i++){ | 
 | 		r = base[i]; | 
 | 		if(r < 128){ | 
 | 			if(state == jp2022){ | 
 | 				*p++ = ESC; *p++ = '('; *p++ = 'B'; | 
 | 				state = ascii; | 
 | 			} | 
 | 			*p++ = r; | 
 | 		} else { | 
 | 			if(tab[r] != -1){ | 
 | 				if(state != jp2022){ | 
 | 					*p++ = ESC; *p++ = '$'; *p++ = 'B'; | 
 | 					state = jp2022; | 
 | 				} | 
 | 				*p++ = tab[r]/100 + ' '; | 
 | 				*p++ = tab[r]%100 + ' '; | 
 | 				continue; | 
 | 			} | 
 | 			if(squawk) | 
 | 				EPR "%s: rune 0x%x not in output cs\n", argv0, r); | 
 | 			nerrors++; | 
 | 			if(clean) | 
 | 				continue; | 
 | 			*p++ = BYTEBADMAP; | 
 | 		} | 
 | 	} | 
 | 	noutput += p-obuf; | 
 | 	if(p > obuf) | 
 | 		write(1, obuf, p-obuf); | 
 | } | 
 |  | 
 | /*	ms-kanji, or Shift-JIS	*/ | 
 | void | 
 | msjis_out(Rune *base, int n, long *notused) | 
 | { | 
 | 	char *p; | 
 | 	int i, hi, lo; | 
 | 	Rune r; | 
 |  | 
 | 	USED(notused); | 
 | 	if(first) | 
 | 		tab_init(); | 
 | 	nrunes += n; | 
 | 	p = obuf; | 
 | 	for(i = 0; i < n; i++){ | 
 | 		r = base[i]; | 
 | 		if(r < 128) | 
 | 			*p++ = r; | 
 | 		else { | 
 | 			if(tab[r] != -1){ | 
 | 				hi = tab[r]/100 + ' '; | 
 | 				lo = tab[r]%100 + ' '; | 
 | 				J2S(hi, lo); | 
 | 				*p++ = hi; | 
 | 				*p++ = lo; | 
 | 				continue; | 
 | 			} | 
 | 			if(squawk) | 
 | 				EPR "%s: rune 0x%x not in output cs\n", argv0, r); | 
 | 			nerrors++; | 
 | 			if(clean) | 
 | 				continue; | 
 | 			*p++ = BYTEBADMAP; | 
 | 		} | 
 | 	} | 
 | 	noutput += p-obuf; | 
 | 	if(p > obuf) | 
 | 		write(1, obuf, p-obuf); | 
 | } | 
 |  | 
 | /*	ujis, or EUC	*/ | 
 | void | 
 | ujis_out(Rune *base, int n, long *notused) | 
 | { | 
 | 	char *p; | 
 | 	int i; | 
 | 	Rune r; | 
 |  | 
 | 	USED(notused); | 
 | 	if(first) | 
 | 		tab_init(); | 
 | 	nrunes += n; | 
 | 	p = obuf; | 
 | 	for(i = 0; i < n; i++){ | 
 | 		r = base[i]; | 
 | 		if(r < 128) | 
 | 			*p++ = r; | 
 | 		else { | 
 | 			if(tab[r] != -1){ | 
 | 				*p++ = 0x80 | (tab[r]/100 + ' '); | 
 | 				*p++ = 0x80 | (tab[r]%100 + ' '); | 
 | 				continue; | 
 | 			} | 
 | 			if(squawk) | 
 | 				EPR "%s: rune 0x%x not in output cs\n", argv0, r); | 
 | 			nerrors++; | 
 | 			if(clean) | 
 | 				continue; | 
 | 			*p++ = BYTEBADMAP; | 
 | 		} | 
 | 	} | 
 | 	noutput += p-obuf; | 
 | 	if(p > obuf) | 
 | 		write(1, obuf, p-obuf); | 
 | } |