| #include <u.h> | 
 | #include <limits.h> | 
 | #include <libc.h> | 
 | #include <draw.h> | 
 | #include <html.h> | 
 | #include "impl.h" | 
 |  | 
 | Rune whitespace[] = { ' ', '\t', '\n', '\r', '\0' }; | 
 | Rune notwhitespace[] = { '^', ' ', '\t', '\n', '\r' , '\0'}; | 
 |  | 
 | /* All lists start out like List structure. */ | 
 | /* List itself can be used as list of int. */ | 
 | int | 
 | _listlen(List* l) | 
 | { | 
 | 	int n = 0; | 
 |  | 
 | 	while(l != nil) { | 
 | 		l = l->next; | 
 | 		n++; | 
 | 	} | 
 | 	return n; | 
 | } | 
 |  | 
 | /* Cons */ | 
 | List* | 
 | _newlist(int val, List* rest) | 
 | { | 
 | 	List* ans; | 
 |  | 
 | 	ans = (List*)emalloc(sizeof(List)); | 
 | 	ans->val = val; | 
 | 	ans->next = rest; | 
 | 	return ans; | 
 | } | 
 |  | 
 | /* Reverse a list in place */ | 
 | List* | 
 | _revlist(List* l) | 
 | { | 
 | 	List* newl; | 
 | 	List* nextl; | 
 |  | 
 | 	newl = nil; | 
 | 	while(l != nil) { | 
 | 		nextl = l->next; | 
 | 		l->next = newl; | 
 | 		newl = l; | 
 | 		l = nextl; | 
 | 	} | 
 | 	return newl; | 
 | } | 
 |  | 
 | /* The next few routines take a "character class" as argument. */ | 
 | /*    e.g., "a-zA-Z", or "^ \t\n" */ | 
 | /* (ranges indicated by - except in first position; */ | 
 | /*  ^ is first position means "not in" the following class) */ | 
 |  | 
 | /* Splitl splits s[0:n] just before first character of class cl. */ | 
 | /* Answers go in (p1, n1) and (p2, n2). */ | 
 | /* If no split, the whole thing goes in the first component. */ | 
 | /* Note: answers contain pointers into original string. */ | 
 | void | 
 | _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2) | 
 | { | 
 | 	Rune* p; | 
 |  | 
 | 	p = _Strnclass(s, cl, n); | 
 | 	*p1 = s; | 
 | 	if(p == nil) { | 
 | 		*n1 = n; | 
 | 		*p2 = nil; | 
 | 		*n2 = 0; | 
 | 	} | 
 | 	else { | 
 | 		*p2 = p; | 
 | 		*n1 = p-s; | 
 | 		*n2 = n-*n1; | 
 | 	} | 
 | } | 
 |  | 
 | /* Splitr splits s[0:n] just after last character of class cl. */ | 
 | /* Answers go in (p1, n1) and (p2, n2). */ | 
 | /* If no split, the whole thing goes in the last component. */ | 
 | /* Note: answers contain pointers into original string. */ | 
 | void | 
 | _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2) | 
 | { | 
 | 	Rune* p; | 
 |  | 
 | 	p = _Strnrclass(s, cl, n); | 
 | 	if(p == nil) { | 
 | 		*p1 = nil; | 
 | 		*n1 = 0; | 
 | 		*p2 = s; | 
 | 		*n2 = n; | 
 | 	} | 
 | 	else { | 
 | 		*p1 = s; | 
 | 		*p2 = p+1; | 
 | 		*n1 = *p2-s; | 
 | 		*n2 = n-*n1; | 
 | 	} | 
 | } | 
 |  | 
 | /* Splitall splits s[0:n] into parts that are separated by characters from class cl. */ | 
 | /* Each part will have nonzero length. */ | 
 | /* At most alen parts are found, and pointers to their starts go into */ | 
 | /* the strarr array, while their lengths go into the lenarr array. */ | 
 | /* The return value is the number of parts found. */ | 
 | int | 
 | _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen) | 
 | { | 
 | 	int i; | 
 | 	Rune* p; | 
 | 	Rune* q; | 
 | 	Rune* slast; | 
 |  | 
 | 	if(s == nil || n == 0) | 
 | 		return 0; | 
 | 	i = 0; | 
 | 	p = s; | 
 | 	slast = s+n; | 
 | 	while(p < slast && i < alen) { | 
 | 		while(p < slast && _inclass(*p, cl)) | 
 | 			p++; | 
 | 		if(p == slast) | 
 | 			break; | 
 | 		q = _Strnclass(p, cl, slast-p); | 
 | 		if(q == nil) | 
 | 			q = slast; | 
 | 		assert(q > p && q <= slast); | 
 | 		strarr[i] = p; | 
 | 		lenarr[i] = q-p; | 
 | 		i++; | 
 | 		p = q; | 
 | 	} | 
 | 	return i; | 
 | } | 
 |  | 
 | /* Find part of s that excludes leading and trailing whitespace, */ | 
 | /* and return that part in *pans (and its length in *panslen). */ | 
 | void | 
 | _trimwhite(Rune* s, int n, Rune** pans, int* panslen) | 
 | { | 
 | 	Rune* p; | 
 | 	Rune* q; | 
 |  | 
 | 	p = nil; | 
 | 	if(n > 0) { | 
 | 		p = _Strnclass(s, notwhitespace, n); | 
 | 		if(p != nil) { | 
 | 			q = _Strnrclass(s, notwhitespace, n); | 
 | 			assert(q != nil); | 
 | 			n = q+1-p; | 
 | 		} | 
 | 	} | 
 | 	*pans = p; | 
 | 	*panslen = n; | 
 | } | 
 |  | 
 | /* _Strclass returns a pointer to the first element of s that is */ | 
 | /* a member of class cl, nil if none. */ | 
 | Rune* | 
 | _Strclass(Rune* s, Rune* cl) | 
 | { | 
 | 	Rune* p; | 
 |  | 
 | 	for(p = s; *p != 0; p++) | 
 | 		if(_inclass(*p, cl)) | 
 | 			return p; | 
 | 	return nil; | 
 | } | 
 |  | 
 | /* _Strnclass returns a pointer to the first element of s[0:n] that is */ | 
 | /* a member of class cl, nil if none. */ | 
 | Rune* | 
 | _Strnclass(Rune* s, Rune* cl, int n) | 
 | { | 
 | 	Rune* p; | 
 |  | 
 | 	for(p = s; n-- && *p != 0; p++) | 
 | 		if(_inclass(*p, cl)) | 
 | 			return p; | 
 | 	return nil; | 
 | } | 
 |  | 
 | /* _Strrclass returns a pointer to the last element of s that is */ | 
 | /* a member of class cl, nil if none */ | 
 | Rune* | 
 | _Strrclass(Rune* s, Rune* cl) | 
 | { | 
 | 	Rune* p; | 
 |  | 
 | 	if(s == nil || *s == 0) | 
 | 		return nil; | 
 | 	p = s + runestrlen(s) - 1; | 
 | 	while(p >= s) { | 
 | 		if(_inclass(*p, cl)) | 
 | 			return p; | 
 | 		p--; | 
 | 	}; | 
 | 	return nil; | 
 | } | 
 |  | 
 | /* _Strnrclass returns a pointer to the last element of s[0:n] that is */ | 
 | /* a member of class cl, nil if none */ | 
 | Rune* | 
 | _Strnrclass(Rune* s, Rune* cl, int n) | 
 | { | 
 | 	Rune* p; | 
 |  | 
 | 	if(s == nil || *s == 0 || n == 0) | 
 | 		return nil; | 
 | 	p = s + n - 1; | 
 | 	while(p >= s) { | 
 | 		if(_inclass(*p, cl)) | 
 | 			return p; | 
 | 		p--; | 
 | 	}; | 
 | 	return nil; | 
 | } | 
 |  | 
 | /* Is c in the class cl? */ | 
 | int | 
 | _inclass(Rune c, Rune* cl) | 
 | { | 
 | 	int	n; | 
 | 	int	ans; | 
 | 	int	negate; | 
 | 	int	i; | 
 |  | 
 | 	n = _Strlen(cl); | 
 | 	if(n == 0) | 
 | 		return 0; | 
 | 	ans = 0; | 
 | 	negate = 0; | 
 | 	if(cl[0] == '^') { | 
 | 		negate = 1; | 
 | 		cl++; | 
 | 		n--; | 
 | 	} | 
 | 	for(i = 0; i < n; i++) { | 
 | 		if(cl[i] == '-' && i > 0 && i < n - 1) { | 
 | 			if(c >= cl[i - 1] && c <= cl[i + 1]) { | 
 | 				ans = 1; | 
 | 				break; | 
 | 			} | 
 | 			i++; | 
 | 		} | 
 | 		else if(c == cl[i]) { | 
 | 			ans = 1; | 
 | 			break; | 
 | 		} | 
 | 	} | 
 | 	if(negate) | 
 | 		ans = !ans; | 
 | 	return ans; | 
 | } | 
 |  | 
 | /* Is pre a prefix of s? */ | 
 | int | 
 | _prefix(Rune* pre, Rune* s) | 
 | { | 
 | 	int	ns; | 
 | 	int	n; | 
 | 	int	k; | 
 |  | 
 | 	ns = _Strlen(s); | 
 | 	n = _Strlen(pre); | 
 | 	if(ns < n) | 
 | 		return 0; | 
 | 	for(k = 0; k < n; k++) { | 
 | 		if(pre[k] != s[k]) | 
 | 			return 0; | 
 | 	} | 
 | 	return 1; | 
 | } | 
 |  | 
 | /* Number of runes in (null-terminated) s */ | 
 | int | 
 | _Strlen(Rune* s) | 
 | { | 
 | 	if(s == nil) | 
 | 		return 0; | 
 | 	return runestrlen(s); | 
 | } | 
 |  | 
 | /* -1, 0, 1 as s1 is lexicographically less, equal greater than s2 */ | 
 | int | 
 | _Strcmp(Rune *s1, Rune *s2) | 
 | { | 
 | 	if(s1 == nil) | 
 | 		return (s2 == nil || *s2 == 0) ? 0 : -1; | 
 | 	if(s2 == nil) | 
 | 		return (*s1 == 0) ? 0 : 1; | 
 | 	return runestrcmp(s1, s2); | 
 | } | 
 |  | 
 | /* Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars). */ | 
 | /* Also, do a case-insensitive match, assuming s2 */ | 
 | /* has no chars in [A-Z], only their lowercase versions. */ | 
 | /* (This routine is used for in-place keyword lookup, where s2 is in a keyword */ | 
 | /* list and s1 is some substring, possibly mixed-case, in a buffer.) */ | 
 | int | 
 | _Strncmpci(Rune *s1, int n1, Rune *s2) | 
 | { | 
 | 	Rune c1, c2; | 
 |  | 
 | 	for(;;) { | 
 | 		if(n1-- == 0) { | 
 | 			if(*s2 == 0) | 
 | 				return 0; | 
 | 			return -1; | 
 | 		} | 
 | 		c1 = *s1++; | 
 | 		c2 = *s2++; | 
 | 		if(c1 >= 'A' && c1 <= 'Z') | 
 | 			c1 = c1 - 'A' + 'a'; | 
 | 		if(c1 != c2) { | 
 | 			if(c1 > c2) | 
 | 				return 1; | 
 | 			return -1; | 
 | 		} | 
 | 	} | 
 | } | 
 |  | 
 | /* emalloc and copy */ | 
 | Rune* | 
 | _Strdup(Rune* s) | 
 | { | 
 | 	if(s == nil) | 
 | 		return nil; | 
 | 	return _Strndup(s, runestrlen(s)); | 
 | } | 
 |  | 
 | /* emalloc and copy n chars of s (assume s is at least that long), */ | 
 | /* and add 0 terminator. */ | 
 | /* Return nil if n==0. */ | 
 | Rune* | 
 | _Strndup(Rune* s, int n) | 
 | { | 
 | 	Rune* ans; | 
 |  | 
 | 	if(n <= 0) | 
 | 		return nil; | 
 | 	ans = _newstr(n); | 
 | 	memmove(ans, s, n*sizeof(Rune)); | 
 | 	ans[n] = 0; | 
 | 	return ans; | 
 | } | 
 | /* emalloc enough room for n Runes, plus 1 null terminator. */ | 
 | /* (Not initialized to anything.) */ | 
 | Rune* | 
 | _newstr(int n) | 
 | { | 
 | 	return (Rune*)emalloc((n+1)*sizeof(Rune)); | 
 | } | 
 |  | 
 | /* emalloc and copy s+t */ | 
 | Rune* | 
 | _Strdup2(Rune* s, Rune* t) | 
 | { | 
 | 	int ns, nt; | 
 | 	Rune* ans; | 
 | 	Rune* p; | 
 |  | 
 | 	ns = _Strlen(s); | 
 | 	nt = _Strlen(t); | 
 | 	if(ns+nt == 0) | 
 | 		return nil; | 
 | 	ans = _newstr(ns+nt); | 
 | 	p = _Stradd(ans, s, ns); | 
 | 	p = _Stradd(p, t, nt); | 
 | 	*p = 0; | 
 | 	return ans; | 
 | } | 
 |  | 
 | /* Return emalloc'd substring s[start:stop], */ | 
 | Rune* | 
 | _Strsubstr(Rune* s, int start, int stop) | 
 | { | 
 | 	Rune* t; | 
 |  | 
 | 	if(start == stop) | 
 | 		return nil; | 
 | 	t = _Strndup(s+start, stop-start); | 
 | 	return t; | 
 | } | 
 |  | 
 | /* Copy n chars to s1 from s2, and return s1+n */ | 
 | Rune* | 
 | _Stradd(Rune* s1, Rune* s2, int n) | 
 | { | 
 | 	if(n == 0) | 
 | 		return s1; | 
 | 	memmove(s1, s2, n*sizeof(Rune)); | 
 | 	return s1+n; | 
 | } | 
 |  | 
 | /* Like strtol, but converting from Rune* string */ | 
 |  | 
 | /*#define LONG_MAX	2147483647L */ | 
 | /*#define LONG_MIN	-2147483648L */ | 
 |  | 
 | long | 
 | _Strtol(Rune* nptr, Rune** endptr, int base) | 
 | { | 
 | 	Rune* p; | 
 | 	long n, nn; | 
 | 	int c, ovfl, v, neg, ndig; | 
 |  | 
 | 	p = nptr; | 
 | 	neg = 0; | 
 | 	n = 0; | 
 | 	ndig = 0; | 
 | 	ovfl = 0; | 
 |  | 
 | 	/* | 
 | 	 * White space | 
 | 	 */ | 
 | 	for(;;p++){ | 
 | 		switch(*p){ | 
 | 		case ' ': | 
 | 		case '\t': | 
 | 		case '\n': | 
 | 		case '\f': | 
 | 		case '\r': | 
 | 		case '\v': | 
 | 			continue; | 
 | 		} | 
 | 		break; | 
 | 	} | 
 |  | 
 | 	/* | 
 | 	 * Sign | 
 | 	 */ | 
 | 	if(*p=='-' || *p=='+') | 
 | 		if(*p++ == '-') | 
 | 			neg = 1; | 
 |  | 
 | 	/* | 
 | 	 * Base | 
 | 	 */ | 
 | 	if(base==0){ | 
 | 		if(*p != '0') | 
 | 			base = 10; | 
 | 		else{ | 
 | 			base = 8; | 
 | 			if(p[1]=='x' || p[1]=='X'){ | 
 | 				p += 2; | 
 | 				base = 16; | 
 | 			} | 
 | 		} | 
 | 	}else if(base==16 && *p=='0'){ | 
 | 		if(p[1]=='x' || p[1]=='X') | 
 | 			p += 2; | 
 | 	}else if(base<0 || 36<base) | 
 | 		goto Return; | 
 |  | 
 | 	/* | 
 | 	 * Non-empty sequence of digits | 
 | 	 */ | 
 | 	for(;; p++,ndig++){ | 
 | 		c = *p; | 
 | 		v = base; | 
 | 		if('0'<=c && c<='9') | 
 | 			v = c - '0'; | 
 | 		else if('a'<=c && c<='z') | 
 | 			v = c - 'a' + 10; | 
 | 		else if('A'<=c && c<='Z') | 
 | 			v = c - 'A' + 10; | 
 | 		if(v >= base) | 
 | 			break; | 
 | 		nn = n*base + v; | 
 | 		if(nn < n) | 
 | 			ovfl = 1; | 
 | 		n = nn; | 
 | 	} | 
 |  | 
 |     Return: | 
 | 	if(ndig == 0) | 
 | 		p = nptr; | 
 | 	if(endptr) | 
 | 		*endptr = p; | 
 | 	if(ovfl){ | 
 | 		if(neg) | 
 | 			return LONG_MIN; | 
 | 		return LONG_MAX; | 
 | 	} | 
 | 	if(neg) | 
 | 		return -n; | 
 | 	return n; | 
 | } | 
 |  | 
 | /* Convert buf[0:n], bytes whose character set is chset, */ | 
 | /* into a emalloc'd null-terminated Unicode string. */ | 
 | Rune* | 
 | toStr(uchar* buf, int n, int chset) | 
 | { | 
 | 	int i; | 
 | 	int m; | 
 | 	Rune ch; | 
 | 	Rune* ans; | 
 |  | 
 | 	switch(chset) { | 
 | 	case US_Ascii: | 
 | 	case ISO_8859_1: | 
 | 		ans = (Rune*)emalloc((n+1)*sizeof(Rune)); | 
 | 		for(i = 0; i < n; i++) | 
 | 			ans[i] = buf[i]; | 
 | 		ans[n] = 0; | 
 | 		break; | 
 |  | 
 | 	case UTF_8: | 
 | 		m = 0; | 
 | 		for(i = 0; i < n; ) { | 
 | 			i += chartorune(&ch, (char*)(buf+i)); | 
 | 			m++; | 
 | 		} | 
 | 		ans = (Rune*)emalloc((m+1)*sizeof(Rune)); | 
 | 		m = 0; | 
 | 		for(i = 0; i < n; ) { | 
 | 			i += chartorune(&ch, (char*)(buf+i)); | 
 | 			ans[m++] = ch; | 
 | 		} | 
 | 		ans[m] = 0; | 
 | 		break; | 
 |  | 
 | 	default: | 
 | 		ans = nil; | 
 | 		assert(0); | 
 | 	} | 
 | 	return ans; | 
 | } | 
 |  | 
 | /* Convert buf[0:n], Unicode characters, */ | 
 | /* into an emalloc'd null-terminated string in character set chset. */ | 
 | /* Use 0x80 for unconvertable characters. */ | 
 | uchar* | 
 | fromStr(Rune* buf, int n, int chset) | 
 | { | 
 | 	uchar* ans; | 
 | 	int i, lim, m; | 
 | 	Rune ch; | 
 | 	uchar* p; | 
 | 	uchar s[UTFmax]; | 
 |  | 
 | 	ans = nil; | 
 | 	switch(chset) { | 
 | 	case US_Ascii: | 
 | 	case ISO_8859_1: | 
 | 		ans = (uchar*)emalloc(n+1); | 
 | 		lim = (chset==US_Ascii)? 127 : 255; | 
 | 		for(i = 0; i < n; i++) { | 
 | 			ch = buf[i]; | 
 | 			if(ch > lim) | 
 | 				ch = 0x80; | 
 | 			ans[i] = ch; | 
 | 		} | 
 | 		ans[n] = 0; | 
 | 		break; | 
 |  | 
 | 	case UTF_8: | 
 | 		m = 0; | 
 | 		for(i = 0; i < n; i++) { | 
 | 			m += runetochar((char*)s, &buf[i]); | 
 | 		} | 
 | 		ans = (uchar*)emalloc(m+1); | 
 | 		p = ans; | 
 | 		for(i = 0; i < n; i++) | 
 | 			p += runetochar((char*)p, &buf[i]); | 
 | 		*p = 0; | 
 | 		break; | 
 |  | 
 | 	default: | 
 | 		assert(0); | 
 | 	} | 
 | 	return ans; | 
 |  | 
 | } | 
 |  | 
 | /* Convert n to emalloc'd String. */ | 
 | Rune* | 
 | _ltoStr(int n) | 
 | { | 
 | 	int m; | 
 | 	uchar buf[20]; | 
 |  | 
 | 	m = snprint((char*)buf, sizeof(buf), "%d", n); | 
 | 	return toStr(buf, m, US_Ascii); | 
 | } |