| #include <u.h> |
| #include <limits.h> |
| #include <libc.h> |
| #include <draw.h> |
| #include <html.h> |
| #include "impl.h" |
| |
| Rune whitespace[] = { ' ', '\t', '\n', '\r', '\0' }; |
| Rune notwhitespace[] = { '^', ' ', '\t', '\n', '\r' , '\0'}; |
| |
| /* All lists start out like List structure. */ |
| /* List itself can be used as list of int. */ |
| int |
| _listlen(List* l) |
| { |
| int n = 0; |
| |
| while(l != nil) { |
| l = l->next; |
| n++; |
| } |
| return n; |
| } |
| |
| /* Cons */ |
| List* |
| _newlist(int val, List* rest) |
| { |
| List* ans; |
| |
| ans = (List*)emalloc(sizeof(List)); |
| ans->val = val; |
| ans->next = rest; |
| return ans; |
| } |
| |
| /* Reverse a list in place */ |
| List* |
| _revlist(List* l) |
| { |
| List* newl; |
| List* nextl; |
| |
| newl = nil; |
| while(l != nil) { |
| nextl = l->next; |
| l->next = newl; |
| newl = l; |
| l = nextl; |
| } |
| return newl; |
| } |
| |
| /* The next few routines take a "character class" as argument. */ |
| /* e.g., "a-zA-Z", or "^ \t\n" */ |
| /* (ranges indicated by - except in first position; */ |
| /* ^ is first position means "not in" the following class) */ |
| |
| /* Splitl splits s[0:n] just before first character of class cl. */ |
| /* Answers go in (p1, n1) and (p2, n2). */ |
| /* If no split, the whole thing goes in the first component. */ |
| /* Note: answers contain pointers into original string. */ |
| void |
| _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2) |
| { |
| Rune* p; |
| |
| p = _Strnclass(s, cl, n); |
| *p1 = s; |
| if(p == nil) { |
| *n1 = n; |
| *p2 = nil; |
| *n2 = 0; |
| } |
| else { |
| *p2 = p; |
| *n1 = p-s; |
| *n2 = n-*n1; |
| } |
| } |
| |
| /* Splitr splits s[0:n] just after last character of class cl. */ |
| /* Answers go in (p1, n1) and (p2, n2). */ |
| /* If no split, the whole thing goes in the last component. */ |
| /* Note: answers contain pointers into original string. */ |
| void |
| _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2) |
| { |
| Rune* p; |
| |
| p = _Strnrclass(s, cl, n); |
| if(p == nil) { |
| *p1 = nil; |
| *n1 = 0; |
| *p2 = s; |
| *n2 = n; |
| } |
| else { |
| *p1 = s; |
| *p2 = p+1; |
| *n1 = *p2-s; |
| *n2 = n-*n1; |
| } |
| } |
| |
| /* Splitall splits s[0:n] into parts that are separated by characters from class cl. */ |
| /* Each part will have nonzero length. */ |
| /* At most alen parts are found, and pointers to their starts go into */ |
| /* the strarr array, while their lengths go into the lenarr array. */ |
| /* The return value is the number of parts found. */ |
| int |
| _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen) |
| { |
| int i; |
| Rune* p; |
| Rune* q; |
| Rune* slast; |
| |
| if(s == nil || n == 0) |
| return 0; |
| i = 0; |
| p = s; |
| slast = s+n; |
| while(p < slast && i < alen) { |
| while(p < slast && _inclass(*p, cl)) |
| p++; |
| if(p == slast) |
| break; |
| q = _Strnclass(p, cl, slast-p); |
| if(q == nil) |
| q = slast; |
| assert(q > p && q <= slast); |
| strarr[i] = p; |
| lenarr[i] = q-p; |
| i++; |
| p = q; |
| } |
| return i; |
| } |
| |
| /* Find part of s that excludes leading and trailing whitespace, */ |
| /* and return that part in *pans (and its length in *panslen). */ |
| void |
| _trimwhite(Rune* s, int n, Rune** pans, int* panslen) |
| { |
| Rune* p; |
| Rune* q; |
| |
| p = nil; |
| if(n > 0) { |
| p = _Strnclass(s, notwhitespace, n); |
| if(p != nil) { |
| q = _Strnrclass(s, notwhitespace, n); |
| assert(q != nil); |
| n = q+1-p; |
| } |
| } |
| *pans = p; |
| *panslen = n; |
| } |
| |
| /* _Strclass returns a pointer to the first element of s that is */ |
| /* a member of class cl, nil if none. */ |
| Rune* |
| _Strclass(Rune* s, Rune* cl) |
| { |
| Rune* p; |
| |
| for(p = s; *p != 0; p++) |
| if(_inclass(*p, cl)) |
| return p; |
| return nil; |
| } |
| |
| /* _Strnclass returns a pointer to the first element of s[0:n] that is */ |
| /* a member of class cl, nil if none. */ |
| Rune* |
| _Strnclass(Rune* s, Rune* cl, int n) |
| { |
| Rune* p; |
| |
| for(p = s; n-- && *p != 0; p++) |
| if(_inclass(*p, cl)) |
| return p; |
| return nil; |
| } |
| |
| /* _Strrclass returns a pointer to the last element of s that is */ |
| /* a member of class cl, nil if none */ |
| Rune* |
| _Strrclass(Rune* s, Rune* cl) |
| { |
| Rune* p; |
| |
| if(s == nil || *s == 0) |
| return nil; |
| p = s + runestrlen(s) - 1; |
| while(p >= s) { |
| if(_inclass(*p, cl)) |
| return p; |
| p--; |
| }; |
| return nil; |
| } |
| |
| /* _Strnrclass returns a pointer to the last element of s[0:n] that is */ |
| /* a member of class cl, nil if none */ |
| Rune* |
| _Strnrclass(Rune* s, Rune* cl, int n) |
| { |
| Rune* p; |
| |
| if(s == nil || *s == 0 || n == 0) |
| return nil; |
| p = s + n - 1; |
| while(p >= s) { |
| if(_inclass(*p, cl)) |
| return p; |
| p--; |
| }; |
| return nil; |
| } |
| |
| /* Is c in the class cl? */ |
| int |
| _inclass(Rune c, Rune* cl) |
| { |
| int n; |
| int ans; |
| int negate; |
| int i; |
| |
| n = _Strlen(cl); |
| if(n == 0) |
| return 0; |
| ans = 0; |
| negate = 0; |
| if(cl[0] == '^') { |
| negate = 1; |
| cl++; |
| n--; |
| } |
| for(i = 0; i < n; i++) { |
| if(cl[i] == '-' && i > 0 && i < n - 1) { |
| if(c >= cl[i - 1] && c <= cl[i + 1]) { |
| ans = 1; |
| break; |
| } |
| i++; |
| } |
| else if(c == cl[i]) { |
| ans = 1; |
| break; |
| } |
| } |
| if(negate) |
| ans = !ans; |
| return ans; |
| } |
| |
| /* Is pre a prefix of s? */ |
| int |
| _prefix(Rune* pre, Rune* s) |
| { |
| int ns; |
| int n; |
| int k; |
| |
| ns = _Strlen(s); |
| n = _Strlen(pre); |
| if(ns < n) |
| return 0; |
| for(k = 0; k < n; k++) { |
| if(pre[k] != s[k]) |
| return 0; |
| } |
| return 1; |
| } |
| |
| /* Number of runes in (null-terminated) s */ |
| int |
| _Strlen(Rune* s) |
| { |
| if(s == nil) |
| return 0; |
| return runestrlen(s); |
| } |
| |
| /* -1, 0, 1 as s1 is lexicographically less, equal greater than s2 */ |
| int |
| _Strcmp(Rune *s1, Rune *s2) |
| { |
| if(s1 == nil) |
| return (s2 == nil || *s2 == 0) ? 0 : -1; |
| if(s2 == nil) |
| return (*s1 == 0) ? 0 : 1; |
| return runestrcmp(s1, s2); |
| } |
| |
| /* Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars). */ |
| /* Also, do a case-insensitive match, assuming s2 */ |
| /* has no chars in [A-Z], only their lowercase versions. */ |
| /* (This routine is used for in-place keyword lookup, where s2 is in a keyword */ |
| /* list and s1 is some substring, possibly mixed-case, in a buffer.) */ |
| int |
| _Strncmpci(Rune *s1, int n1, Rune *s2) |
| { |
| Rune c1, c2; |
| |
| for(;;) { |
| if(n1-- == 0) { |
| if(*s2 == 0) |
| return 0; |
| return -1; |
| } |
| c1 = *s1++; |
| c2 = *s2++; |
| if(c1 >= 'A' && c1 <= 'Z') |
| c1 = c1 - 'A' + 'a'; |
| if(c1 != c2) { |
| if(c1 > c2) |
| return 1; |
| return -1; |
| } |
| } |
| } |
| |
| /* emalloc and copy */ |
| Rune* |
| _Strdup(Rune* s) |
| { |
| if(s == nil) |
| return nil; |
| return _Strndup(s, runestrlen(s)); |
| } |
| |
| /* emalloc and copy n chars of s (assume s is at least that long), */ |
| /* and add 0 terminator. */ |
| /* Return nil if n==0. */ |
| Rune* |
| _Strndup(Rune* s, int n) |
| { |
| Rune* ans; |
| |
| if(n <= 0) |
| return nil; |
| ans = _newstr(n); |
| memmove(ans, s, n*sizeof(Rune)); |
| ans[n] = 0; |
| return ans; |
| } |
| /* emalloc enough room for n Runes, plus 1 null terminator. */ |
| /* (Not initialized to anything.) */ |
| Rune* |
| _newstr(int n) |
| { |
| return (Rune*)emalloc((n+1)*sizeof(Rune)); |
| } |
| |
| /* emalloc and copy s+t */ |
| Rune* |
| _Strdup2(Rune* s, Rune* t) |
| { |
| int ns, nt; |
| Rune* ans; |
| Rune* p; |
| |
| ns = _Strlen(s); |
| nt = _Strlen(t); |
| if(ns+nt == 0) |
| return nil; |
| ans = _newstr(ns+nt); |
| p = _Stradd(ans, s, ns); |
| p = _Stradd(p, t, nt); |
| *p = 0; |
| return ans; |
| } |
| |
| /* Return emalloc'd substring s[start:stop], */ |
| Rune* |
| _Strsubstr(Rune* s, int start, int stop) |
| { |
| Rune* t; |
| |
| if(start == stop) |
| return nil; |
| t = _Strndup(s+start, stop-start); |
| return t; |
| } |
| |
| /* Copy n chars to s1 from s2, and return s1+n */ |
| Rune* |
| _Stradd(Rune* s1, Rune* s2, int n) |
| { |
| if(n == 0) |
| return s1; |
| memmove(s1, s2, n*sizeof(Rune)); |
| return s1+n; |
| } |
| |
| /* Like strtol, but converting from Rune* string */ |
| |
| /*#define LONG_MAX 2147483647L */ |
| /*#define LONG_MIN -2147483648L */ |
| |
| long |
| _Strtol(Rune* nptr, Rune** endptr, int base) |
| { |
| Rune* p; |
| long n, nn; |
| int c, ovfl, v, neg, ndig; |
| |
| p = nptr; |
| neg = 0; |
| n = 0; |
| ndig = 0; |
| ovfl = 0; |
| |
| /* |
| * White space |
| */ |
| for(;;p++){ |
| switch(*p){ |
| case ' ': |
| case '\t': |
| case '\n': |
| case '\f': |
| case '\r': |
| case '\v': |
| continue; |
| } |
| break; |
| } |
| |
| /* |
| * Sign |
| */ |
| if(*p=='-' || *p=='+') |
| if(*p++ == '-') |
| neg = 1; |
| |
| /* |
| * Base |
| */ |
| if(base==0){ |
| if(*p != '0') |
| base = 10; |
| else{ |
| base = 8; |
| if(p[1]=='x' || p[1]=='X'){ |
| p += 2; |
| base = 16; |
| } |
| } |
| }else if(base==16 && *p=='0'){ |
| if(p[1]=='x' || p[1]=='X') |
| p += 2; |
| }else if(base<0 || 36<base) |
| goto Return; |
| |
| /* |
| * Non-empty sequence of digits |
| */ |
| for(;; p++,ndig++){ |
| c = *p; |
| v = base; |
| if('0'<=c && c<='9') |
| v = c - '0'; |
| else if('a'<=c && c<='z') |
| v = c - 'a' + 10; |
| else if('A'<=c && c<='Z') |
| v = c - 'A' + 10; |
| if(v >= base) |
| break; |
| nn = n*base + v; |
| if(nn < n) |
| ovfl = 1; |
| n = nn; |
| } |
| |
| Return: |
| if(ndig == 0) |
| p = nptr; |
| if(endptr) |
| *endptr = p; |
| if(ovfl){ |
| if(neg) |
| return LONG_MIN; |
| return LONG_MAX; |
| } |
| if(neg) |
| return -n; |
| return n; |
| } |
| |
| /* Convert buf[0:n], bytes whose character set is chset, */ |
| /* into a emalloc'd null-terminated Unicode string. */ |
| Rune* |
| toStr(uchar* buf, int n, int chset) |
| { |
| int i; |
| int m; |
| Rune ch; |
| Rune* ans; |
| |
| switch(chset) { |
| case US_Ascii: |
| case ISO_8859_1: |
| ans = (Rune*)emalloc((n+1)*sizeof(Rune)); |
| for(i = 0; i < n; i++) |
| ans[i] = buf[i]; |
| ans[n] = 0; |
| break; |
| |
| case UTF_8: |
| m = 0; |
| for(i = 0; i < n; ) { |
| i += chartorune(&ch, (char*)(buf+i)); |
| m++; |
| } |
| ans = (Rune*)emalloc((m+1)*sizeof(Rune)); |
| m = 0; |
| for(i = 0; i < n; ) { |
| i += chartorune(&ch, (char*)(buf+i)); |
| ans[m++] = ch; |
| } |
| ans[m] = 0; |
| break; |
| |
| default: |
| ans = nil; |
| assert(0); |
| } |
| return ans; |
| } |
| |
| /* Convert buf[0:n], Unicode characters, */ |
| /* into an emalloc'd null-terminated string in character set chset. */ |
| /* Use 0x80 for unconvertable characters. */ |
| uchar* |
| fromStr(Rune* buf, int n, int chset) |
| { |
| uchar* ans; |
| int i, lim, m; |
| Rune ch; |
| uchar* p; |
| uchar s[UTFmax]; |
| |
| ans = nil; |
| switch(chset) { |
| case US_Ascii: |
| case ISO_8859_1: |
| ans = (uchar*)emalloc(n+1); |
| lim = (chset==US_Ascii)? 127 : 255; |
| for(i = 0; i < n; i++) { |
| ch = buf[i]; |
| if(ch > lim) |
| ch = 0x80; |
| ans[i] = ch; |
| } |
| ans[n] = 0; |
| break; |
| |
| case UTF_8: |
| m = 0; |
| for(i = 0; i < n; i++) { |
| m += runetochar((char*)s, &buf[i]); |
| } |
| ans = (uchar*)emalloc(m+1); |
| p = ans; |
| for(i = 0; i < n; i++) |
| p += runetochar((char*)p, &buf[i]); |
| *p = 0; |
| break; |
| |
| default: |
| assert(0); |
| } |
| return ans; |
| |
| } |
| |
| /* Convert n to emalloc'd String. */ |
| Rune* |
| _ltoStr(int n) |
| { |
| int m; |
| uchar buf[20]; |
| |
| m = snprint((char*)buf, sizeof(buf), "%d", n); |
| return toStr(buf, m, US_Ascii); |
| } |