| #include <u.h> |
| #include <limits.h> |
| #include <libc.h> |
| #include <draw.h> |
| #include <html.h> |
| #include "impl.h" |
| |
| Rune whitespace[] = { ' ', '\t', '\n', '\r', '\0' }; |
| Rune notwhitespace[] = { '^', ' ', '\t', '\n', '\r' , '\0'}; |
| |
| // All lists start out like List structure. |
| // List itself can be used as list of int. |
| int |
| _listlen(List* l) |
| { |
| int n = 0; |
| |
| while(l != nil) { |
| l = l->next; |
| n++; |
| } |
| return n; |
| } |
| |
| // Cons |
| List* |
| _newlist(int val, List* rest) |
| { |
| List* ans; |
| |
| ans = (List*)emalloc(sizeof(List)); |
| ans->val = val; |
| ans->next = rest; |
| return ans; |
| } |
| |
| // Reverse a list in place |
| List* |
| _revlist(List* l) |
| { |
| List* newl; |
| List* nextl; |
| |
| newl = nil; |
| while(l != nil) { |
| nextl = l->next; |
| l->next = newl; |
| newl = l; |
| l = nextl; |
| } |
| return newl; |
| } |
| |
| // The next few routines take a "character class" as argument. |
| // e.g., "a-zA-Z", or "^ \t\n" |
| // (ranges indicated by - except in first position; |
| // ^ is first position means "not in" the following class) |
| |
| // Splitl splits s[0:n] just before first character of class cl. |
| // Answers go in (p1, n1) and (p2, n2). |
| // If no split, the whole thing goes in the first component. |
| // Note: answers contain pointers into original string. |
| void |
| _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2) |
| { |
| Rune* p; |
| |
| p = _Strnclass(s, cl, n); |
| *p1 = s; |
| if(p == nil) { |
| *n1 = n; |
| *p2 = nil; |
| *n2 = 0; |
| } |
| else { |
| *p2 = p; |
| *n1 = p-s; |
| *n2 = n-*n1; |
| } |
| } |
| |
| // Splitr splits s[0:n] just after last character of class cl. |
| // Answers go in (p1, n1) and (p2, n2). |
| // If no split, the whole thing goes in the last component. |
| // Note: answers contain pointers into original string. |
| void |
| _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2) |
| { |
| Rune* p; |
| |
| p = _Strnrclass(s, cl, n); |
| if(p == nil) { |
| *p1 = nil; |
| *n1 = 0; |
| *p2 = s; |
| *n2 = n; |
| } |
| else { |
| *p1 = s; |
| *p2 = p+1; |
| *n1 = *p2-s; |
| *n2 = n-*n1; |
| } |
| } |
| |
| // Splitall splits s[0:n] into parts that are separated by characters from class cl. |
| // Each part will have nonzero length. |
| // At most alen parts are found, and pointers to their starts go into |
| // the strarr array, while their lengths go into the lenarr array. |
| // The return value is the number of parts found. |
| int |
| _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen) |
| { |
| int i; |
| Rune* p; |
| Rune* q; |
| Rune* slast; |
| |
| if(s == nil || n == 0) |
| return 0; |
| i = 0; |
| p = s; |
| slast = s+n; |
| while(p < slast && i < alen) { |
| while(p < slast && _inclass(*p, cl)) |
| p++; |
| if(p == slast) |
| break; |
| q = _Strnclass(p, cl, slast-p); |
| if(q == nil) |
| q = slast; |
| assert(q > p && q <= slast); |
| strarr[i] = p; |
| lenarr[i] = q-p; |
| i++; |
| p = q; |
| } |
| return i; |
| } |
| |
| // Find part of s that excludes leading and trailing whitespace, |
| // and return that part in *pans (and its length in *panslen). |
| void |
| _trimwhite(Rune* s, int n, Rune** pans, int* panslen) |
| { |
| Rune* p; |
| Rune* q; |
| |
| p = nil; |
| if(n > 0) { |
| p = _Strnclass(s, notwhitespace, n); |
| if(p != nil) { |
| q = _Strnrclass(s, notwhitespace, n); |
| assert(q != nil); |
| n = q+1-p; |
| } |
| } |
| *pans = p; |
| *panslen = n; |
| } |
| |
| // _Strclass returns a pointer to the first element of s that is |
| // a member of class cl, nil if none. |
| Rune* |
| _Strclass(Rune* s, Rune* cl) |
| { |
| Rune* p; |
| |
| for(p = s; *p != 0; p++) |
| if(_inclass(*p, cl)) |
| return p; |
| return nil; |
| } |
| |
| // _Strnclass returns a pointer to the first element of s[0:n] that is |
| // a member of class cl, nil if none. |
| Rune* |
| _Strnclass(Rune* s, Rune* cl, int n) |
| { |
| Rune* p; |
| |
| for(p = s; n-- && *p != 0; p++) |
| if(_inclass(*p, cl)) |
| return p; |
| return nil; |
| } |
| |
| // _Strrclass returns a pointer to the last element of s that is |
| // a member of class cl, nil if none |
| Rune* |
| _Strrclass(Rune* s, Rune* cl) |
| { |
| Rune* p; |
| |
| if(s == nil || *s == 0) |
| return nil; |
| p = s + runestrlen(s) - 1; |
| while(p >= s) { |
| if(_inclass(*p, cl)) |
| return p; |
| p--; |
| }; |
| return nil; |
| } |
| |
| // _Strnrclass returns a pointer to the last element of s[0:n] that is |
| // a member of class cl, nil if none |
| Rune* |
| _Strnrclass(Rune* s, Rune* cl, int n) |
| { |
| Rune* p; |
| |
| if(s == nil || *s == 0 || n == 0) |
| return nil; |
| p = s + n - 1; |
| while(p >= s) { |
| if(_inclass(*p, cl)) |
| return p; |
| p--; |
| }; |
| return nil; |
| } |
| |
| // Is c in the class cl? |
| int |
| _inclass(Rune c, Rune* cl) |
| { |
| int n; |
| int ans; |
| int negate; |
| int i; |
| |
| n = _Strlen(cl); |
| if(n == 0) |
| return 0; |
| ans = 0; |
| negate = 0; |
| if(cl[0] == '^') { |
| negate = 1; |
| cl++; |
| n--; |
| } |
| for(i = 0; i < n; i++) { |
| if(cl[i] == '-' && i > 0 && i < n - 1) { |
| if(c >= cl[i - 1] && c <= cl[i + 1]) { |
| ans = 1; |
| break; |
| } |
| i++; |
| } |
| else if(c == cl[i]) { |
| ans = 1; |
| break; |
| } |
| } |
| if(negate) |
| ans = !ans; |
| return ans; |
| } |
| |
| // Is pre a prefix of s? |
| int |
| _prefix(Rune* pre, Rune* s) |
| { |
| int ns; |
| int n; |
| int k; |
| |
| ns = _Strlen(s); |
| n = _Strlen(pre); |
| if(ns < n) |
| return 0; |
| for(k = 0; k < n; k++) { |
| if(pre[k] != s[k]) |
| return 0; |
| } |
| return 1; |
| } |
| |
| // Number of runes in (null-terminated) s |
| int |
| _Strlen(Rune* s) |
| { |
| if(s == nil) |
| return 0; |
| return runestrlen(s); |
| } |
| |
| // -1, 0, 1 as s1 is lexicographically less, equal greater than s2 |
| int |
| _Strcmp(Rune *s1, Rune *s2) |
| { |
| if(s1 == nil) |
| return (s2 == nil || *s2 == 0) ? 0 : -1; |
| if(s2 == nil) |
| return (*s1 == 0) ? 0 : 1; |
| return runestrcmp(s1, s2); |
| } |
| |
| // Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars). |
| // Also, do a case-insensitive match, assuming s2 |
| // has no chars in [A-Z], only their lowercase versions. |
| // (This routine is used for in-place keyword lookup, where s2 is in a keyword |
| // list and s1 is some substring, possibly mixed-case, in a buffer.) |
| int |
| _Strncmpci(Rune *s1, int n1, Rune *s2) |
| { |
| Rune c1, c2; |
| |
| for(;;) { |
| if(n1-- == 0) { |
| if(*s2 == 0) |
| return 0; |
| return -1; |
| } |
| c1 = *s1++; |
| c2 = *s2++; |
| if(c1 >= 'A' && c1 <= 'Z') |
| c1 = c1 - 'A' + 'a'; |
| if(c1 != c2) { |
| if(c1 > c2) |
| return 1; |
| return -1; |
| } |
| } |
| } |
| |
| // emalloc and copy |
| Rune* |
| _Strdup(Rune* s) |
| { |
| if(s == nil) |
| return nil; |
| return _Strndup(s, runestrlen(s)); |
| } |
| |
| // emalloc and copy n chars of s (assume s is at least that long), |
| // and add 0 terminator. |
| // Return nil if n==0. |
| Rune* |
| _Strndup(Rune* s, int n) |
| { |
| Rune* ans; |
| |
| if(n <= 0) |
| return nil; |
| ans = _newstr(n); |
| memmove(ans, s, n*sizeof(Rune)); |
| ans[n] = 0; |
| return ans; |
| } |
| // emalloc enough room for n Runes, plus 1 null terminator. |
| // (Not initialized to anything.) |
| Rune* |
| _newstr(int n) |
| { |
| return (Rune*)emalloc((n+1)*sizeof(Rune)); |
| } |
| |
| // emalloc and copy s+t |
| Rune* |
| _Strdup2(Rune* s, Rune* t) |
| { |
| int ns, nt; |
| Rune* ans; |
| Rune* p; |
| |
| ns = _Strlen(s); |
| nt = _Strlen(t); |
| if(ns+nt == 0) |
| return nil; |
| ans = _newstr(ns+nt); |
| p = _Stradd(ans, s, ns); |
| p = _Stradd(p, t, nt); |
| *p = 0; |
| return ans; |
| } |
| |
| // Return emalloc'd substring s[start:stop], |
| Rune* |
| _Strsubstr(Rune* s, int start, int stop) |
| { |
| Rune* t; |
| |
| if(start == stop) |
| return nil; |
| t = _Strndup(s+start, stop-start); |
| return t; |
| } |
| |
| // Copy n chars to s1 from s2, and return s1+n |
| Rune* |
| _Stradd(Rune* s1, Rune* s2, int n) |
| { |
| if(n == 0) |
| return s1; |
| memmove(s1, s2, n*sizeof(Rune)); |
| return s1+n; |
| } |
| |
| // Like strtol, but converting from Rune* string |
| |
| //#define LONG_MAX 2147483647L |
| //#define LONG_MIN -2147483648L |
| |
| long |
| _Strtol(Rune* nptr, Rune** endptr, int base) |
| { |
| Rune* p; |
| long n, nn; |
| int c, ovfl, v, neg, ndig; |
| |
| p = nptr; |
| neg = 0; |
| n = 0; |
| ndig = 0; |
| ovfl = 0; |
| |
| /* |
| * White space |
| */ |
| for(;;p++){ |
| switch(*p){ |
| case ' ': |
| case '\t': |
| case '\n': |
| case '\f': |
| case '\r': |
| case '\v': |
| continue; |
| } |
| break; |
| } |
| |
| /* |
| * Sign |
| */ |
| if(*p=='-' || *p=='+') |
| if(*p++ == '-') |
| neg = 1; |
| |
| /* |
| * Base |
| */ |
| if(base==0){ |
| if(*p != '0') |
| base = 10; |
| else{ |
| base = 8; |
| if(p[1]=='x' || p[1]=='X'){ |
| p += 2; |
| base = 16; |
| } |
| } |
| }else if(base==16 && *p=='0'){ |
| if(p[1]=='x' || p[1]=='X') |
| p += 2; |
| }else if(base<0 || 36<base) |
| goto Return; |
| |
| /* |
| * Non-empty sequence of digits |
| */ |
| for(;; p++,ndig++){ |
| c = *p; |
| v = base; |
| if('0'<=c && c<='9') |
| v = c - '0'; |
| else if('a'<=c && c<='z') |
| v = c - 'a' + 10; |
| else if('A'<=c && c<='Z') |
| v = c - 'A' + 10; |
| if(v >= base) |
| break; |
| nn = n*base + v; |
| if(nn < n) |
| ovfl = 1; |
| n = nn; |
| } |
| |
| Return: |
| if(ndig == 0) |
| p = nptr; |
| if(endptr) |
| *endptr = p; |
| if(ovfl){ |
| if(neg) |
| return LONG_MIN; |
| return LONG_MAX; |
| } |
| if(neg) |
| return -n; |
| return n; |
| } |
| |
| // Convert buf[0:n], bytes whose character set is chset, |
| // into a emalloc'd null-terminated Unicode string. |
| Rune* |
| toStr(uchar* buf, int n, int chset) |
| { |
| int i; |
| int m; |
| Rune ch; |
| Rune* ans; |
| |
| switch(chset) { |
| case US_Ascii: |
| case ISO_8859_1: |
| ans = (Rune*)emalloc((n+1)*sizeof(Rune)); |
| for(i = 0; i < n; i++) |
| ans[i] = buf[i]; |
| ans[n] = 0; |
| break; |
| |
| case UTF_8: |
| m = 0; |
| for(i = 0; i < n; ) { |
| i += chartorune(&ch, (char*)(buf+i)); |
| m++; |
| } |
| ans = (Rune*)emalloc((m+1)*sizeof(Rune)); |
| m = 0; |
| for(i = 0; i < n; ) { |
| i += chartorune(&ch, (char*)(buf+i)); |
| ans[m++] = ch; |
| } |
| ans[m] = 0; |
| break; |
| |
| default: |
| ans = nil; |
| assert(0); |
| } |
| return ans; |
| } |
| |
| // Convert buf[0:n], Unicode characters, |
| // into an emalloc'd null-terminated string in character set chset. |
| // Use 0x80 for unconvertable characters. |
| uchar* |
| fromStr(Rune* buf, int n, int chset) |
| { |
| uchar* ans; |
| int i, lim, m; |
| Rune ch; |
| uchar* p; |
| uchar s[UTFmax]; |
| |
| ans = nil; |
| switch(chset) { |
| case US_Ascii: |
| case ISO_8859_1: |
| ans = (uchar*)emalloc(n+1); |
| lim = (chset==US_Ascii)? 127 : 255; |
| for(i = 0; i < n; i++) { |
| ch = buf[i]; |
| if(ch > lim) |
| ch = 0x80; |
| ans[i] = ch; |
| } |
| ans[n] = 0; |
| break; |
| |
| case UTF_8: |
| m = 0; |
| for(i = 0; i < n; i++) { |
| m += runetochar((char*)s, &buf[i]); |
| } |
| ans = (uchar*)emalloc(m+1); |
| p = ans; |
| for(i = 0; i < n; i++) |
| p += runetochar((char*)p, &buf[i]); |
| *p = 0; |
| break; |
| |
| default: |
| assert(0); |
| } |
| return ans; |
| |
| } |
| |
| // Convert n to emalloc'd String. |
| Rune* |
| _ltoStr(int n) |
| { |
| int m; |
| uchar buf[20]; |
| |
| m = snprint((char*)buf, sizeof(buf), "%d", n); |
| return toStr(buf, m, US_Ascii); |
| } |