wkj | 7cf289c | 2004-04-06 19:06:52 +0000 | [diff] [blame] | 1 | #include <u.h> |
rsc | 650deb7 | 2004-04-08 19:36:35 +0000 | [diff] [blame] | 2 | #include <limits.h> |
wkj | 7cf289c | 2004-04-06 19:06:52 +0000 | [diff] [blame] | 3 | #include <libc.h> |
| 4 | #include <draw.h> |
| 5 | #include <html.h> |
| 6 | #include "impl.h" |
| 7 | |
| 8 | Rune whitespace[] = { ' ', '\t', '\n', '\r', '\0' }; |
| 9 | Rune notwhitespace[] = { '^', ' ', '\t', '\n', '\r' , '\0'}; |
| 10 | |
| 11 | // All lists start out like List structure. |
| 12 | // List itself can be used as list of int. |
| 13 | int |
| 14 | _listlen(List* l) |
| 15 | { |
| 16 | int n = 0; |
| 17 | |
| 18 | while(l != nil) { |
| 19 | l = l->next; |
| 20 | n++; |
| 21 | } |
| 22 | return n; |
| 23 | } |
| 24 | |
| 25 | // Cons |
| 26 | List* |
| 27 | _newlist(int val, List* rest) |
| 28 | { |
| 29 | List* ans; |
| 30 | |
| 31 | ans = (List*)emalloc(sizeof(List)); |
| 32 | ans->val = val; |
| 33 | ans->next = rest; |
| 34 | return ans; |
| 35 | } |
| 36 | |
| 37 | // Reverse a list in place |
| 38 | List* |
| 39 | _revlist(List* l) |
| 40 | { |
| 41 | List* newl; |
| 42 | List* nextl; |
| 43 | |
| 44 | newl = nil; |
| 45 | while(l != nil) { |
| 46 | nextl = l->next; |
| 47 | l->next = newl; |
| 48 | newl = l; |
| 49 | l = nextl; |
| 50 | } |
| 51 | return newl; |
| 52 | } |
| 53 | |
| 54 | // The next few routines take a "character class" as argument. |
| 55 | // e.g., "a-zA-Z", or "^ \t\n" |
| 56 | // (ranges indicated by - except in first position; |
| 57 | // ^ is first position means "not in" the following class) |
| 58 | |
| 59 | // Splitl splits s[0:n] just before first character of class cl. |
| 60 | // Answers go in (p1, n1) and (p2, n2). |
| 61 | // If no split, the whole thing goes in the first component. |
| 62 | // Note: answers contain pointers into original string. |
| 63 | void |
| 64 | _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2) |
| 65 | { |
| 66 | Rune* p; |
| 67 | |
| 68 | p = _Strnclass(s, cl, n); |
| 69 | *p1 = s; |
| 70 | if(p == nil) { |
| 71 | *n1 = n; |
| 72 | *p2 = nil; |
| 73 | *n2 = 0; |
| 74 | } |
| 75 | else { |
| 76 | *p2 = p; |
| 77 | *n1 = p-s; |
| 78 | *n2 = n-*n1; |
| 79 | } |
| 80 | } |
| 81 | |
| 82 | // Splitr splits s[0:n] just after last character of class cl. |
| 83 | // Answers go in (p1, n1) and (p2, n2). |
| 84 | // If no split, the whole thing goes in the last component. |
| 85 | // Note: answers contain pointers into original string. |
| 86 | void |
| 87 | _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2) |
| 88 | { |
| 89 | Rune* p; |
| 90 | |
| 91 | p = _Strnrclass(s, cl, n); |
| 92 | if(p == nil) { |
| 93 | *p1 = nil; |
| 94 | *n1 = 0; |
| 95 | *p2 = s; |
| 96 | *n2 = n; |
| 97 | } |
| 98 | else { |
| 99 | *p1 = s; |
| 100 | *p2 = p+1; |
| 101 | *n1 = *p2-s; |
| 102 | *n2 = n-*n1; |
| 103 | } |
| 104 | } |
| 105 | |
| 106 | // Splitall splits s[0:n] into parts that are separated by characters from class cl. |
| 107 | // Each part will have nonzero length. |
| 108 | // At most alen parts are found, and pointers to their starts go into |
| 109 | // the strarr array, while their lengths go into the lenarr array. |
| 110 | // The return value is the number of parts found. |
| 111 | int |
| 112 | _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen) |
| 113 | { |
| 114 | int i; |
| 115 | Rune* p; |
| 116 | Rune* q; |
| 117 | Rune* slast; |
| 118 | |
| 119 | if(s == nil || n == 0) |
| 120 | return 0; |
| 121 | i = 0; |
| 122 | p = s; |
| 123 | slast = s+n; |
| 124 | while(p < slast && i < alen) { |
| 125 | while(p < slast && _inclass(*p, cl)) |
| 126 | p++; |
| 127 | if(p == slast) |
| 128 | break; |
| 129 | q = _Strnclass(p, cl, slast-p); |
| 130 | if(q == nil) |
| 131 | q = slast; |
| 132 | assert(q > p && q <= slast); |
| 133 | strarr[i] = p; |
| 134 | lenarr[i] = q-p; |
| 135 | i++; |
| 136 | p = q; |
| 137 | } |
| 138 | return i; |
| 139 | } |
| 140 | |
| 141 | // Find part of s that excludes leading and trailing whitespace, |
| 142 | // and return that part in *pans (and its length in *panslen). |
| 143 | void |
| 144 | _trimwhite(Rune* s, int n, Rune** pans, int* panslen) |
| 145 | { |
| 146 | Rune* p; |
| 147 | Rune* q; |
| 148 | |
| 149 | p = nil; |
| 150 | if(n > 0) { |
| 151 | p = _Strnclass(s, notwhitespace, n); |
| 152 | if(p != nil) { |
| 153 | q = _Strnrclass(s, notwhitespace, n); |
| 154 | assert(q != nil); |
| 155 | n = q+1-p; |
| 156 | } |
| 157 | } |
| 158 | *pans = p; |
| 159 | *panslen = n; |
| 160 | } |
| 161 | |
| 162 | // _Strclass returns a pointer to the first element of s that is |
| 163 | // a member of class cl, nil if none. |
| 164 | Rune* |
| 165 | _Strclass(Rune* s, Rune* cl) |
| 166 | { |
| 167 | Rune* p; |
| 168 | |
| 169 | for(p = s; *p != 0; p++) |
| 170 | if(_inclass(*p, cl)) |
| 171 | return p; |
| 172 | return nil; |
| 173 | } |
| 174 | |
| 175 | // _Strnclass returns a pointer to the first element of s[0:n] that is |
| 176 | // a member of class cl, nil if none. |
| 177 | Rune* |
| 178 | _Strnclass(Rune* s, Rune* cl, int n) |
| 179 | { |
| 180 | Rune* p; |
| 181 | |
| 182 | for(p = s; n-- && *p != 0; p++) |
| 183 | if(_inclass(*p, cl)) |
| 184 | return p; |
| 185 | return nil; |
| 186 | } |
| 187 | |
| 188 | // _Strrclass returns a pointer to the last element of s that is |
| 189 | // a member of class cl, nil if none |
| 190 | Rune* |
| 191 | _Strrclass(Rune* s, Rune* cl) |
| 192 | { |
| 193 | Rune* p; |
| 194 | |
| 195 | if(s == nil || *s == 0) |
| 196 | return nil; |
| 197 | p = s + runestrlen(s) - 1; |
| 198 | while(p >= s) { |
| 199 | if(_inclass(*p, cl)) |
| 200 | return p; |
| 201 | p--; |
| 202 | }; |
| 203 | return nil; |
| 204 | } |
| 205 | |
| 206 | // _Strnrclass returns a pointer to the last element of s[0:n] that is |
| 207 | // a member of class cl, nil if none |
| 208 | Rune* |
| 209 | _Strnrclass(Rune* s, Rune* cl, int n) |
| 210 | { |
| 211 | Rune* p; |
| 212 | |
| 213 | if(s == nil || *s == 0 || n == 0) |
| 214 | return nil; |
| 215 | p = s + n - 1; |
| 216 | while(p >= s) { |
| 217 | if(_inclass(*p, cl)) |
| 218 | return p; |
| 219 | p--; |
| 220 | }; |
| 221 | return nil; |
| 222 | } |
| 223 | |
| 224 | // Is c in the class cl? |
| 225 | int |
| 226 | _inclass(Rune c, Rune* cl) |
| 227 | { |
| 228 | int n; |
| 229 | int ans; |
| 230 | int negate; |
| 231 | int i; |
| 232 | |
| 233 | n = _Strlen(cl); |
| 234 | if(n == 0) |
| 235 | return 0; |
| 236 | ans = 0; |
| 237 | negate = 0; |
| 238 | if(cl[0] == '^') { |
| 239 | negate = 1; |
| 240 | cl++; |
| 241 | n--; |
| 242 | } |
| 243 | for(i = 0; i < n; i++) { |
| 244 | if(cl[i] == '-' && i > 0 && i < n - 1) { |
| 245 | if(c >= cl[i - 1] && c <= cl[i + 1]) { |
| 246 | ans = 1; |
| 247 | break; |
| 248 | } |
| 249 | i++; |
| 250 | } |
| 251 | else if(c == cl[i]) { |
| 252 | ans = 1; |
| 253 | break; |
| 254 | } |
| 255 | } |
| 256 | if(negate) |
| 257 | ans = !ans; |
| 258 | return ans; |
| 259 | } |
| 260 | |
| 261 | // Is pre a prefix of s? |
| 262 | int |
| 263 | _prefix(Rune* pre, Rune* s) |
| 264 | { |
| 265 | int ns; |
| 266 | int n; |
| 267 | int k; |
| 268 | |
| 269 | ns = _Strlen(s); |
| 270 | n = _Strlen(pre); |
| 271 | if(ns < n) |
| 272 | return 0; |
| 273 | for(k = 0; k < n; k++) { |
| 274 | if(pre[k] != s[k]) |
| 275 | return 0; |
| 276 | } |
| 277 | return 1; |
| 278 | } |
| 279 | |
| 280 | // Number of runes in (null-terminated) s |
| 281 | int |
| 282 | _Strlen(Rune* s) |
| 283 | { |
| 284 | if(s == nil) |
| 285 | return 0; |
| 286 | return runestrlen(s); |
| 287 | } |
| 288 | |
| 289 | // -1, 0, 1 as s1 is lexicographically less, equal greater than s2 |
| 290 | int |
| 291 | _Strcmp(Rune *s1, Rune *s2) |
| 292 | { |
| 293 | if(s1 == nil) |
| 294 | return (s2 == nil || *s2 == 0) ? 0 : -1; |
| 295 | if(s2 == nil) |
| 296 | return (*s1 == 0) ? 0 : 1; |
| 297 | return runestrcmp(s1, s2); |
| 298 | } |
| 299 | |
| 300 | // Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars). |
| 301 | // Also, do a case-insensitive match, assuming s2 |
| 302 | // has no chars in [A-Z], only their lowercase versions. |
| 303 | // (This routine is used for in-place keyword lookup, where s2 is in a keyword |
| 304 | // list and s1 is some substring, possibly mixed-case, in a buffer.) |
| 305 | int |
| 306 | _Strncmpci(Rune *s1, int n1, Rune *s2) |
| 307 | { |
| 308 | Rune c1, c2; |
| 309 | |
| 310 | for(;;) { |
| 311 | if(n1-- == 0) { |
| 312 | if(*s2 == 0) |
| 313 | return 0; |
| 314 | return -1; |
| 315 | } |
| 316 | c1 = *s1++; |
| 317 | c2 = *s2++; |
| 318 | if(c1 >= 'A' && c1 <= 'Z') |
| 319 | c1 = c1 - 'A' + 'a'; |
| 320 | if(c1 != c2) { |
| 321 | if(c1 > c2) |
| 322 | return 1; |
| 323 | return -1; |
| 324 | } |
| 325 | } |
| 326 | } |
| 327 | |
| 328 | // emalloc and copy |
| 329 | Rune* |
| 330 | _Strdup(Rune* s) |
| 331 | { |
| 332 | if(s == nil) |
| 333 | return nil; |
| 334 | return _Strndup(s, runestrlen(s)); |
| 335 | } |
| 336 | |
| 337 | // emalloc and copy n chars of s (assume s is at least that long), |
| 338 | // and add 0 terminator. |
| 339 | // Return nil if n==0. |
| 340 | Rune* |
| 341 | _Strndup(Rune* s, int n) |
| 342 | { |
| 343 | Rune* ans; |
| 344 | |
| 345 | if(n <= 0) |
| 346 | return nil; |
| 347 | ans = _newstr(n); |
| 348 | memmove(ans, s, n*sizeof(Rune)); |
| 349 | ans[n] = 0; |
| 350 | return ans; |
| 351 | } |
| 352 | // emalloc enough room for n Runes, plus 1 null terminator. |
| 353 | // (Not initialized to anything.) |
| 354 | Rune* |
| 355 | _newstr(int n) |
| 356 | { |
| 357 | return (Rune*)emalloc((n+1)*sizeof(Rune)); |
| 358 | } |
| 359 | |
| 360 | // emalloc and copy s+t |
| 361 | Rune* |
| 362 | _Strdup2(Rune* s, Rune* t) |
| 363 | { |
| 364 | int ns, nt; |
| 365 | Rune* ans; |
| 366 | Rune* p; |
| 367 | |
| 368 | ns = _Strlen(s); |
| 369 | nt = _Strlen(t); |
| 370 | if(ns+nt == 0) |
| 371 | return nil; |
| 372 | ans = _newstr(ns+nt); |
| 373 | p = _Stradd(ans, s, ns); |
| 374 | p = _Stradd(p, t, nt); |
| 375 | *p = 0; |
| 376 | return ans; |
| 377 | } |
| 378 | |
| 379 | // Return emalloc'd substring s[start:stop], |
| 380 | Rune* |
| 381 | _Strsubstr(Rune* s, int start, int stop) |
| 382 | { |
| 383 | Rune* t; |
| 384 | |
| 385 | if(start == stop) |
| 386 | return nil; |
| 387 | t = _Strndup(s+start, stop-start); |
| 388 | return t; |
| 389 | } |
| 390 | |
| 391 | // Copy n chars to s1 from s2, and return s1+n |
| 392 | Rune* |
| 393 | _Stradd(Rune* s1, Rune* s2, int n) |
| 394 | { |
| 395 | if(n == 0) |
| 396 | return s1; |
| 397 | memmove(s1, s2, n*sizeof(Rune)); |
| 398 | return s1+n; |
| 399 | } |
| 400 | |
| 401 | // Like strtol, but converting from Rune* string |
| 402 | |
| 403 | //#define LONG_MAX 2147483647L |
| 404 | //#define LONG_MIN -2147483648L |
| 405 | |
| 406 | long |
| 407 | _Strtol(Rune* nptr, Rune** endptr, int base) |
| 408 | { |
| 409 | Rune* p; |
| 410 | long n, nn; |
| 411 | int c, ovfl, v, neg, ndig; |
| 412 | |
| 413 | p = nptr; |
| 414 | neg = 0; |
| 415 | n = 0; |
| 416 | ndig = 0; |
| 417 | ovfl = 0; |
| 418 | |
| 419 | /* |
| 420 | * White space |
| 421 | */ |
| 422 | for(;;p++){ |
| 423 | switch(*p){ |
| 424 | case ' ': |
| 425 | case '\t': |
| 426 | case '\n': |
| 427 | case '\f': |
| 428 | case '\r': |
| 429 | case '\v': |
| 430 | continue; |
| 431 | } |
| 432 | break; |
| 433 | } |
| 434 | |
| 435 | /* |
| 436 | * Sign |
| 437 | */ |
| 438 | if(*p=='-' || *p=='+') |
| 439 | if(*p++ == '-') |
| 440 | neg = 1; |
| 441 | |
| 442 | /* |
| 443 | * Base |
| 444 | */ |
| 445 | if(base==0){ |
| 446 | if(*p != '0') |
| 447 | base = 10; |
| 448 | else{ |
| 449 | base = 8; |
| 450 | if(p[1]=='x' || p[1]=='X'){ |
| 451 | p += 2; |
| 452 | base = 16; |
| 453 | } |
| 454 | } |
| 455 | }else if(base==16 && *p=='0'){ |
| 456 | if(p[1]=='x' || p[1]=='X') |
| 457 | p += 2; |
| 458 | }else if(base<0 || 36<base) |
| 459 | goto Return; |
| 460 | |
| 461 | /* |
| 462 | * Non-empty sequence of digits |
| 463 | */ |
| 464 | for(;; p++,ndig++){ |
| 465 | c = *p; |
| 466 | v = base; |
| 467 | if('0'<=c && c<='9') |
| 468 | v = c - '0'; |
| 469 | else if('a'<=c && c<='z') |
| 470 | v = c - 'a' + 10; |
| 471 | else if('A'<=c && c<='Z') |
| 472 | v = c - 'A' + 10; |
| 473 | if(v >= base) |
| 474 | break; |
| 475 | nn = n*base + v; |
| 476 | if(nn < n) |
| 477 | ovfl = 1; |
| 478 | n = nn; |
| 479 | } |
| 480 | |
| 481 | Return: |
| 482 | if(ndig == 0) |
| 483 | p = nptr; |
| 484 | if(endptr) |
| 485 | *endptr = p; |
| 486 | if(ovfl){ |
| 487 | if(neg) |
| 488 | return LONG_MIN; |
| 489 | return LONG_MAX; |
| 490 | } |
| 491 | if(neg) |
| 492 | return -n; |
| 493 | return n; |
| 494 | } |
| 495 | |
| 496 | // Convert buf[0:n], bytes whose character set is chset, |
| 497 | // into a emalloc'd null-terminated Unicode string. |
| 498 | Rune* |
| 499 | toStr(uchar* buf, int n, int chset) |
| 500 | { |
| 501 | int i; |
| 502 | int m; |
| 503 | Rune ch; |
| 504 | Rune* ans; |
| 505 | |
| 506 | switch(chset) { |
| 507 | case US_Ascii: |
| 508 | case ISO_8859_1: |
| 509 | ans = (Rune*)emalloc((n+1)*sizeof(Rune)); |
| 510 | for(i = 0; i < n; i++) |
| 511 | ans[i] = buf[i]; |
| 512 | ans[n] = 0; |
| 513 | break; |
| 514 | |
| 515 | case UTF_8: |
| 516 | m = 0; |
| 517 | for(i = 0; i < n; ) { |
| 518 | i += chartorune(&ch, (char*)(buf+i)); |
| 519 | m++; |
| 520 | } |
| 521 | ans = (Rune*)emalloc((m+1)*sizeof(Rune)); |
| 522 | m = 0; |
| 523 | for(i = 0; i < n; ) { |
| 524 | i += chartorune(&ch, (char*)(buf+i)); |
| 525 | ans[m++] = ch; |
| 526 | } |
| 527 | ans[m] = 0; |
| 528 | break; |
| 529 | |
| 530 | default: |
| 531 | ans = nil; |
| 532 | assert(0); |
| 533 | } |
| 534 | return ans; |
| 535 | } |
| 536 | |
| 537 | // Convert buf[0:n], Unicode characters, |
| 538 | // into an emalloc'd null-terminated string in character set chset. |
| 539 | // Use 0x80 for unconvertable characters. |
| 540 | uchar* |
| 541 | fromStr(Rune* buf, int n, int chset) |
| 542 | { |
| 543 | uchar* ans; |
| 544 | int i, lim, m; |
| 545 | Rune ch; |
| 546 | uchar* p; |
| 547 | uchar s[UTFmax]; |
| 548 | |
| 549 | ans = nil; |
| 550 | switch(chset) { |
| 551 | case US_Ascii: |
| 552 | case ISO_8859_1: |
| 553 | ans = (uchar*)emalloc(n+1); |
| 554 | lim = (chset==US_Ascii)? 127 : 255; |
| 555 | for(i = 0; i < n; i++) { |
| 556 | ch = buf[i]; |
| 557 | if(ch > lim) |
| 558 | ch = 0x80; |
| 559 | ans[i] = ch; |
| 560 | } |
| 561 | ans[n] = 0; |
| 562 | break; |
| 563 | |
| 564 | case UTF_8: |
| 565 | m = 0; |
| 566 | for(i = 0; i < n; i++) { |
| 567 | m += runetochar((char*)s, &buf[i]); |
| 568 | } |
| 569 | ans = (uchar*)emalloc(m+1); |
| 570 | p = ans; |
| 571 | for(i = 0; i < n; i++) |
| 572 | p += runetochar((char*)p, &buf[i]); |
| 573 | *p = 0; |
| 574 | break; |
| 575 | |
| 576 | default: |
| 577 | assert(0); |
| 578 | } |
| 579 | return ans; |
| 580 | |
| 581 | } |
| 582 | |
| 583 | // Convert n to emalloc'd String. |
| 584 | Rune* |
| 585 | _ltoStr(int n) |
| 586 | { |
| 587 | int m; |
| 588 | uchar buf[20]; |
| 589 | |
| 590 | m = snprint((char*)buf, sizeof(buf), "%d", n); |
| 591 | return toStr(buf, m, US_Ascii); |
| 592 | } |