blob: 32fd70d0d8add58d512a7b985b490c8075db495b [file] [log] [blame]
wkj7cf289c2004-04-06 19:06:52 +00001#include <u.h>
rsc650deb72004-04-08 19:36:35 +00002#include <limits.h>
wkj7cf289c2004-04-06 19:06:52 +00003#include <libc.h>
4#include <draw.h>
5#include <html.h>
6#include "impl.h"
7
8Rune whitespace[] = { ' ', '\t', '\n', '\r', '\0' };
9Rune notwhitespace[] = { '^', ' ', '\t', '\n', '\r' , '\0'};
10
11// All lists start out like List structure.
12// List itself can be used as list of int.
13int
14_listlen(List* l)
15{
16 int n = 0;
17
18 while(l != nil) {
19 l = l->next;
20 n++;
21 }
22 return n;
23}
24
25// Cons
26List*
27_newlist(int val, List* rest)
28{
29 List* ans;
30
31 ans = (List*)emalloc(sizeof(List));
32 ans->val = val;
33 ans->next = rest;
34 return ans;
35}
36
37// Reverse a list in place
38List*
39_revlist(List* l)
40{
41 List* newl;
42 List* nextl;
43
44 newl = nil;
45 while(l != nil) {
46 nextl = l->next;
47 l->next = newl;
48 newl = l;
49 l = nextl;
50 }
51 return newl;
52}
53
// The next few routines take a "character class" as argument,
// e.g., "a-zA-Z", or "^ \t\n".
// (Ranges are indicated by -, except when - is in first or last position;
// ^ in first position means "not in" the following class.)
58
59// Splitl splits s[0:n] just before first character of class cl.
60// Answers go in (p1, n1) and (p2, n2).
61// If no split, the whole thing goes in the first component.
62// Note: answers contain pointers into original string.
63void
64_splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
65{
66 Rune* p;
67
68 p = _Strnclass(s, cl, n);
69 *p1 = s;
70 if(p == nil) {
71 *n1 = n;
72 *p2 = nil;
73 *n2 = 0;
74 }
75 else {
76 *p2 = p;
77 *n1 = p-s;
78 *n2 = n-*n1;
79 }
80}
81
82// Splitr splits s[0:n] just after last character of class cl.
83// Answers go in (p1, n1) and (p2, n2).
84// If no split, the whole thing goes in the last component.
85// Note: answers contain pointers into original string.
86void
87_splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
88{
89 Rune* p;
90
91 p = _Strnrclass(s, cl, n);
92 if(p == nil) {
93 *p1 = nil;
94 *n1 = 0;
95 *p2 = s;
96 *n2 = n;
97 }
98 else {
99 *p1 = s;
100 *p2 = p+1;
101 *n1 = *p2-s;
102 *n2 = n-*n1;
103 }
104}
105
106// Splitall splits s[0:n] into parts that are separated by characters from class cl.
107// Each part will have nonzero length.
108// At most alen parts are found, and pointers to their starts go into
109// the strarr array, while their lengths go into the lenarr array.
110// The return value is the number of parts found.
111int
112_splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
113{
114 int i;
115 Rune* p;
116 Rune* q;
117 Rune* slast;
118
119 if(s == nil || n == 0)
120 return 0;
121 i = 0;
122 p = s;
123 slast = s+n;
124 while(p < slast && i < alen) {
125 while(p < slast && _inclass(*p, cl))
126 p++;
127 if(p == slast)
128 break;
129 q = _Strnclass(p, cl, slast-p);
130 if(q == nil)
131 q = slast;
132 assert(q > p && q <= slast);
133 strarr[i] = p;
134 lenarr[i] = q-p;
135 i++;
136 p = q;
137 }
138 return i;
139}
140
141// Find part of s that excludes leading and trailing whitespace,
142// and return that part in *pans (and its length in *panslen).
143void
144_trimwhite(Rune* s, int n, Rune** pans, int* panslen)
145{
146 Rune* p;
147 Rune* q;
148
149 p = nil;
150 if(n > 0) {
151 p = _Strnclass(s, notwhitespace, n);
152 if(p != nil) {
153 q = _Strnrclass(s, notwhitespace, n);
154 assert(q != nil);
155 n = q+1-p;
156 }
157 }
158 *pans = p;
159 *panslen = n;
160}
161
162// _Strclass returns a pointer to the first element of s that is
163// a member of class cl, nil if none.
164Rune*
165_Strclass(Rune* s, Rune* cl)
166{
167 Rune* p;
168
169 for(p = s; *p != 0; p++)
170 if(_inclass(*p, cl))
171 return p;
172 return nil;
173}
174
175// _Strnclass returns a pointer to the first element of s[0:n] that is
176// a member of class cl, nil if none.
177Rune*
178_Strnclass(Rune* s, Rune* cl, int n)
179{
180 Rune* p;
181
182 for(p = s; n-- && *p != 0; p++)
183 if(_inclass(*p, cl))
184 return p;
185 return nil;
186}
187
188// _Strrclass returns a pointer to the last element of s that is
189// a member of class cl, nil if none
190Rune*
191_Strrclass(Rune* s, Rune* cl)
192{
193 Rune* p;
194
195 if(s == nil || *s == 0)
196 return nil;
197 p = s + runestrlen(s) - 1;
198 while(p >= s) {
199 if(_inclass(*p, cl))
200 return p;
201 p--;
202 };
203 return nil;
204}
205
206// _Strnrclass returns a pointer to the last element of s[0:n] that is
207// a member of class cl, nil if none
208Rune*
209_Strnrclass(Rune* s, Rune* cl, int n)
210{
211 Rune* p;
212
213 if(s == nil || *s == 0 || n == 0)
214 return nil;
215 p = s + n - 1;
216 while(p >= s) {
217 if(_inclass(*p, cl))
218 return p;
219 p--;
220 };
221 return nil;
222}
223
224// Is c in the class cl?
225int
226_inclass(Rune c, Rune* cl)
227{
228 int n;
229 int ans;
230 int negate;
231 int i;
232
233 n = _Strlen(cl);
234 if(n == 0)
235 return 0;
236 ans = 0;
237 negate = 0;
238 if(cl[0] == '^') {
239 negate = 1;
240 cl++;
241 n--;
242 }
243 for(i = 0; i < n; i++) {
244 if(cl[i] == '-' && i > 0 && i < n - 1) {
245 if(c >= cl[i - 1] && c <= cl[i + 1]) {
246 ans = 1;
247 break;
248 }
249 i++;
250 }
251 else if(c == cl[i]) {
252 ans = 1;
253 break;
254 }
255 }
256 if(negate)
257 ans = !ans;
258 return ans;
259}
260
261// Is pre a prefix of s?
262int
263_prefix(Rune* pre, Rune* s)
264{
265 int ns;
266 int n;
267 int k;
268
269 ns = _Strlen(s);
270 n = _Strlen(pre);
271 if(ns < n)
272 return 0;
273 for(k = 0; k < n; k++) {
274 if(pre[k] != s[k])
275 return 0;
276 }
277 return 1;
278}
279
280// Number of runes in (null-terminated) s
281int
282_Strlen(Rune* s)
283{
284 if(s == nil)
285 return 0;
286 return runestrlen(s);
287}
288
289// -1, 0, 1 as s1 is lexicographically less, equal greater than s2
290int
291_Strcmp(Rune *s1, Rune *s2)
292{
293 if(s1 == nil)
294 return (s2 == nil || *s2 == 0) ? 0 : -1;
295 if(s2 == nil)
296 return (*s1 == 0) ? 0 : 1;
297 return runestrcmp(s1, s2);
298}
299
300// Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars).
301// Also, do a case-insensitive match, assuming s2
302// has no chars in [A-Z], only their lowercase versions.
303// (This routine is used for in-place keyword lookup, where s2 is in a keyword
304// list and s1 is some substring, possibly mixed-case, in a buffer.)
305int
306_Strncmpci(Rune *s1, int n1, Rune *s2)
307{
308 Rune c1, c2;
309
310 for(;;) {
311 if(n1-- == 0) {
312 if(*s2 == 0)
313 return 0;
314 return -1;
315 }
316 c1 = *s1++;
317 c2 = *s2++;
318 if(c1 >= 'A' && c1 <= 'Z')
319 c1 = c1 - 'A' + 'a';
320 if(c1 != c2) {
321 if(c1 > c2)
322 return 1;
323 return -1;
324 }
325 }
326}
327
328// emalloc and copy
329Rune*
330_Strdup(Rune* s)
331{
332 if(s == nil)
333 return nil;
334 return _Strndup(s, runestrlen(s));
335}
336
337// emalloc and copy n chars of s (assume s is at least that long),
338// and add 0 terminator.
339// Return nil if n==0.
340Rune*
341_Strndup(Rune* s, int n)
342{
343 Rune* ans;
344
345 if(n <= 0)
346 return nil;
347 ans = _newstr(n);
348 memmove(ans, s, n*sizeof(Rune));
349 ans[n] = 0;
350 return ans;
351}
352// emalloc enough room for n Runes, plus 1 null terminator.
353// (Not initialized to anything.)
354Rune*
355_newstr(int n)
356{
357 return (Rune*)emalloc((n+1)*sizeof(Rune));
358}
359
360// emalloc and copy s+t
361Rune*
362_Strdup2(Rune* s, Rune* t)
363{
364 int ns, nt;
365 Rune* ans;
366 Rune* p;
367
368 ns = _Strlen(s);
369 nt = _Strlen(t);
370 if(ns+nt == 0)
371 return nil;
372 ans = _newstr(ns+nt);
373 p = _Stradd(ans, s, ns);
374 p = _Stradd(p, t, nt);
375 *p = 0;
376 return ans;
377}
378
379// Return emalloc'd substring s[start:stop],
380Rune*
381_Strsubstr(Rune* s, int start, int stop)
382{
383 Rune* t;
384
385 if(start == stop)
386 return nil;
387 t = _Strndup(s+start, stop-start);
388 return t;
389}
390
391// Copy n chars to s1 from s2, and return s1+n
392Rune*
393_Stradd(Rune* s1, Rune* s2, int n)
394{
395 if(n == 0)
396 return s1;
397 memmove(s1, s2, n*sizeof(Rune));
398 return s1+n;
399}
400
401// Like strtol, but converting from Rune* string
402
403//#define LONG_MAX 2147483647L
404//#define LONG_MIN -2147483648L
405
406long
407_Strtol(Rune* nptr, Rune** endptr, int base)
408{
409 Rune* p;
410 long n, nn;
411 int c, ovfl, v, neg, ndig;
412
413 p = nptr;
414 neg = 0;
415 n = 0;
416 ndig = 0;
417 ovfl = 0;
418
419 /*
420 * White space
421 */
422 for(;;p++){
423 switch(*p){
424 case ' ':
425 case '\t':
426 case '\n':
427 case '\f':
428 case '\r':
429 case '\v':
430 continue;
431 }
432 break;
433 }
434
435 /*
436 * Sign
437 */
438 if(*p=='-' || *p=='+')
439 if(*p++ == '-')
440 neg = 1;
441
442 /*
443 * Base
444 */
445 if(base==0){
446 if(*p != '0')
447 base = 10;
448 else{
449 base = 8;
450 if(p[1]=='x' || p[1]=='X'){
451 p += 2;
452 base = 16;
453 }
454 }
455 }else if(base==16 && *p=='0'){
456 if(p[1]=='x' || p[1]=='X')
457 p += 2;
458 }else if(base<0 || 36<base)
459 goto Return;
460
461 /*
462 * Non-empty sequence of digits
463 */
464 for(;; p++,ndig++){
465 c = *p;
466 v = base;
467 if('0'<=c && c<='9')
468 v = c - '0';
469 else if('a'<=c && c<='z')
470 v = c - 'a' + 10;
471 else if('A'<=c && c<='Z')
472 v = c - 'A' + 10;
473 if(v >= base)
474 break;
475 nn = n*base + v;
476 if(nn < n)
477 ovfl = 1;
478 n = nn;
479 }
480
481 Return:
482 if(ndig == 0)
483 p = nptr;
484 if(endptr)
485 *endptr = p;
486 if(ovfl){
487 if(neg)
488 return LONG_MIN;
489 return LONG_MAX;
490 }
491 if(neg)
492 return -n;
493 return n;
494}
495
496// Convert buf[0:n], bytes whose character set is chset,
497// into a emalloc'd null-terminated Unicode string.
498Rune*
499toStr(uchar* buf, int n, int chset)
500{
501 int i;
502 int m;
503 Rune ch;
504 Rune* ans;
505
506 switch(chset) {
507 case US_Ascii:
508 case ISO_8859_1:
509 ans = (Rune*)emalloc((n+1)*sizeof(Rune));
510 for(i = 0; i < n; i++)
511 ans[i] = buf[i];
512 ans[n] = 0;
513 break;
514
515 case UTF_8:
516 m = 0;
517 for(i = 0; i < n; ) {
518 i += chartorune(&ch, (char*)(buf+i));
519 m++;
520 }
521 ans = (Rune*)emalloc((m+1)*sizeof(Rune));
522 m = 0;
523 for(i = 0; i < n; ) {
524 i += chartorune(&ch, (char*)(buf+i));
525 ans[m++] = ch;
526 }
527 ans[m] = 0;
528 break;
529
530 default:
531 ans = nil;
532 assert(0);
533 }
534 return ans;
535}
536
537// Convert buf[0:n], Unicode characters,
538// into an emalloc'd null-terminated string in character set chset.
539// Use 0x80 for unconvertable characters.
540uchar*
541fromStr(Rune* buf, int n, int chset)
542{
543 uchar* ans;
544 int i, lim, m;
545 Rune ch;
546 uchar* p;
547 uchar s[UTFmax];
548
549 ans = nil;
550 switch(chset) {
551 case US_Ascii:
552 case ISO_8859_1:
553 ans = (uchar*)emalloc(n+1);
554 lim = (chset==US_Ascii)? 127 : 255;
555 for(i = 0; i < n; i++) {
556 ch = buf[i];
557 if(ch > lim)
558 ch = 0x80;
559 ans[i] = ch;
560 }
561 ans[n] = 0;
562 break;
563
564 case UTF_8:
565 m = 0;
566 for(i = 0; i < n; i++) {
567 m += runetochar((char*)s, &buf[i]);
568 }
569 ans = (uchar*)emalloc(m+1);
570 p = ans;
571 for(i = 0; i < n; i++)
572 p += runetochar((char*)p, &buf[i]);
573 *p = 0;
574 break;
575
576 default:
577 assert(0);
578 }
579 return ans;
580
581}
582
583// Convert n to emalloc'd String.
584Rune*
585_ltoStr(int n)
586{
587 int m;
588 uchar buf[20];
589
590 m = snprint((char*)buf, sizeof(buf), "%d", n);
591 return toStr(buf, m, US_Ascii);
592}