/*
 * sed -- stream editor
 */
6#include <u.h>
7#include <libc.h>
8#include <bio.h>
9#include <regexp.h>
10
/*
 * Compile-time limits.  Every table in this file is statically sized
 * from one of these constants.
 */
enum {
	DEPTH	= 20,		/* deepest permitted nesting of {} groups */
	MAXCMDS	= 512,		/* entries in the command table pspace */
	ADDSIZE	= 10000,	/* runes of storage shared by a/c/i text, r names and s right-hand sides */
	MAXADDS	= 20,		/* appends and reads that may be queued at once */
	LBSIZE	= 8192,		/* runes in one input line */
	LABSIZE	= 50,		/* entries in the label table ltab */
	MAXSUB	= 10,		/* subexpression slots handed to the matcher */
	MAXFILES = 120		/* output files, standard output included */
};
21 /* An address is a line #, a R.E., "$", a reference to the last
22 * R.E., or nothing.
23 */
24typedef struct {
25 enum {
26 A_NONE,
27 A_DOL,
28 A_LINE,
29 A_RE,
rsccbeb0b22006-04-01 19:24:03 +000030 A_LAST
rsc39405062005-01-13 04:56:07 +000031 }type;
32 union {
33 long line; /* Line # */
34 Reprog *rp; /* Compiled R.E. */
35 } u;
36} Addr;
37
38typedef struct SEDCOM {
39 Addr ad1; /* optional start address */
40 Addr ad2; /* optional end address */
41 union {
42 Reprog *re1; /* compiled R.E. */
43 Rune *text; /* added text or file name */
44 struct SEDCOM *lb1; /* destination command of branch */
45 } u;
46 Rune *rhs; /* Right-hand side of substitution */
47 Biobuf* fcode; /* File ID for read and write */
48 char command; /* command code -see below */
49 char gfl; /* 'Global' flag for substitutions */
50 char pfl; /* 'print' flag for substitutions */
51 char active; /* 1 => data between start and end */
52 char negfl; /* negation flag */
53} SedCom;
54
/*
 * Values of SedCom.command, listed in numeric order.  Zero is not a
 * command: it marks the end of the compiled script.
 */
#define ACOM	01	/* a */
#define CCOM	02	/* c */
#define DCOM	03	/* d */
#define ICOM	04	/* i */
#define LCOM	05	/* l */
#define RCOM	06	/* r */
#define SCOM	07	/* s */
#define PCOM	010	/* p */
#define QCOM	011	/* q */
#define NCOM	012	/* n */
#define EQCOM	013	/* = */
#define WCOM	014	/* w */
#define ECOM	015	/* not produced by the compiler in this file */
#define FCOM	016	/* not produced by the compiler in this file */
#define COCOM	017	/* not produced by the compiler in this file */
#define BCOM	020	/* b, and the branch that implements { */
#define TCOM	021	/* t */
#define CNCOM	022	/* N */
#define CPCOM	023	/* P */
#define CWCOM	024	/* not produced by the compiler in this file */
#define CDCOM	025	/* D */
#define YCOM	026	/* y */
#define GCOM	027	/* g */
#define CGCOM	030	/* G */
#define HCOM	031	/* h */
#define CHCOM	032	/* H */
#define XCOM	033	/* x */
83
84
85typedef struct label { /* Label symbol table */
86 Rune asc[9]; /* Label name */
87 SedCom *chain;
88 SedCom *address; /* Command associated with label */
89} Label;
90
/*
 * Node in the queue of input files still to be read.
 */
typedef struct FILE_CACHE {
	struct FILE_CACHE	*next;	/* following node, or 0 */
	char	*name;			/* file name; 0 stands for standard input */
} FileCache;
95
96SedCom pspace[MAXCMDS]; /* Command storage */
97SedCom *pend = pspace+MAXCMDS; /* End of command storage */
98SedCom *rep = pspace; /* Current fill point */
99
100Reprog *lastre = 0; /* Last regular expression */
101Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/
102
103Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */
104Rune *addend = addspace+ADDSIZE;
105
106SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */
107SedCom **aptr = abuf;
108
109struct { /* Sed program input control block */
110 enum PTYPE /* Either on command line or in file */
111 { P_ARG,
112 P_FILE
113 } type;
114 union PCTL { /* Pointer to data */
115 Biobuf *bp;
116 char *curr;
117 } pctl;
118} prog;
119
120Rune genbuf[LBSIZE]; /* Miscellaneous buffer */
121
122FileCache *fhead = 0; /* Head of File Cache Chain */
123FileCache *ftail = 0; /* Tail of File Cache Chain */
124
125Rune *loc1; /* Start of pattern match */
126Rune *loc2; /* End of pattern match */
127Rune seof; /* Pattern delimiter char */
128
129Rune linebuf[LBSIZE+1]; /* Input data buffer */
130Rune *lbend = linebuf+LBSIZE; /* End of buffer */
131Rune *spend = linebuf; /* End of input data */
132Rune *cp; /* Current scan point in linebuf */
133
134Rune holdsp[LBSIZE+1]; /* Hold buffer */
135Rune *hend = holdsp+LBSIZE; /* End of hold buffer */
136Rune *hspend = holdsp; /* End of hold data */
137
138int nflag; /* Command line flags */
139int gflag;
rsca09e8572006-06-26 00:03:55 +0000140int lflag;
rsc39405062005-01-13 04:56:07 +0000141
142int dolflag; /* Set when at true EOF */
143int sflag; /* Set when substitution done */
144int jflag; /* Set when jump required */
145int delflag; /* Delete current line when set */
146
147long lnum = 0; /* Input line count */
148
149char fname[MAXFILES][40]; /* File name cache */
150Biobuf *fcode[MAXFILES]; /* File ID cache */
151int nfiles = 0; /* Cache fill point */
152
153Biobuf fout; /* Output stream */
154Biobuf bstdin; /* Default input */
155Biobuf* f = 0; /* Input data */
156
157Label ltab[LABSIZE]; /* Label name symbol table */
158Label *labend = ltab+LABSIZE; /* End of label table */
159Label *lab = ltab+1; /* Current Fill point */
160
161int depth = 0; /* {} stack pointer */
162
163Rune bad; /* Dummy err ptr reference */
164Rune *badp = &bad;
165
166
/* Diagnostic formats; each %S receives the offending script line */
char	CGMES[]		= "Command garbled: %S";
char	TMMES[]		= "Too much text: %S";
char	LTL[]		= "Label too long: %S";
char	AD0MES[]	= "No addresses allowed: %S";
char	AD1MES[]	= "Only one address allowed: %S";
172
173void address(Addr *);
174void arout(void);
175int cmp(char *, char *);
176int rcmp(Rune *, Rune *);
177void command(SedCom *);
178Reprog *compile(void);
179Rune *compsub(Rune *, Rune *);
180void dechain(void);
181void dosub(Rune *);
182int ecmp(Rune *, Rune *, int);
183void enroll(char *);
184void errexit(void);
185int executable(SedCom *);
186void execute(void);
187void fcomp(void);
188long getrune(void);
189Rune *gline(Rune *);
190int match(Reprog *, Rune *);
191void newfile(enum PTYPE, char *);
192int opendata(void);
193Biobuf *open_file(char *);
194Rune *place(Rune *, Rune *, Rune *);
195void quit(char *, char *);
196int rline(Rune *, Rune *);
197Label *search(Label *);
198int substitute(SedCom *);
199char *text(char *);
200Rune *stext(Rune *, Rune *);
201int ycomp(SedCom *);
202char * trans(int c);
203void putline(Biobuf *bp, Rune *buf, int n);
204
205void
206main(int argc, char **argv)
207{
208 int compfl;
209
210 lnum = 0;
211 Binit(&fout, 1, OWRITE);
212 fcode[nfiles++] = &fout;
213 compfl = 0;
214
215 if(argc == 1)
216 exits(0);
217 ARGBEGIN{
218 case 'n':
219 nflag++;
220 continue;
221 case 'f':
222 if(argc <= 1)
223 quit("no pattern-file", 0);
224 newfile(P_FILE, ARGF());
225 fcomp();
226 compfl = 1;
227 continue;
228 case 'e':
229 if (argc <= 1)
230 quit("missing pattern", 0);
231 newfile(P_ARG, ARGF());
232 fcomp();
233 compfl = 1;
234 continue;
235 case 'g':
236 gflag++;
237 continue;
rsca09e8572006-06-26 00:03:55 +0000238 case 'l':
239 lflag++;
240 continue;
rsc39405062005-01-13 04:56:07 +0000241 default:
242 fprint(2, "sed: Unknown flag: %c\n", ARGC());
243 continue;
244 } ARGEND
245
246 if(compfl == 0) {
247 if (--argc < 0)
248 quit("missing pattern", 0);
249 newfile(P_ARG, *argv++);
250 fcomp();
251 }
252
253 if(depth)
254 quit("Too many {'s", 0);
255
256 ltab[0].address = rep;
257
258 dechain();
259
260 if(argc <= 0)
261 enroll(0); /* Add stdin to cache */
262 else while(--argc >= 0) {
263 enroll(*argv++);
264 }
265 execute();
266 exits(0);
267}
268void
269fcomp(void)
270{
271 Rune *tp;
272 SedCom *pt, *pt1;
273 int i;
274 Label *lpt;
275
276 static Rune *p = addspace;
277 static SedCom **cmpend[DEPTH]; /* stack of {} operations */
278
279 while (rline(linebuf, lbend) >= 0) {
280 cp = linebuf;
281comploop:
282 while(*cp == ' ' || *cp == '\t')
283 cp++;
284 if(*cp == '\0' || *cp == '#')
285 continue;
286 if(*cp == ';') {
287 cp++;
288 goto comploop;
289 }
290
291 address(&rep->ad1);
292 if (rep->ad1.type != A_NONE) {
293 if (rep->ad1.type == A_LAST) {
294 if (!lastre)
295 quit("First RE may not be null", 0);
296 rep->ad1.type = A_RE;
297 rep->ad1.u.rp = lastre;
298 }
299 if(*cp == ',' || *cp == ';') {
300 cp++;
301 address(&rep->ad2);
302 if (rep->ad2.type == A_LAST) {
303 rep->ad1.type = A_RE;
304 rep->ad2.u.rp = lastre;
305 }
306 } else
307 rep->ad2.type = A_NONE;
308 }
309 while(*cp == ' ' || *cp == '\t')
310 cp++;
311
312swit:
313 switch(*cp++) {
314
315 default:
316 quit("Unrecognized command: %S", (char *)linebuf);
317
318 case '!':
319 rep->negfl = 1;
320 goto swit;
321
322 case '{':
323 rep->command = BCOM;
324 rep->negfl = !(rep->negfl);
325 cmpend[depth++] = &rep->u.lb1;
326 if(++rep >= pend)
327 quit("Too many commands: %S", (char *) linebuf);
328 if(*cp == '\0') continue;
329 goto comploop;
330
331 case '}':
332 if(rep->ad1.type != A_NONE)
333 quit(AD0MES, (char *) linebuf);
334 if(--depth < 0)
335 quit("Too many }'s", 0);
336 *cmpend[depth] = rep;
337 if(*cp == 0) continue;
338 goto comploop;
339
340 case '=':
341 rep->command = EQCOM;
342 if(rep->ad2.type != A_NONE)
343 quit(AD1MES, (char *) linebuf);
344 break;
345
346 case ':':
347 if(rep->ad1.type != A_NONE)
348 quit(AD0MES, (char *) linebuf);
349
350 while(*cp == ' ')
351 cp++;
352 tp = lab->asc;
353 while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') {
354 *tp++ = *cp++;
355 if(tp >= &(lab->asc[8]))
356 quit(LTL, (char *) linebuf);
357 }
358 *tp = '\0';
359
360 if(lpt = search(lab)) {
361 if(lpt->address)
362 quit("Duplicate labels: %S", (char *) linebuf);
363 } else {
364 lab->chain = 0;
365 lpt = lab;
366 if(++lab >= labend)
367 quit("Too many labels: %S", (char *) linebuf);
368 }
369 lpt->address = rep;
370 if (*cp == '#')
371 continue;
372 rep--; /* reuse this slot */
373 break;
374
375 case 'a':
376 rep->command = ACOM;
377 if(rep->ad2.type != A_NONE)
378 quit(AD1MES, (char *) linebuf);
379 if(*cp == '\\') cp++;
380 if(*cp++ != '\n')
381 quit(CGMES, (char *) linebuf);
382 rep->u.text = p;
383 p = stext(p, addend);
384 break;
385 case 'c':
386 rep->command = CCOM;
387 if(*cp == '\\') cp++;
388 if(*cp++ != '\n')
389 quit(CGMES, (char *) linebuf);
390 rep->u.text = p;
391 p = stext(p, addend);
392 break;
393 case 'i':
394 rep->command = ICOM;
395 if(rep->ad2.type != A_NONE)
396 quit(AD1MES, (char *) linebuf);
397 if(*cp == '\\') cp++;
398 if(*cp++ != '\n')
399 quit(CGMES, (char *) linebuf);
400 rep->u.text = p;
401 p = stext(p, addend);
402 break;
403
404 case 'g':
405 rep->command = GCOM;
406 break;
407
408 case 'G':
409 rep->command = CGCOM;
410 break;
411
412 case 'h':
413 rep->command = HCOM;
414 break;
415
416 case 'H':
417 rep->command = CHCOM;
418 break;
419
420 case 't':
421 rep->command = TCOM;
422 goto jtcommon;
423
424 case 'b':
425 rep->command = BCOM;
426jtcommon:
427 while(*cp == ' ')cp++;
428 if(*cp == '\0') {
429 if(pt = ltab[0].chain) {
430 while(pt1 = pt->u.lb1)
431 pt = pt1;
432 pt->u.lb1 = rep;
433 } else
434 ltab[0].chain = rep;
435 break;
436 }
437 tp = lab->asc;
438 while((*tp++ = *cp++))
439 if(tp >= &(lab->asc[8]))
440 quit(LTL, (char *) linebuf);
441 cp--;
442 tp[-1] = '\0';
443
444 if(lpt = search(lab)) {
445 if(lpt->address) {
446 rep->u.lb1 = lpt->address;
447 } else {
448 pt = lpt->chain;
449 while(pt1 = pt->u.lb1)
450 pt = pt1;
451 pt->u.lb1 = rep;
452 }
453 } else {
454 lab->chain = rep;
455 lab->address = 0;
456 if(++lab >= labend)
457 quit("Too many labels: %S",
458 (char *) linebuf);
459 }
460 break;
461
462 case 'n':
463 rep->command = NCOM;
464 break;
465
466 case 'N':
467 rep->command = CNCOM;
468 break;
469
470 case 'p':
471 rep->command = PCOM;
472 break;
473
474 case 'P':
475 rep->command = CPCOM;
476 break;
477
478 case 'r':
479 rep->command = RCOM;
480 if(rep->ad2.type != A_NONE)
481 quit(AD1MES, (char *) linebuf);
482 if(*cp++ != ' ')
483 quit(CGMES, (char *) linebuf);
484 rep->u.text = p;
485 p = stext(p, addend);
486 break;
487
488 case 'd':
489 rep->command = DCOM;
490 break;
491
492 case 'D':
493 rep->command = CDCOM;
494 rep->u.lb1 = pspace;
495 break;
496
497 case 'q':
498 rep->command = QCOM;
499 if(rep->ad2.type != A_NONE)
500 quit(AD1MES, (char *) linebuf);
501 break;
502
503 case 'l':
504 rep->command = LCOM;
505 break;
506
507 case 's':
508 rep->command = SCOM;
509 seof = *cp++;
510 if ((rep->u.re1 = compile()) == 0) {
511 if(!lastre)
512 quit("First RE may not be null.", 0);
513 rep->u.re1 = lastre;
514 }
515 rep->rhs = p;
516 if((p = compsub(p, addend)) == 0)
517 quit(CGMES, (char *) linebuf);
518 if(*cp == 'g') {
519 cp++;
520 rep->gfl++;
521 } else if(gflag)
522 rep->gfl++;
523
524 if(*cp == 'p') {
525 cp++;
526 rep->pfl = 1;
527 }
528
529 if(*cp == 'P') {
530 cp++;
531 rep->pfl = 2;
532 }
533
534 if(*cp == 'w') {
535 cp++;
536 if(*cp++ != ' ')
537 quit(CGMES, (char *) linebuf);
538 text(fname[nfiles]);
539 for(i = nfiles - 1; i >= 0; i--)
540 if(cmp(fname[nfiles],fname[i]) == 0) {
541 rep->fcode = fcode[i];
542 goto done;
543 }
544 if(nfiles >= MAXFILES)
545 quit("Too many files in w commands 1", 0);
546 rep->fcode = open_file(fname[nfiles]);
547 }
548 break;
549
550 case 'w':
551 rep->command = WCOM;
552 if(*cp++ != ' ')
553 quit(CGMES, (char *) linebuf);
554 text(fname[nfiles]);
555 for(i = nfiles - 1; i >= 0; i--)
556 if(cmp(fname[nfiles], fname[i]) == 0) {
557 rep->fcode = fcode[i];
558 goto done;
559 }
560 if(nfiles >= MAXFILES){
561 fprint(2, "sed: Too many files in w commands 2 \n");
562 fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES);
563 errexit();
564 }
565 rep->fcode = open_file(fname[nfiles]);
566 break;
567
568 case 'x':
569 rep->command = XCOM;
570 break;
571
572 case 'y':
573 rep->command = YCOM;
574 seof = *cp++;
575 if (ycomp(rep) == 0)
576 quit(CGMES, (char *) linebuf);
577 break;
578
579 }
580done:
581 if(++rep >= pend)
582 quit("Too many commands, last: %S", (char *) linebuf);
583
584 if(*cp++ != '\0') {
585 if(cp[-1] == ';')
586 goto comploop;
587 quit(CGMES, (char *) linebuf);
588 }
589
590 }
591}
592
593Biobuf *
594open_file(char *name)
595{
596 Biobuf *bp;
597 int fd;
598
599 if ((bp = malloc(sizeof(Biobuf))) == 0)
600 quit("Out of memory", 0);
601 if ((fd = open(name, OWRITE)) < 0 &&
602 (fd = create(name, OWRITE, 0666)) < 0)
603 quit("Cannot create %s", name);
604 Binit(bp, fd, OWRITE);
605 Bseek(bp, 0, 2);
606 fcode[nfiles++] = bp;
607 return bp;
608}
609
610Rune *
611compsub(Rune *rhs, Rune *end)
612{
613 Rune r;
614
615 while ((r = *cp++) != '\0') {
616 if(r == '\\') {
617 if (rhs < end)
Russ Cox0cadb432009-09-11 17:03:06 -0400618 *rhs++ = Runemax;
rsc39405062005-01-13 04:56:07 +0000619 else
620 return 0;
621 r = *cp++;
622 if(r == 'n')
623 r = '\n';
624 } else {
625 if(r == seof) {
626 if (rhs < end)
627 *rhs++ = '\0';
628 else
629 return 0;
630 return rhs;
631 }
632 }
633 if (rhs < end)
634 *rhs++ = r;
635 else
636 return 0;
637
638 }
639 return 0;
640}
641
642Reprog *
643compile(void)
644{
645 Rune c;
646 char *ep;
647 char expbuf[512];
648
649 if((c = *cp++) == seof) /* '//' */
650 return 0;
651 ep = expbuf;
652 do {
653 if (c == 0 || c == '\n')
654 quit(TMMES, (char *) linebuf);
655 if (c == '\\') {
656 if (ep >= expbuf+sizeof(expbuf))
657 quit(TMMES, (char *) linebuf);
658 ep += runetochar(ep, &c);
659 if ((c = *cp++) == 'n')
660 c = '\n';
661 }
662 if (ep >= expbuf+sizeof(expbuf))
663 quit(TMMES, (char *) linebuf);
664 ep += runetochar(ep, &c);
665 } while ((c = *cp++) != seof);
666 *ep = 0;
667 return lastre = regcomp(expbuf);
668}
669
670void
671regerror(char *s)
672{
673 USED(s);
674 quit(CGMES, (char *) linebuf);
675}
676
677void
678newfile(enum PTYPE type, char *name)
679{
680 if (type == P_ARG)
681 prog.pctl.curr = name;
682 else if ((prog.pctl.bp = Bopen(name, OREAD)) == 0)
683 quit("Cannot open pattern-file: %s\n", name);
684 prog.type = type;
685}
686
687int
688rline(Rune *buf, Rune *end)
689{
690 long c;
691 Rune r;
692
693 while ((c = getrune()) >= 0) {
694 r = c;
695 if (r == '\\') {
696 if (buf <= end)
697 *buf++ = r;
698 if ((c = getrune()) < 0)
699 break;
700 r = c;
701 } else if (r == '\n') {
702 *buf = '\0';
703 return(1);
704 }
705 if (buf <= end)
706 *buf++ = r;
707 }
708 *buf = '\0';
709 return(-1);
710}
711
712long
713getrune(void)
714{
715 char *p;
716 long c;
717 Rune r;
718
719 if (prog.type == P_ARG) {
720 if ((p = prog.pctl.curr) != 0) {
721 if (*p) {
722 prog.pctl.curr += chartorune(&r, p);
723 c = r;
724 } else {
725 c = '\n'; /* fake an end-of-line */
726 prog.pctl.curr = 0;
727 }
728 } else
729 c = -1;
730 } else if ((c = Bgetrune(prog.pctl.bp)) < 0)
731 Bterm(prog.pctl.bp);
732 return c;
733}
734
735void
736address(Addr *ap)
737{
738 int c;
739 long lno;
740
741 if((c = *cp++) == '$')
742 ap->type = A_DOL;
743 else if(c == '/') {
744 seof = c;
745 if (ap->u.rp = compile())
746 ap->type = A_RE;
747 else
748 ap->type = A_LAST;
749 }
750 else if (c >= '0' && c <= '9') {
751 lno = c-'0';
752 while ((c = *cp) >= '0' && c <= '9')
753 lno = lno*10 + *cp++-'0';
754 if(!lno)
755 quit("line number 0 is illegal",0);
756 ap->type = A_LINE;
757 ap->u.line = lno;
758 }
759 else {
760 cp--;
761 ap->type = A_NONE;
762 }
763}
764
/*
 * Compare two NUL-terminated byte strings.
 * Returns 0 when they are identical and 1 otherwise.
 */
int
cmp(char *a, char *b)
{
	for(;; a++, b++){
		if(*a != *b)
			return 1;
		if(*a == '\0')
			return 0;
	}
}
774
775int
776rcmp(Rune *a, Rune *b) /* compare runes */
777{
778 while(*a == *b++)
779 if (*a == '\0')
780 return(0);
781 else a++;
782 return(1);
783}
784
785char *
786text(char *p) /* extract character string */
787{
788 Rune r;
789
790 while(*cp == '\t' || *cp == ' ')
791 cp++;
792 while (*cp) {
793 if ((r = *cp++) == '\\')
794 if ((r = *cp++) == 0)
795 break;;
796 if (r == '\n')
797 while (*cp == '\t' || *cp == ' ')
798 cp++;
799 p += runetochar(p, &r);
800 }
801 *p++ = '\0';
802 return p;
803}
804
805Rune *
806stext(Rune *p, Rune *end) /* extract rune string */
807{
808 while(*cp == '\t' || *cp == ' ')
809 cp++;
810 while (*cp) {
811 if (*cp == '\\')
812 if (*++cp == 0)
813 break;
814 if (p >= end-1)
815 quit(TMMES, (char *) linebuf);
816 if ((*p++ = *cp++) == '\n')
817 while(*cp == '\t' || *cp == ' ')
818 cp++;
819 }
820 *p++ = 0;
821 return p;
822}
823
824
825Label *
826search (Label *ptr)
827{
828 Label *rp;
829
830 for (rp = ltab; rp < ptr; rp++)
831 if(rcmp(rp->asc, ptr->asc) == 0)
832 return(rp);
833 return(0);
834}
835
836void
837dechain(void)
838{
839 Label *lptr;
840 SedCom *rptr, *trptr;
841
842 for(lptr = ltab; lptr < lab; lptr++) {
843
844 if(lptr->address == 0)
845 quit("Undefined label: %S", (char *) lptr->asc);
846
847 if(lptr->chain) {
848 rptr = lptr->chain;
849 while(trptr = rptr->u.lb1) {
850 rptr->u.lb1 = lptr->address;
851 rptr = trptr;
852 }
853 rptr->u.lb1 = lptr->address;
854 }
855 }
856}
857
858int
859ycomp(SedCom *r)
860{
861 int i;
862 Rune *rp;
863 Rune c, *tsp, highc;
864 Rune *sp;
865
866 highc = 0;
867 for(tsp = cp; *tsp != seof; tsp++) {
868 if(*tsp == '\\')
869 tsp++;
870 if(*tsp == '\n' || *tsp == '\0')
871 return(0);
872 if (*tsp > highc) highc = *tsp;
873 }
874 tsp++;
875 if ((rp = r->u.text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0)
876 quit("Out of memory", 0);
877 *rp++ = highc; /* save upper bound */
878 for (i = 0; i <= highc; i++)
879 rp[i] = i;
880 sp = cp;
881 while((c = *sp++) != seof) {
882 if(c == '\\' && *sp == 'n') {
883 sp++;
884 c = '\n';
885 }
886 if((rp[c] = *tsp++) == '\\' && *tsp == 'n') {
887 rp[c] = '\n';
888 tsp++;
889 }
890 if(rp[c] == seof || rp[c] == '\0') {
891 free(r->u.re1);
892 r->u.re1 = 0;
893 return(0);
894 }
895 }
896 if(*tsp != seof) {
897 free(r->u.re1);
898 r->u.re1 = 0;
899 return(0);
900 }
901 cp = tsp+1;
902 return(1);
903}
904
905void
906execute(void)
907{
908 SedCom *ipc;
909
910 while (spend = gline(linebuf)){
911 for(ipc = pspace; ipc->command; ) {
912 if (!executable(ipc)) {
913 ipc++;
914 continue;
915 }
916 command(ipc);
917
918 if(delflag)
919 break;
920 if(jflag) {
921 jflag = 0;
922 if((ipc = ipc->u.lb1) == 0)
923 break;
924 } else
925 ipc++;
926
927 }
928 if(!nflag && !delflag)
929 putline(&fout, linebuf, spend-linebuf);
930 if(aptr > abuf) {
931 arout();
932 }
933 delflag = 0;
934 }
935}
936 /* determine if a statement should be applied to an input line */
937int
938executable(SedCom *ipc)
939{
940 if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */
941 if (ipc->active == 1) /* Second line */
942 ipc->active = 2;
943 switch(ipc->ad2.type) {
944 case A_NONE: /* No second addr; use first */
945 ipc->active = 0;
946 break;
947 case A_DOL: /* Accept everything */
948 return !ipc->negfl;
949 case A_LINE: /* Line at end of range? */
950 if (lnum <= ipc->ad2.u.line) {
951 if (ipc->ad2.u.line == lnum)
952 ipc->active = 0;
953 return !ipc->negfl;
954 }
955 ipc->active = 0; /* out of range */
956 return ipc->negfl;
957 case A_RE: /* Check for matching R.E. */
958 if (match(ipc->ad2.u.rp, linebuf))
959 ipc->active = 0;
960 return !ipc->negfl;
961 default: /* internal error */
962 quit("Internal error", 0);
963 }
964 }
965 switch (ipc->ad1.type) { /* Check first address */
966 case A_NONE: /* Everything matches */
967 return !ipc->negfl;
968 case A_DOL: /* Only last line */
969 if (dolflag)
970 return !ipc->negfl;
971 break;
972 case A_LINE: /* Check line number */
973 if (ipc->ad1.u.line == lnum) {
974 ipc->active = 1; /* In range */
975 return !ipc->negfl;
976 }
977 break;
978 case A_RE: /* Check R.E. */
979 if (match(ipc->ad1.u.rp, linebuf)) {
980 ipc->active = 1; /* In range */
981 return !ipc->negfl;
982 }
983 break;
984 default:
985 quit("Internal error", 0);
986 }
987 return ipc->negfl;
988}
989
990int
991match(Reprog *pattern, Rune *buf)
992{
993 if (!pattern)
994 return 0;
995 subexp[0].s.rsp = buf;
996 subexp[0].e.rep = 0;
Russ Coxee85def2009-02-04 20:13:18 -0800997 if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
rsc39405062005-01-13 04:56:07 +0000998 loc1 = subexp[0].s.rsp;
999 loc2 = subexp[0].e.rep;
1000 return 1;
1001 }
1002 loc1 = loc2 = 0;
1003 return 0;
1004}
1005
1006int
1007substitute(SedCom *ipc)
1008{
1009 int len;
1010
1011 if(!match(ipc->u.re1, linebuf))
1012 return 0;
1013
1014 /*
1015 * we have at least one match. some patterns, e.g. '$' or '^', can
1016 * produce zero-length matches, so during a global substitute we
1017 * must bump to the character after a zero-length match to keep from looping.
1018 */
1019 sflag = 1;
1020 if(ipc->gfl == 0) /* single substitution */
1021 dosub(ipc->rhs);
1022 else
1023 do{ /* global substitution */
1024 len = loc2-loc1; /* length of match */
1025 dosub(ipc->rhs); /* dosub moves loc2 */
1026 if(*loc2 == 0) /* end of string */
1027 break;
1028 if(len == 0) /* zero-length R.E. match */
1029 loc2++; /* bump over zero-length match */
1030 if(*loc2 == 0) /* end of string */
1031 break;
1032 } while(match(ipc->u.re1, loc2));
1033 return 1;
1034}
1035
1036void
1037dosub(Rune *rhsbuf)
1038{
1039 Rune *lp, *sp;
1040 Rune *rp;
1041 int c, n;
1042
1043 lp = linebuf;
1044 sp = genbuf;
1045 rp = rhsbuf;
1046 while (lp < loc1)
1047 *sp++ = *lp++;
1048 while(c = *rp++) {
1049 if (c == '&') {
1050 sp = place(sp, loc1, loc2);
1051 continue;
1052 }
Russ Cox0cadb432009-09-11 17:03:06 -04001053 if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB+'0') {
rsc39405062005-01-13 04:56:07 +00001054 n = c-'0';
1055 if (subexp[n].s.rsp && subexp[n].e.rep) {
1056 sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep);
1057 continue;
1058 }
1059 else {
1060 fprint(2, "sed: Invalid back reference \\%d\n",n);
1061 errexit();
1062 }
1063 }
1064 *sp++ = c;
1065 if (sp >= &genbuf[LBSIZE])
1066 fprint(2, "sed: Output line too long.\n");
1067 }
1068 lp = loc2;
1069 loc2 = sp - genbuf + linebuf;
1070 while (*sp++ = *lp++)
1071 if (sp >= &genbuf[LBSIZE])
1072 fprint(2, "sed: Output line too long.\n");
1073 lp = linebuf;
1074 sp = genbuf;
1075 while (*lp++ = *sp++)
1076 ;
1077 spend = lp-1;
1078}
1079
1080Rune *
1081place(Rune *sp, Rune *l1, Rune *l2)
1082{
1083 while (l1 < l2) {
1084 *sp++ = *l1++;
1085 if (sp >= &genbuf[LBSIZE])
1086 fprint(2, "sed: Output line too long.\n");
1087 }
1088 return(sp);
1089}
1090
/*
 * Return the printable escape sequence for c, as used by the l
 * command: \b, \n, \r, \t and \\ for those characters, otherwise
 * \x followed by four hexadecimal digits.
 *
 * The hexadecimal form lives in a static buffer that the next call
 * overwrites.  Only the low 16 bits of c are represented.
 */
char *
trans(int c)
{
	static char hexbuf[] = "\\x0000";
	static char digits[] = "0123456789abcdef";
	int i;

	if(c == '\b')
		return "\\b";
	if(c == '\n')
		return "\\n";
	if(c == '\r')
		return "\\r";
	if(c == '\t')
		return "\\t";
	if(c == '\\')
		return "\\\\";

	/* most significant nibble first */
	for(i = 0; i < 4; i++)
		hexbuf[2+i] = digits[(c >> (12 - 4*i)) & 0xF];
	return hexbuf;
}
1115
1116void
1117command(SedCom *ipc)
1118{
1119 int i, c;
1120 Rune *p1, *p2;
1121 char *ucp;
1122 Rune *rp;
1123 Rune *execp;
1124
1125 switch(ipc->command) {
1126
1127 case ACOM:
1128 *aptr++ = ipc;
1129 if(aptr >= abuf+MAXADDS) {
1130 quit("sed: Too many appends after line %ld\n",
1131 (char *) lnum);
1132 }
1133 *aptr = 0;
1134 break;
1135 case CCOM:
1136 delflag = 1;
1137 if(ipc->active == 1) {
1138 for(rp = ipc->u.text; *rp; rp++)
1139 Bputrune(&fout, *rp);
1140 Bputc(&fout, '\n');
1141 }
1142 break;
1143 case DCOM:
1144 delflag++;
1145 break;
1146 case CDCOM:
1147 p1 = p2 = linebuf;
1148 while(*p1 != '\n') {
1149 if(*p1++ == 0) {
1150 delflag++;
1151 return;
1152 }
1153 }
1154 p1++;
1155 while(*p2++ = *p1++)
1156 ;
1157 spend = p2-1;
1158 jflag++;
1159 break;
1160 case EQCOM:
1161 Bprint(&fout, "%ld\n", lnum);
1162 break;
1163 case GCOM:
1164 p1 = linebuf;
1165 p2 = holdsp;
1166 while(*p1++ = *p2++)
1167 ;
1168 spend = p1-1;
1169 break;
1170 case CGCOM:
1171 *spend++ = '\n';
1172 p1 = spend;
1173 p2 = holdsp;
1174 while(*p1++ = *p2++)
1175 if(p1 >= lbend)
1176 break;
1177 spend = p1-1;
1178 break;
1179 case HCOM:
1180 p1 = holdsp;
1181 p2 = linebuf;
1182 while(*p1++ = *p2++);
1183 hspend = p1-1;
1184 break;
1185 case CHCOM:
1186 *hspend++ = '\n';
1187 p1 = hspend;
1188 p2 = linebuf;
1189 while(*p1++ = *p2++)
1190 if(p1 >= hend)
1191 break;
1192 hspend = p1-1;
1193 break;
1194 case ICOM:
1195 for(rp = ipc->u.text; *rp; rp++)
1196 Bputrune(&fout, *rp);
1197 Bputc(&fout, '\n');
1198 break;
1199 case BCOM:
1200 jflag = 1;
1201 break;
1202 case LCOM:
1203 c = 0;
1204 for (i = 0, rp = linebuf; *rp; rp++) {
1205 c = *rp;
1206 if(c >= 0x20 && c < 0x7F && c != '\\') {
1207 Bputc(&fout, c);
1208 if(i++ > 71) {
1209 Bprint(&fout, "\\\n");
1210 i = 0;
1211 }
1212 } else {
1213 for (ucp = trans(*rp); *ucp; ucp++){
1214 c = *ucp;
1215 Bputc(&fout, c);
1216 if(i++ > 71) {
1217 Bprint(&fout, "\\\n");
1218 i = 0;
1219 }
1220 }
1221 }
1222 }
1223 if(c == ' ')
1224 Bprint(&fout, "\\n");
1225 Bputc(&fout, '\n');
1226 break;
1227 case NCOM:
1228 if(!nflag)
1229 putline(&fout, linebuf, spend-linebuf);
1230
1231 if(aptr > abuf)
1232 arout();
1233 if((execp = gline(linebuf)) == 0) {
1234 delflag = 1;
1235 break;
1236 }
1237 spend = execp;
1238 break;
1239 case CNCOM:
1240 if(aptr > abuf)
1241 arout();
1242 *spend++ = '\n';
1243 if((execp = gline(spend)) == 0) {
1244 delflag = 1;
1245 break;
1246 }
1247 spend = execp;
1248 break;
1249 case PCOM:
1250 putline(&fout, linebuf, spend-linebuf);
1251 break;
1252 case CPCOM:
1253 cpcom:
1254 for(rp = linebuf; *rp && *rp != '\n'; rp++)
1255 Bputc(&fout, *rp);
1256 Bputc(&fout, '\n');
1257 break;
1258 case QCOM:
1259 if(!nflag)
1260 putline(&fout, linebuf, spend-linebuf);
1261 if(aptr > abuf)
1262 arout();
1263 exits(0);
1264 case RCOM:
1265 *aptr++ = ipc;
1266 if(aptr >= &abuf[MAXADDS])
1267 quit("sed: Too many reads after line %ld\n",
1268 (char *) lnum);
1269 *aptr = 0;
1270 break;
1271 case SCOM:
1272 i = substitute(ipc);
1273 if(i && ipc->pfl)
1274 if(ipc->pfl == 1)
1275 putline(&fout, linebuf, spend-linebuf);
1276 else
1277 goto cpcom;
1278 if(i && ipc->fcode)
1279 goto wcom;
1280 break;
1281
1282 case TCOM:
1283 if(sflag == 0) break;
1284 sflag = 0;
1285 jflag = 1;
1286 break;
1287
1288 wcom:
1289 case WCOM:
1290 putline(ipc->fcode,linebuf, spend-linebuf);
1291 break;
1292 case XCOM:
1293 p1 = linebuf;
1294 p2 = genbuf;
1295 while(*p2++ = *p1++);
1296 p1 = holdsp;
1297 p2 = linebuf;
1298 while(*p2++ = *p1++);
1299 spend = p2 - 1;
1300 p1 = genbuf;
1301 p2 = holdsp;
1302 while(*p2++ = *p1++);
1303 hspend = p2 - 1;
1304 break;
1305 case YCOM:
1306 p1 = linebuf;
1307 p2 = ipc->u.text;
1308 for (i = *p2++; *p1; p1++){
1309 if (*p1 <= i) *p1 = p2[*p1];
1310 }
1311 break;
1312 }
1313
1314}
1315
1316void
1317putline(Biobuf *bp, Rune *buf, int n)
1318{
1319 while (n--)
1320 Bputrune(bp, *buf++);
1321 Bputc(bp, '\n');
rsca09e8572006-06-26 00:03:55 +00001322 if(lflag)
1323 Bflush(bp);
rsc39405062005-01-13 04:56:07 +00001324}
1325
1326int
1327ecmp(Rune *a, Rune *b, int count)
1328{
1329 while(count--)
1330 if(*a++ != *b++) return(0);
1331 return(1);
1332}
1333
1334void
1335arout(void)
1336{
1337 Rune *p1;
1338 Biobuf *fi;
1339 int c;
1340 char *s;
1341 char buf[128];
1342
1343 for (aptr = abuf; *aptr; aptr++) {
1344 if((*aptr)->command == ACOM) {
1345 for(p1 = (*aptr)->u.text; *p1; p1++ )
1346 Bputrune(&fout, *p1);
1347 Bputc(&fout, '\n');
1348 } else {
1349 for(s = buf, p1= (*aptr)->u.text; *p1; p1++)
1350 s += runetochar(s, p1);
1351 *s = '\0';
1352 if((fi = Bopen(buf, OREAD)) == 0)
1353 continue;
1354 while((c = Bgetc(fi)) >= 0)
1355 Bputc(&fout, c);
1356 Bterm(fi);
1357 }
1358 }
1359 aptr = abuf;
1360 *aptr = 0;
1361}
1362
/*
 * Terminate the program with the exit status "error".
 */
void
errexit(void)
{
	exits("error");
}
1368
/*
 * Report a fatal error and exit.  msg is a print format that may
 * consume the single argument arg; the output is prefixed with
 * "sed: " and followed by a newline.
 */
void
quit(char *msg, char *arg)
{
	fprint(2, "sed: ");
	fprint(2, msg, arg);
	fprint(2, "\n");
	errexit();
}
1377
1378Rune *
1379gline(Rune *addr)
1380{
1381 long c;
1382 Rune *p;
1383
1384 static long peekc = 0;
1385
1386 if (f == 0 && opendata() < 0)
1387 return 0;
1388 sflag = 0;
1389 lnum++;
1390/* Bflush(&fout);********* dumped 4/30/92 - bobf****/
1391 do {
1392 p = addr;
1393 for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1394 if (c == '\n') {
1395 if ((peekc = Bgetrune(f)) < 0) {
1396 if (fhead == 0)
1397 dolflag = 1;
1398 }
1399 *p = '\0';
1400 return p;
1401 }
1402 if (c && p < lbend)
1403 *p++ = c;
1404 }
1405 /* return partial final line, adding implicit newline */
1406 if(p != addr) {
1407 *p = '\0';
1408 peekc = -1;
1409 if (fhead == 0)
1410 dolflag = 1;
1411 return p;
1412 }
1413 peekc = 0;
1414 Bterm(f);
1415 } while (opendata() > 0); /* Switch to next stream */
1416 f = 0;
1417 return 0;
1418}
1419
1420 /* Data file input section - the intent is to transparently
1421 * catenate all data input streams.
1422 */
1423void
1424enroll(char *filename) /* Add a file to the input file cache */
1425{
1426 FileCache *fp;
1427
1428 if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0)
1429 quit("Out of memory", 0);
1430 if (ftail == 0)
1431 fhead = fp;
1432 else
1433 ftail->next = fp;
1434 ftail = fp;
1435 fp->next = 0;
1436 fp->name = filename; /* 0 => stdin */
1437}
1438
1439int
1440opendata(void)
1441{
1442 if (fhead == 0)
1443 return -1;
1444 if (fhead->name) {
1445 if ((f = Bopen(fhead->name, OREAD)) == 0)
1446 quit("Can't open %s", fhead->name);
1447 } else {
1448 Binit(&bstdin, 0, OREAD);
1449 f = &bstdin;
1450 }
1451 fhead = fhead->next;
1452 return 1;
1453}