blob: c15db2e4528f7fa6a5292017b89948449a769f67 [file] [log] [blame]
rsc39405062005-01-13 04:56:07 +00001#include <u.h>
2#include <libc.h>
3#include <bio.h>
4#include <ctype.h>
5
6#define Bungetrune Bungetc /* ok for now. */
7
8/*
9 * all these are 32 bit
10 */
11#define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */
12#define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037)))
13#define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037)))
14#define NWORDS(n) (((n)+32)/32)
15
rsc459eae02005-01-14 17:40:02 +000016char *PARSER = "#9/lib/yaccpar";
17char *PARSERS = "#9/lib/yaccpars";
wkjcae9bfe2005-02-14 20:27:13 +000018
rsc39405062005-01-13 04:56:07 +000019#define TEMPNAME "y.tmp.XXXXXX"
20#define ACTNAME "y.acts.XXXXXX"
21#define OFILE "tab.c"
22#define FILEU "output"
23#define FILED "tab.h"
24#define FILEDEBUG "debug"
25
26enum
27{
28/*
29 * the following are adjustable
30 * according to memory size
31 */
32 ACTSIZE = 40000,
33 MEMSIZE = 40000,
34 NSTATES = 2000,
35 NTERMS = 511,
36 NPROD = 1600,
37 NNONTERM = 600,
38 TEMPSIZE = 2000,
39 CNAMSZ = 10000,
40 LSETSIZE = 2400,
41 WSETSIZE = 350,
42
43 NAMESIZE = 50,
44 NTYPES = 63,
45 ISIZE = 400,
46
47 PRIVATE = 0xE000, /* unicode private use */
48
49 /* relationships which must hold:
50 TBITSET ints must hold NTERMS+1 bits...
51 WSETSIZE >= NNONTERM
52 LSETSIZE >= NNONTERM
53 TEMPSIZE >= NTERMS + NNONTERM + 1
54 TEMPSIZE >= NSTATES
55 */
56
57 NTBASE = 010000,
58 ERRCODE = 8190,
59 ACCEPTCODE = 8191,
60
61 NOASC = 0, /* no assoc. */
62 LASC = 1, /* left assoc. */
63 RASC = 2, /* right assoc. */
64 BASC = 3, /* binary assoc. */
65
66 /* flags for state generation */
67
68 DONE = 0,
69 MUSTDO = 1,
70 MUSTLOOKAHEAD = 2,
71
72 /* flags for a rule having an action, and being reduced */
73
74 ACTFLAG = 04,
75 REDFLAG = 010,
76
77 /* output parser flags */
78 YYFLAG1 = -1000,
79
80 /* parse tokens */
81 IDENTIFIER = PRIVATE,
82 MARK,
83 TERM,
84 LEFT,
85 RIGHT,
86 BINARY,
87 PREC,
88 LCURLY,
89 IDENTCOLON,
90 NUMBER,
91 START,
92 TYPEDEF,
93 TYPENAME,
94 UNION,
95
96 ENDFILE = 0,
97
98 EMPTY = 1,
99 WHOKNOWS = 0,
100 OK = 1,
rsccbeb0b22006-04-01 19:24:03 +0000101 NOMORE = -1000
rsc39405062005-01-13 04:56:07 +0000102};
103
104 /* macros for getting associativity and precedence levels */
105
106#define ASSOC(i) ((i)&03)
107#define PLEVEL(i) (((i)>>4)&077)
108#define TYPE(i) (((i)>>10)&077)
109
110 /* macros for setting associativity and precedence levels */
111
112#define SETASC(i,j) i |= j
113#define SETPLEV(i,j) i |= (j<<4)
114#define SETTYPE(i,j) i |= (j<<10)
115
116 /* looping macros */
117
118#define TLOOP(i) for(i=1; i<=ntokens; i++)
119#define NTLOOP(i) for(i=0; i<=nnonter; i++)
120#define PLOOP(s,i) for(i=s; i<nprod; i++)
121#define SLOOP(i) for(i=0; i<nstate; i++)
122#define WSBUMP(x) x++
123#define WSLOOP(s,j) for(j=s; j<cwp; j++)
124#define ITMLOOP(i,p,q) for(q=pstate[i+1], p=pstate[i]; p<q; p++)
125#define SETLOOP(i) for(i=0; i<tbitset; i++)
126
127 /* command to clobber tempfiles after use */
128
129#define ZAPFILE(x) if(x) remove(x)
130
131 /* I/O descriptors */
132
133Biobuf* faction; /* file for saving actions */
134Biobuf* fdefine; /* file for #defines */
135Biobuf* fdebug; /* y.debug for strings for debugging */
136Biobuf* ftable; /* y.tab.c file */
137Biobuf* ftemp; /* tempfile to pass 2 */
138Biobuf* finput; /* input file */
139Biobuf* foutput; /* y.output file */
140
141 /* communication variables between various I/O routines */
142
143char* infile; /* input file name */
144int numbval; /* value of an input number */
145char tokname[NAMESIZE+4]; /* input token name, slop for runes and 0 */
146
147 /* structure declarations */
148
149typedef
150struct
151{
152 int lset[TBITSET];
153} Lkset;
154
155typedef
156struct
157{
158 int* pitem;
159 Lkset* look;
160} Item;
161
162typedef
163struct
164{
165 char* name;
166 int value;
167} Symb;
168
169typedef
170struct
171{
172 int* pitem;
173 int flag;
174 Lkset ws;
175} Wset;
176
177 /* storage of names */
178
179char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */
180int cnamsz = CNAMSZ; /* size of cnames */
181char* cnamp = cnames; /* place where next name is to be put in */
182int ndefout = 4; /* number of defined symbols output */
183char* tempname;
184char* actname;
185char ttempname[] = TEMPNAME;
186char tactname[] = ACTNAME;
rsc459eae02005-01-14 17:40:02 +0000187char* parser;
rsc39405062005-01-13 04:56:07 +0000188char* yydebug;
wkjcae9bfe2005-02-14 20:27:13 +0000189int yyarg;
190int yyline = 1;
rsc39405062005-01-13 04:56:07 +0000191
192 /* storage of types */
193int ntypes; /* number of types defined */
194char* typeset[NTYPES]; /* pointers to type tags */
195
196 /* token information */
197
198int ntokens = 0 ; /* number of tokens */
199Symb tokset[NTERMS];
200int toklev[NTERMS]; /* vector with the precedence of the terminals */
201
202 /* nonterminal information */
203
204int nnonter = -1; /* the number of nonterminals */
205Symb nontrst[NNONTERM];
206int start; /* start symbol */
207
208 /* assigned token type values */
209int extval = 0;
210
211char* ytabc = OFILE; /* name of y.tab.c */
212
213 /* grammar rule information */
214
215int mem0[MEMSIZE] ; /* production storage */
216int* mem = mem0;
217int nprod = 1; /* number of productions */
218int* prdptr[NPROD]; /* pointers to descriptions of productions */
219int levprd[NPROD]; /* precedence levels for the productions */
220int rlines[NPROD]; /* line number for this rule */
221
222 /* state information */
223
224int nstate = 0; /* number of states */
225Item* pstate[NSTATES+2]; /* pointers to the descriptions of the states */
226int tystate[NSTATES]; /* contains type information about the states */
227int defact[NSTATES]; /* the default actions of states */
228int tstates[NTERMS]; /* states generated by terminal gotos */
229int ntstates[NNONTERM]; /* states generated by nonterminal gotos */
230int mstates[NSTATES]; /* chain of overflows of term/nonterm generation lists */
231int lastred; /* the number of the last reduction of a state */
232
233 /* lookahead set information */
234
235Lkset lkst[LSETSIZE];
236int nolook; /* flag to turn off lookahead computations */
237int tbitset; /* size of lookahead sets */
238int nlset = 0; /* next lookahead set index */
239int nolook = 0; /* flag to suppress lookahead computations */
240Lkset clset; /* temporary storage for lookahead computations */
241
242 /* working set information */
243
244Wset wsets[WSETSIZE];
245Wset* cwp;
246
247 /* storage for action table */
248
249int amem[ACTSIZE]; /* action table storage */
250int* memp = amem; /* next free action table position */
251int indgo[NSTATES]; /* index to the stored goto table */
252
253 /* temporary vector, indexable by states, terms, or ntokens */
254
255int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */
256int lineno = 1; /* current input line number */
257int fatfl = 1; /* if on, error is fatal */
258int nerrors = 0; /* number of errors */
259
260 /* statistics collection variables */
261
262int zzgoent;
263int zzgobest;
264int zzacent;
265int zzexcp;
266int zzclose;
267int zzrrconf;
268int zzsrconf;
269
270int* ggreed = lkst[0].lset;
271int* pgo = wsets[0].ws.lset;
272int* yypgo = &nontrst[0].value;
273
274int maxspr = 0; /* maximum spread of any entry */
275int maxoff = 0; /* maximum offset into a array */
276int* pmem = mem0;
277int* maxa;
278int nxdb = 0;
279int adb = 0;
280
281
282 /* storage for information about the nonterminals */
283
284int** pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */
285Lkset* pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */
286int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */
287
288 /* random stuff picked out from between functions */
289
290int indebug = 0;
291Wset* zzcwp = wsets;
292int zzgoent = 0;
293int zzgobest = 0;
294int zzacent = 0;
295int zzexcp = 0;
296int zzclose = 0;
297int zzsrconf = 0;
298int* zzmemsz = mem0;
299int zzrrconf = 0;
300int pidebug = 0; /* debugging flag for putitem */
301int gsdebug = 0;
302int cldebug = 0; /* debugging flag for closure */
303int pkdebug = 0;
304int g2debug = 0;
305
306struct
307{
308 char* name;
309 long value;
310} resrv[] =
311{
312 "binary", BINARY,
313 "left", LEFT,
314 "nonassoc", BINARY,
315 "prec", PREC,
316 "right", RIGHT,
317 "start", START,
318 "term", TERM,
319 "token", TERM,
320 "type", TYPEDEF,
321 "union", UNION,
322 0,
323};
324
325 /* define functions */
326
327void main(int, char**);
328void others(void);
329char* chcopy(char*, char*);
330char* writem(int*);
331char* symnam(int);
332void summary(void);
333void error(char*, ...);
334void aryfil(int*, int, int);
335int setunion(int*, int*);
336void prlook(Lkset*);
337void cpres(void);
338void cpfir(void);
339int state(int);
340void putitem(int*, Lkset*);
341void cempty(void);
342void stagen(void);
343void closure(int);
344Lkset* flset(Lkset*);
345void cleantmp(void);
346void intr(void);
347void setup(int, char**);
348void finact(void);
349int defin(int, char*);
350void defout(int);
351char* cstash(char*);
352long gettok(void);
353int fdtype(int);
354int chfind(int, char*);
355void cpyunion(void);
356void cpycode(void);
357int skipcom(void);
358void cpyact(int);
359void openup(char*, int, int, int, char*);
360void output(void);
361int apack(int*, int);
362void go2out(void);
363void go2gen(int);
364void precftn(int, int, int);
365void wract(int);
366void wrstate(int);
367void warray(char*, int*, int);
368void hideprod(void);
369void callopt(void);
370void gin(int);
371void stin(int);
372int nxti(void);
373void osummary(void);
374void aoutput(void);
375void arout(char*, int*, int);
376int gtnm(void);
377
378void
379main(int argc, char *argv[])
380{
rsc459eae02005-01-14 17:40:02 +0000381 PARSER = unsharp(PARSER);
382 PARSERS = unsharp(PARSERS);
383 parser = PARSER;
rsc39405062005-01-13 04:56:07 +0000384
385 setup(argc, argv); /* initialize and read productions */
386 tbitset = NWORDS(ntokens);
387 cpres(); /* make table of which productions yield a given nonterminal */
388 cempty(); /* make a table of which nonterminals can match the empty string */
389 cpfir(); /* make a table of firsts of nonterminals */
390 stagen(); /* generate the states */
391 output(); /* write the states and the tables */
392 go2out();
393 hideprod();
394 summary();
395 callopt();
396 others();
397 exits(0);
398}
399
400/*
401 * put out other arrays, copy the parsers
402 */
403void
404others(void)
405{
406 int c, i, j;
407
rsc459eae02005-01-14 17:40:02 +0000408 finput = Bopen(parser, OREAD);
rsc39405062005-01-13 04:56:07 +0000409 if(finput == 0)
410 error("cannot open parser %s: %r", parser);
411 warray("yyr1", levprd, nprod);
412 aryfil(temp1, nprod, 0);
413 PLOOP(1, i)
414 temp1[i] = prdptr[i+1]-prdptr[i]-2;
415 warray("yyr2", temp1, nprod);
416
417 aryfil(temp1, nstate, -1000);
418 TLOOP(i)
419 for(j=tstates[i]; j!=0; j=mstates[j])
420 temp1[j] = i;
421 NTLOOP(i)
422 for(j=ntstates[i]; j!=0; j=mstates[j])
423 temp1[j] = -i;
424 warray("yychk", temp1, nstate);
425 warray("yydef", defact, nstate);
426
427 /* put out token translation tables */
428 /* table 1 has 0-256 */
429 aryfil(temp1, 256, 0);
430 c = 0;
431 TLOOP(i) {
432 j = tokset[i].value;
433 if(j >= 0 && j < 256) {
434 if(temp1[j]) {
435 print("yacc bug -- cant have 2 different Ts with same value\n");
436 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
437 nerrors++;
438 }
439 temp1[j] = i;
440 if(j > c)
441 c = j;
442 }
443 }
444 warray("yytok1", temp1, c+1);
445
446 /* table 2 has PRIVATE-PRIVATE+256 */
447 aryfil(temp1, 256, 0);
448 c = 0;
449 TLOOP(i) {
450 j = tokset[i].value - PRIVATE;
451 if(j >= 0 && j < 256) {
452 if(temp1[j]) {
453 print("yacc bug -- cant have 2 different Ts with same value\n");
454 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
455 nerrors++;
456 }
457 temp1[j] = i;
458 if(j > c)
459 c = j;
460 }
461 }
462 warray("yytok2", temp1, c+1);
463
464 /* table 3 has everything else */
wkjcae9bfe2005-02-14 20:27:13 +0000465 Bprint(ftable, "static\tconst\tlong yytok3[] =\n{\n");
rsc39405062005-01-13 04:56:07 +0000466 c = 0;
467 TLOOP(i) {
468 j = tokset[i].value;
469 if(j >= 0 && j < 256)
470 continue;
471 if(j >= PRIVATE && j < 256+PRIVATE)
472 continue;
473
474 Bprint(ftable, "%4d,%4d,", j, i);
475 c++;
476 if(c%5 == 0)
477 Bprint(ftable, "\n");
478 }
479 Bprint(ftable, "%4d\n};\n", 0);
480
481 /* copy parser text */
482 while((c=Bgetrune(finput)) != Beof) {
483 if(c == '$') {
484 if((c = Bgetrune(finput)) != 'A')
485 Bputrune(ftable, '$');
486 else { /* copy actions */
487 faction = Bopen(actname, OREAD);
488 if(faction == 0)
489 error("cannot reopen action tempfile");
490 while((c=Bgetrune(faction)) != Beof)
491 Bputrune(ftable, c);
492 Bterm(faction);
493 ZAPFILE(actname);
494 c = Bgetrune(finput);
495 }
496 }
497 Bputrune(ftable, c);
498 }
499 Bterm(ftable);
500}
501
502/*
503 * copies string q into p, returning next free char ptr
504 */
505char*
506chcopy(char* p, char* q)
507{
508 int c;
509
510 while(c = *q) {
511 if(c == '"')
512 *p++ = '\\';
513 *p++ = c;
514 q++;
515 }
516 *p = 0;
517 return p;
518}
519
520/*
521 * creates output string for item pointed to by pp
522 */
523char*
524writem(int *pp)
525{
526 int i,*p;
527 static char sarr[ISIZE];
528 char* q;
529
530 for(p=pp; *p>0; p++)
531 ;
532 p = prdptr[-*p];
533 q = chcopy(sarr, nontrst[*p-NTBASE].name);
534 q = chcopy(q, ": ");
535 for(;;) {
536 *q = ' ';
537 p++;
538 if(p == pp)
539 *q = '.';
540 q++;
541 *q = '\0';
542 i = *p;
543 if(i <= 0)
544 break;
545 q = chcopy(q, symnam(i));
546 if(q > &sarr[ISIZE-30])
547 error("item too big");
548 }
549
550 /* an item calling for a reduction */
551 i = *pp;
552 if(i < 0 ) {
553 q = chcopy(q, " (");
554 sprint(q, "%d)", -i);
555 }
556 return sarr;
557}
558
559/*
560 * return a pointer to the name of symbol i
561 */
562char*
563symnam(int i)
564{
565 char* cp;
566
567 cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
568 if(*cp == ' ')
569 cp++;
570 return cp;
571}
572
573/*
574 * output the summary on y.output
575 */
576void
577summary(void)
578{
579
580 if(foutput != 0) {
581 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
582 ntokens, NTERMS, nnonter, NNONTERM);
583 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
584 nprod, NPROD, nstate, NSTATES);
585 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
586 zzsrconf, zzrrconf);
587 Bprint(foutput, "%d/%d working sets used\n",
588 (int)(zzcwp-wsets), WSETSIZE);
589 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
590 (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
591 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
592 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
593 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
594 Bprint(foutput, "%d goto entries\n", zzgoent);
595 Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
596 }
597 if(zzsrconf != 0 || zzrrconf != 0) {
598 print("\nconflicts: ");
599 if(zzsrconf)
600 print("%d shift/reduce", zzsrconf);
601 if(zzsrconf && zzrrconf)
602 print(", ");
603 if(zzrrconf)
604 print("%d reduce/reduce", zzrrconf);
605 print("\n");
606 }
607 if(ftemp != 0) {
608 Bterm(ftemp);
609 ftemp = 0;
610 }
611 if(fdefine != 0) {
612 Bterm(fdefine);
613 fdefine = 0;
614 }
615}
616
617/*
618 * write out error comment -- NEEDS WORK
619 */
620void
621error(char *s, ...)
622{
623 va_list arg;
624
625 nerrors++;
626 fprint(2, "\n fatal error:");
627 va_start(arg, s);
628 vfprint(2, s, arg);
629 va_end(arg);
630 fprint(2, ", %s:%d\n", infile, lineno);
631 if(!fatfl)
632 return;
633 summary();
634 cleantmp();
635 exits("error");
636}
637
638/*
639 * set elements 0 through n-1 to c
640 */
641void
642aryfil(int *v, int n, int c)
643{
644 int i;
645
646 for(i=0; i<n; i++)
647 v[i] = c;
648}
649
650/*
651 * set a to the union of a and b
652 * return 1 if b is not a subset of a, 0 otherwise
653 */
654int
655setunion(int *a, int *b)
656{
657 int i, x, sub;
658
659 sub = 0;
660 SETLOOP(i) {
661 x = *a;
662 *a |= *b;
663 if(*a != x)
664 sub = 1;
665 a++;
666 b++;
667 }
668 return sub;
669}
670
671void
672prlook(Lkset* p)
673{
674 int j, *pp;
675
676 pp = p->lset;
677 if(pp == 0)
678 Bprint(foutput, "\tNULL");
679 else {
680 Bprint(foutput, " { ");
681 TLOOP(j)
682 if(BIT(pp,j))
683 Bprint(foutput, "%s ", symnam(j));
684 Bprint(foutput, "}");
685 }
686}
687
688/*
689 * compute an array with the beginnings of productions yielding given nonterminals
690 * The array pres points to these lists
691 * the array pyield has the lists: the total size is only NPROD+1
692 */
693void
694cpres(void)
695{
696 int c, j, i, **pmem;
697 static int *pyield[NPROD];
698
699 pmem = pyield;
700 NTLOOP(i) {
701 c = i+NTBASE;
702 pres[i] = pmem;
703 fatfl = 0; /* make undefined symbols nonfatal */
704 PLOOP(0, j)
705 if(*prdptr[j] == c)
706 *pmem++ = prdptr[j]+1;
707 if(pres[i] == pmem)
708 error("nonterminal %s not defined!", nontrst[i].name);
709 }
710 pres[i] = pmem;
711 fatfl = 1;
712 if(nerrors) {
713 summary();
714 cleantmp();
715 exits("error");
716 }
717 if(pmem != &pyield[nprod])
718 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
719}
720
721/*
722 * compute an array with the first of nonterminals
723 */
724void
725cpfir(void)
726{
727 int *p, **s, i, **t, ch, changes;
728
729 zzcwp = &wsets[nnonter];
730 NTLOOP(i) {
731 aryfil(wsets[i].ws.lset, tbitset, 0);
732 t = pres[i+1];
733 /* initially fill the sets */
734 for(s=pres[i]; s<t; ++s)
735 for(p = *s; (ch = *p) > 0; ++p) {
736 if(ch < NTBASE) {
737 SETBIT(wsets[i].ws.lset, ch);
738 break;
739 }
740 if(!pempty[ch-NTBASE])
741 break;
742 }
743 }
744
745 /* now, reflect transitivity */
746 changes = 1;
747 while(changes) {
748 changes = 0;
749 NTLOOP(i) {
750 t = pres[i+1];
751 for(s = pres[i]; s < t; ++s)
752 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
753 changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
754 if(!pempty[ch])
755 break;
756 }
757 }
758 }
759
760 NTLOOP(i)
761 pfirst[i] = flset(&wsets[i].ws);
762 if(!indebug)
763 return;
764 if(foutput != 0)
765 NTLOOP(i) {
766 Bprint(foutput, "\n%s: ", nontrst[i].name);
767 prlook(pfirst[i]);
768 Bprint(foutput, " %d\n", pempty[i]);
769 }
770}
771
772/*
773 * sorts last state,and sees if it equals earlier ones. returns state number
774 */
775int
776state(int c)
777{
778 Item *p1, *p2, *k, *l, *q1, *q2;
779 int size1, size2, i;
780
781 p1 = pstate[nstate];
782 p2 = pstate[nstate+1];
783 if(p1 == p2)
784 return 0; /* null state */
785 /* sort the items */
786 for(k = p2-1; k > p1; k--) /* make k the biggest */
787 for(l = k-1; l >= p1; --l)
788 if(l->pitem > k->pitem) {
789 int *s;
790 Lkset *ss;
791
792 s = k->pitem;
793 k->pitem = l->pitem;
794 l->pitem = s;
795 ss = k->look;
796 k->look = l->look;
797 l->look = ss;
798 }
799 size1 = p2 - p1; /* size of state */
800
801 for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
802 /* get ith state */
803 q1 = pstate[i];
804 q2 = pstate[i+1];
805 size2 = q2 - q1;
806 if(size1 != size2)
807 continue;
808 k = p1;
809 for(l = q1; l < q2; l++) {
810 if(l->pitem != k->pitem)
811 break;
812 k++;
813 }
814 if(l != q2)
815 continue;
816 /* found it */
817 pstate[nstate+1] = pstate[nstate]; /* delete last state */
818 /* fix up lookaheads */
819 if(nolook)
820 return i;
821 for(l = q1, k = p1; l < q2; ++l, ++k ) {
822 int s;
823
824 SETLOOP(s)
825 clset.lset[s] = l->look->lset[s];
826 if(setunion(clset.lset, k->look->lset)) {
827 tystate[i] = MUSTDO;
828 /* register the new set */
829 l->look = flset( &clset );
830 }
831 }
832 return i;
833 }
834 /* state is new */
835 if(nolook)
836 error("yacc state/nolook error");
837 pstate[nstate+2] = p2;
838 if(nstate+1 >= NSTATES)
839 error("too many states");
840 if(c >= NTBASE) {
841 mstates[nstate] = ntstates[c-NTBASE];
842 ntstates[c-NTBASE] = nstate;
843 } else {
844 mstates[nstate] = tstates[c];
845 tstates[c] = nstate;
846 }
847 tystate[nstate] = MUSTDO;
848 return nstate++;
849}
850
851void
852putitem(int *ptr, Lkset *lptr)
853{
854 Item *j;
855
856 if(pidebug && foutput != 0)
857 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
858 j = pstate[nstate+1];
859 j->pitem = ptr;
860 if(!nolook)
861 j->look = flset(lptr);
862 pstate[nstate+1] = ++j;
863 if((int*)j > zzmemsz) {
864 zzmemsz = (int*)j;
865 if(zzmemsz >= &mem0[MEMSIZE])
866 error("out of state space");
867 }
868}
869
870/*
871 * mark nonterminals which derive the empty string
872 * also, look for nonterminals which don't derive any token strings
873 */
874void
875cempty(void)
876{
877
878 int i, *p;
879
880 /* first, use the array pempty to detect productions that can never be reduced */
881 /* set pempty to WHONOWS */
882 aryfil(pempty, nnonter+1, WHOKNOWS);
883
884 /* now, look at productions, marking nonterminals which derive something */
885more:
886 PLOOP(0, i) {
887 if(pempty[*prdptr[i] - NTBASE])
888 continue;
889 for(p = prdptr[i]+1; *p >= 0; ++p)
890 if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
891 break;
892 /* production can be derived */
893 if(*p < 0) {
894 pempty[*prdptr[i]-NTBASE] = OK;
895 goto more;
896 }
897 }
898
899 /* now, look at the nonterminals, to see if they are all OK */
900 NTLOOP(i) {
901 /* the added production rises or falls as the start symbol ... */
902 if(i == 0)
903 continue;
904 if(pempty[i] != OK) {
905 fatfl = 0;
906 error("nonterminal %s never derives any token string", nontrst[i].name);
907 }
908 }
909
910 if(nerrors) {
911 summary();
912 cleantmp();
913 exits("error");
914 }
915
916 /* now, compute the pempty array, to see which nonterminals derive the empty string */
917 /* set pempty to WHOKNOWS */
918 aryfil( pempty, nnonter+1, WHOKNOWS);
919
920 /* loop as long as we keep finding empty nonterminals */
921
922again:
923 PLOOP(1, i) {
924 /* not known to be empty */
925 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
926 for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
927 ;
928 /* we have a nontrivially empty nonterminal */
929 if(*p < 0) {
930 pempty[*prdptr[i]-NTBASE] = EMPTY;
931 /* got one ... try for another */
932 goto again;
933 }
934 }
935 }
936}
937
938/*
939 * generate the states
940 */
941void
942stagen(void)
943{
944
945 int c, i, j, more;
946 Wset *p, *q;
947
948 /* initialize */
949 nstate = 0;
950
951 /* THIS IS FUNNY from the standpoint of portability
952 * it represents the magic moment when the mem0 array, which has
953 * been holding the productions, starts to hold item pointers, of a
954 * different type...
955 * someday, alloc should be used to allocate all this stuff... for now, we
956 * accept that if pointers don't fit in integers, there is a problem...
957 */
958
959 pstate[0] = pstate[1] = (Item*)mem;
960 aryfil(clset.lset, tbitset, 0);
961 putitem(prdptr[0]+1, &clset);
962 tystate[0] = MUSTDO;
963 nstate = 1;
964 pstate[2] = pstate[1];
965
966 aryfil(amem, ACTSIZE, 0);
967
968 /* now, the main state generation loop */
969 for(more=1; more;) {
970 more = 0;
971 SLOOP(i) {
972 if(tystate[i] != MUSTDO)
973 continue;
974 tystate[i] = DONE;
975 aryfil(temp1, nnonter+1, 0);
976 /* take state i, close it, and do gotos */
977 closure(i);
978 /* generate goto's */
979 WSLOOP(wsets, p) {
980 if(p->flag)
981 continue;
982 p->flag = 1;
983 c = *(p->pitem);
984 if(c <= 1) {
985 if(pstate[i+1]-pstate[i] <= p-wsets)
986 tystate[i] = MUSTLOOKAHEAD;
987 continue;
988 }
989 /* do a goto on c */
990 WSLOOP(p, q)
991 /* this item contributes to the goto */
992 if(c == *(q->pitem)) {
993 putitem(q->pitem+1, &q->ws);
994 q->flag = 1;
995 }
996 if(c < NTBASE)
997 state(c); /* register new state */
998 else
999 temp1[c-NTBASE] = state(c);
1000 }
1001 if(gsdebug && foutput != 0) {
1002 Bprint(foutput, "%d: ", i);
1003 NTLOOP(j)
1004 if(temp1[j])
1005 Bprint(foutput, "%s %d, ",
1006 nontrst[j].name, temp1[j]);
1007 Bprint(foutput, "\n");
1008 }
1009 indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1010 /* do some more */
1011 more = 1;
1012 }
1013 }
1014}
1015
1016/*
1017 * generate the closure of state i
1018 */
1019void
1020closure(int i)
1021{
1022
1023 Wset *u, *v;
1024 Item *p, *q;
1025 int c, ch, work, k, *pi, **s, **t;
1026
1027 zzclose++;
1028
1029 /* first, copy kernel of state i to wsets */
1030 cwp = wsets;
1031 ITMLOOP(i, p, q) {
1032 cwp->pitem = p->pitem;
1033 cwp->flag = 1; /* this item must get closed */
1034 SETLOOP(k)
1035 cwp->ws.lset[k] = p->look->lset[k];
1036 WSBUMP(cwp);
1037 }
1038
1039 /* now, go through the loop, closing each item */
1040 work = 1;
1041 while(work) {
1042 work = 0;
1043 WSLOOP(wsets, u) {
1044 if(u->flag == 0)
1045 continue;
1046 /* dot is before c */
1047 c = *(u->pitem);
1048 if(c < NTBASE) {
1049 u->flag = 0;
1050 /* only interesting case is where . is before nonterminal */
1051 continue;
1052 }
1053
1054 /* compute the lookahead */
1055 aryfil(clset.lset, tbitset, 0);
1056
1057 /* find items involving c */
1058 WSLOOP(u, v)
1059 if(v->flag == 1 && *(pi=v->pitem) == c) {
1060 v->flag = 0;
1061 if(nolook)
1062 continue;
1063 while((ch = *++pi) > 0) {
1064 /* terminal symbol */
1065 if(ch < NTBASE) {
1066 SETBIT(clset.lset, ch);
1067 break;
1068 }
1069 /* nonterminal symbol */
1070 setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1071 if(!pempty[ch-NTBASE])
1072 break;
1073 }
1074 if(ch <= 0)
1075 setunion(clset.lset, v->ws.lset);
1076 }
1077
1078 /*
1079 * now loop over productions derived from c
1080 * c is now nonterminal number
1081 */
1082 c -= NTBASE;
1083 t = pres[c+1];
1084 for(s = pres[c]; s < t; ++s) {
1085 /*
1086 * put these items into the closure
1087 * is the item there
1088 */
1089 WSLOOP(wsets, v)
1090 /* yes, it is there */
1091 if(v->pitem == *s) {
1092 if(nolook)
1093 goto nexts;
1094 if(setunion(v->ws.lset, clset.lset))
1095 v->flag = work = 1;
1096 goto nexts;
1097 }
1098
1099 /* not there; make a new entry */
1100 if(cwp-wsets+1 >= WSETSIZE)
1101 error( "working set overflow");
1102 cwp->pitem = *s;
1103 cwp->flag = 1;
1104 if(!nolook) {
1105 work = 1;
1106 SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1107 }
1108 WSBUMP(cwp);
1109
1110 nexts:;
1111 }
1112 }
1113 }
1114
1115 /* have computed closure; flags are reset; return */
1116 if(cwp > zzcwp)
1117 zzcwp = cwp;
1118 if(cldebug && foutput != 0) {
1119 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1120 WSLOOP(wsets, u) {
1121 if(u->flag)
1122 Bprint(foutput, "flag set!\n");
1123 u->flag = 0;
1124 Bprint(foutput, "\t%s", writem(u->pitem));
1125 prlook(&u->ws);
1126 Bprint(foutput, "\n");
1127 }
1128 }
1129}
1130
1131/*
1132 * decide if the lookahead set pointed to by p is known
1133 * return pointer to a perminent location for the set
1134 */
1135Lkset*
1136flset(Lkset *p)
1137{
1138 Lkset *q;
1139 int *u, *v, *w, j;
1140
1141 for(q = &lkst[nlset]; q-- > lkst;) {
1142 u = p->lset;
1143 v = q->lset;
1144 w = &v[tbitset];
1145 while(v < w)
1146 if(*u++ != *v++)
1147 goto more;
1148 /* we have matched */
1149 return q;
1150 more:;
1151 }
1152 /* add a new one */
1153 q = &lkst[nlset++];
1154 if(nlset >= LSETSIZE)
1155 error("too many lookahead sets");
1156 SETLOOP(j)
1157 q->lset[j] = p->lset[j];
1158 return q;
1159}
1160
1161void
1162cleantmp(void)
1163{
1164 ZAPFILE(actname);
1165 ZAPFILE(tempname);
1166}
1167
1168void
1169intr(void)
1170{
1171 cleantmp();
1172 exits("interrupted");
1173}
1174
1175void
1176setup(int argc, char *argv[])
1177{
1178 long c, t;
1179 int i, j, fd, lev, ty, ytab, *p;
1180 int vflag, dflag, stem;
1181 char actnm[8], *stemc, *s, dirbuf[128];
wkjcae9bfe2005-02-14 20:27:13 +00001182 Biobuf *fout;
rsc39405062005-01-13 04:56:07 +00001183
1184 ytab = 0;
1185 vflag = 0;
1186 dflag = 0;
1187 stem = 0;
1188 stemc = "y";
1189 foutput = 0;
1190 fdefine = 0;
1191 fdebug = 0;
1192 ARGBEGIN{
1193 case 'v':
1194 case 'V':
1195 vflag++;
1196 break;
1197 case 'D':
1198 yydebug = ARGF();
1199 break;
wkjcae9bfe2005-02-14 20:27:13 +00001200 case 'a':
1201 yyarg = 1;
1202 break;
rsc39405062005-01-13 04:56:07 +00001203 case 'd':
1204 dflag++;
1205 break;
wkjcae9bfe2005-02-14 20:27:13 +00001206 case 'l':
1207 yyline = 0;
1208 break;
rsc39405062005-01-13 04:56:07 +00001209 case 'o':
1210 ytab++;
1211 ytabc = ARGF();
1212 break;
1213 case 's':
1214 stem++;
1215 stemc = ARGF();
1216 break;
1217 case 'S':
1218 parser = PARSERS;
1219 break;
1220 default:
1221 error("illegal option: %c", ARGC());
1222 }ARGEND
1223 openup(stemc, dflag, vflag, ytab, ytabc);
wkjcae9bfe2005-02-14 20:27:13 +00001224 fout = dflag?fdefine:ftable;
1225 if(yyarg){
1226 Bprint(fdefine, "#define\tYYARG\t1\n\n");
1227 }
rsc39405062005-01-13 04:56:07 +00001228 if((fd = mkstemp(ttempname)) >= 0){
1229 tempname = ttempname;
1230 ftemp = Bfdopen(fd, OWRITE);
1231 }
1232 if((fd = mkstemp(tactname)) >= 0){
1233 actname = tactname;
1234 faction = Bfdopen(fd, OWRITE);
1235 }
1236 if(ftemp == 0 || faction == 0)
1237 error("cannot open temp file");
1238 if(argc < 1)
1239 error("no input file");
1240 infile = argv[0];
1241 if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1242 i = strlen(infile)+1+strlen(dirbuf)+1+10;
1243 s = malloc(i);
1244 if(s != nil){
1245 snprint(s, i, "%s/%s", dirbuf, infile);
1246 cleanname(s);
1247 infile = s;
1248 }
1249 }
1250 finput = Bopen(infile, OREAD);
1251 if(finput == 0)
1252 error("cannot open '%s'", argv[0]);
1253 cnamp = cnames;
1254
1255 defin(0, "$end");
1256 extval = PRIVATE; /* tokens start in unicode 'private use' */
1257 defin(0, "error");
1258 defin(1, "$accept");
1259 defin(0, "$unk");
1260 mem = mem0;
1261 i = 0;
1262
1263 for(t = gettok(); t != MARK && t != ENDFILE;)
1264 switch(t) {
1265 case ';':
1266 t = gettok();
1267 break;
1268
1269 case START:
1270 if(gettok() != IDENTIFIER)
1271 error("bad %%start construction");
1272 start = chfind(1, tokname);
1273 t = gettok();
1274 continue;
1275
1276 case TYPEDEF:
1277 if(gettok() != TYPENAME)
1278 error("bad syntax in %%type");
1279 ty = numbval;
1280 for(;;) {
1281 t = gettok();
1282 switch(t) {
1283 case IDENTIFIER:
1284 if((t=chfind(1, tokname)) < NTBASE) {
1285 j = TYPE(toklev[t]);
1286 if(j != 0 && j != ty)
1287 error("type redeclaration of token %s",
1288 tokset[t].name);
1289 else
1290 SETTYPE(toklev[t], ty);
1291 } else {
1292 j = nontrst[t-NTBASE].value;
1293 if(j != 0 && j != ty)
1294 error("type redeclaration of nonterminal %s",
1295 nontrst[t-NTBASE].name );
1296 else
1297 nontrst[t-NTBASE].value = ty;
1298 }
1299 case ',':
1300 continue;
1301 case ';':
1302 t = gettok();
1303 default:
1304 break;
1305 }
1306 break;
1307 }
1308 continue;
1309
1310 case UNION:
1311 /* copy the union declaration to the output */
1312 cpyunion();
1313 t = gettok();
1314 continue;
1315
1316 case LEFT:
1317 case BINARY:
1318 case RIGHT:
1319 i++;
1320
1321 case TERM:
1322 /* nonzero means new prec. and assoc. */
1323 lev = t-TERM;
1324 ty = 0;
1325
1326 /* get identifiers so defined */
1327 t = gettok();
1328
1329 /* there is a type defined */
1330 if(t == TYPENAME) {
1331 ty = numbval;
1332 t = gettok();
1333 }
1334 for(;;) {
1335 switch(t) {
1336 case ',':
1337 t = gettok();
1338 continue;
1339
1340 case ';':
1341 break;
1342
1343 case IDENTIFIER:
1344 j = chfind(0, tokname);
1345 if(j >= NTBASE)
1346 error("%s defined earlier as nonterminal", tokname);
1347 if(lev) {
1348 if(ASSOC(toklev[j]))
1349 error("redeclaration of precedence of %s", tokname);
1350 SETASC(toklev[j], lev);
1351 SETPLEV(toklev[j], i);
1352 }
1353 if(ty) {
1354 if(TYPE(toklev[j]))
1355 error("redeclaration of type of %s", tokname);
1356 SETTYPE(toklev[j],ty);
1357 }
1358 t = gettok();
1359 if(t == NUMBER) {
1360 tokset[j].value = numbval;
1361 if(j < ndefout && j > 3)
1362 error("please define type number of %s earlier",
1363 tokset[j].name);
1364 t = gettok();
1365 }
1366 continue;
1367 }
1368 break;
1369 }
1370 continue;
1371
1372 case LCURLY:
1373 defout(0);
1374 cpycode();
1375 t = gettok();
1376 continue;
1377
1378 default:
1379 error("syntax error");
1380 }
1381 if(t == ENDFILE)
1382 error("unexpected EOF before %%");
1383
1384 /* t is MARK */
wkjcae9bfe2005-02-14 20:27:13 +00001385 if(!yyarg)
1386 Bprint(ftable, "extern int yyerrflag;\n");
rsc39405062005-01-13 04:56:07 +00001387 Bprint(ftable, "#ifndef YYMAXDEPTH\n");
1388 Bprint(ftable, "#define YYMAXDEPTH 150\n");
1389 Bprint(ftable, "#endif\n" );
1390 if(!ntypes) {
1391 Bprint(ftable, "#ifndef YYSTYPE\n");
1392 Bprint(ftable, "#define YYSTYPE int\n");
1393 Bprint(ftable, "#endif\n");
1394 }
wkjcae9bfe2005-02-14 20:27:13 +00001395 if(!yyarg){
1396 Bprint(ftable, "YYSTYPE yylval;\n");
1397 Bprint(ftable, "YYSTYPE yyval;\n");
1398 }else{
1399 if(dflag)
1400 Bprint(ftable, "#include \"%s.%s\"\n\n", stemc, FILED);
1401 Bprint(fout, "struct Yyarg {\n");
1402 Bprint(fout, "\tint\tyynerrs;\n");
1403 Bprint(fout, "\tint\tyyerrflag;\n");
1404 Bprint(fout, "\tvoid*\targ;\n");
1405 Bprint(fout, "\tYYSTYPE\tyyval;\n");
1406 Bprint(fout, "\tYYSTYPE\tyylval;\n");
1407 Bprint(fout, "};\n\n");
1408 }
rsc39405062005-01-13 04:56:07 +00001409 prdptr[0] = mem;
1410
1411 /* added production */
1412 *mem++ = NTBASE;
1413
1414 /* if start is 0, we will overwrite with the lhs of the first rule */
1415 *mem++ = start;
1416 *mem++ = 1;
1417 *mem++ = 0;
1418 prdptr[1] = mem;
1419 while((t=gettok()) == LCURLY)
1420 cpycode();
1421 if(t != IDENTCOLON)
1422 error("bad syntax on first rule");
1423
1424 if(!start)
1425 prdptr[0][1] = chfind(1, tokname);
1426
1427 /* read rules */
1428 while(t != MARK && t != ENDFILE) {
1429 /* process a rule */
1430 rlines[nprod] = lineno;
1431 if(t == '|')
1432 *mem++ = *prdptr[nprod-1];
1433 else
1434 if(t == IDENTCOLON) {
1435 *mem = chfind(1, tokname);
1436 if(*mem < NTBASE)
1437 error("token illegal on LHS of grammar rule");
1438 mem++;
1439 } else
1440 error("illegal rule: missing semicolon or | ?");
1441 /* read rule body */
1442 t = gettok();
1443
1444 more_rule:
1445 while(t == IDENTIFIER) {
1446 *mem = chfind(1, tokname);
1447 if(*mem < NTBASE)
1448 levprd[nprod] = toklev[*mem];
1449 mem++;
1450 t = gettok();
1451 }
1452 if(t == PREC) {
1453 if(gettok() != IDENTIFIER)
1454 error("illegal %%prec syntax");
1455 j = chfind(2, tokname);
1456 if(j >= NTBASE)
1457 error("nonterminal %s illegal after %%prec",
1458 nontrst[j-NTBASE].name);
1459 levprd[nprod] = toklev[j];
1460 t = gettok();
1461 }
1462 if(t == '=') {
1463 levprd[nprod] |= ACTFLAG;
1464 Bprint(faction, "\ncase %d:", nprod);
1465 cpyact(mem-prdptr[nprod]-1);
1466 Bprint(faction, " break;");
1467 if((t=gettok()) == IDENTIFIER) {
1468
1469 /* action within rule... */
1470 sprint(actnm, "$$%d", nprod);
1471
1472 /* make it a nonterminal */
1473 j = chfind(1, actnm);
1474
1475 /*
1476 * the current rule will become rule number nprod+1
1477 * move the contents down, and make room for the null
1478 */
1479 for(p = mem; p >= prdptr[nprod]; --p)
1480 p[2] = *p;
1481 mem += 2;
1482
1483 /* enter null production for action */
1484 p = prdptr[nprod];
1485 *p++ = j;
1486 *p++ = -nprod;
1487
1488 /* update the production information */
1489 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1490 levprd[nprod] = ACTFLAG;
1491 if(++nprod >= NPROD)
1492 error("more than %d rules", NPROD);
1493 prdptr[nprod] = p;
1494
1495 /* make the action appear in the original rule */
1496 *mem++ = j;
1497
1498 /* get some more of the rule */
1499 goto more_rule;
1500 }
1501 }
1502
1503 while(t == ';')
1504 t = gettok();
1505 *mem++ = -nprod;
1506
1507 /* check that default action is reasonable */
1508 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1509
1510 /* no explicit action, LHS has value */
1511 int tempty;
1512
1513 tempty = prdptr[nprod][1];
1514 if(tempty < 0)
1515 error("must return a value, since LHS has a type");
1516 else
1517 if(tempty >= NTBASE)
1518 tempty = nontrst[tempty-NTBASE].value;
1519 else
1520 tempty = TYPE(toklev[tempty]);
1521 if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1522 error("default action causes potential type clash");
1523 }
1524 nprod++;
1525 if(nprod >= NPROD)
1526 error("more than %d rules", NPROD);
1527 prdptr[nprod] = mem;
1528 levprd[nprod] = 0;
1529 }
1530
1531 /* end of all rules */
1532 defout(1);
1533
1534 finact();
1535 if(t == MARK) {
wkjcae9bfe2005-02-14 20:27:13 +00001536 Bprint(ftable, "\n");
1537 if(yyline)
1538 Bprint(ftable, "#line\t%d\t\"%s\"\n", lineno, infile);
rsc39405062005-01-13 04:56:07 +00001539 while((c=Bgetrune(finput)) != Beof)
1540 Bputrune(ftable, c);
1541 }
1542 Bterm(finput);
1543}
1544
1545/*
1546 * finish action routine
1547 */
1548void
1549finact(void)
1550{
1551
1552 Bterm(faction);
1553 Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1554 Bprint(ftable, "#define YYERRCODE %d\n", 2);
1555}
1556
1557/*
1558 * define s to be a terminal if t=0
1559 * or a nonterminal if t=1
1560 */
1561int
1562defin(int nt, char *s)
1563{
1564 int val;
1565 Rune rune;
1566
1567 val = 0;
1568 if(nt) {
1569 nnonter++;
1570 if(nnonter >= NNONTERM)
1571 error("too many nonterminals, limit %d",NNONTERM);
1572 nontrst[nnonter].name = cstash(s);
1573 return NTBASE + nnonter;
1574 }
1575
1576 /* must be a token */
1577 ntokens++;
1578 if(ntokens >= NTERMS)
1579 error("too many terminals, limit %d", NTERMS);
1580 tokset[ntokens].name = cstash(s);
1581
1582 /* establish value for token */
1583 /* single character literal */
1584 if(s[0] == ' ') {
1585 val = chartorune(&rune, &s[1]);
1586 if(s[val+1] == 0) {
1587 val = rune;
1588 goto out;
1589 }
1590 }
1591
1592 /* escape sequence */
1593 if(s[0] == ' ' && s[1] == '\\') {
1594 if(s[3] == 0) {
1595 /* single character escape sequence */
1596 switch(s[2]) {
1597 case 'n': val = '\n'; break;
1598 case 'r': val = '\r'; break;
1599 case 'b': val = '\b'; break;
1600 case 't': val = '\t'; break;
1601 case 'f': val = '\f'; break;
1602 case '\'': val = '\''; break;
1603 case '"': val = '"'; break;
1604 case '\\': val = '\\'; break;
1605 default: error("invalid escape");
1606 }
1607 goto out;
1608 }
1609
1610 /* \nnn sequence */
1611 if(s[2] >= '0' && s[2] <= '7') {
1612 if(s[3] < '0' ||
1613 s[3] > '7' ||
1614 s[4] < '0' ||
1615 s[4] > '7' ||
1616 s[5] != 0)
1617 error("illegal \\nnn construction");
1618 val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1619 if(val == 0)
1620 error("'\\000' is illegal");
1621 goto out;
1622 }
1623 error("unknown escape");
1624 }
1625 val = extval++;
1626
1627out:
1628 tokset[ntokens].value = val;
1629 toklev[ntokens] = 0;
1630 return ntokens;
1631}
1632
1633/*
1634 * write out the defines (at the end of the declaration section)
1635 */
1636void
1637defout(int last)
1638{
1639 int i, c;
1640 char sar[NAMESIZE+10];
1641
1642 for(i=ndefout; i<=ntokens; i++) {
1643 /* non-literals */
1644 c = tokset[i].name[0];
1645 if(c != ' ' && c != '$') {
1646 Bprint(ftable, "#define %s %d\n",
1647 tokset[i].name, tokset[i].value);
1648 if(fdefine)
1649 Bprint(fdefine, "#define\t%s\t%d\n",
1650 tokset[i].name, tokset[i].value);
1651 }
1652 }
1653 ndefout = ntokens+1;
1654 if(last && fdebug) {
wkjcae9bfe2005-02-14 20:27:13 +00001655 Bprint(fdebug, "static char* yytoknames[] =\n{\n");
rsc39405062005-01-13 04:56:07 +00001656 TLOOP(i) {
1657 if(tokset[i].name) {
1658 chcopy(sar, tokset[i].name);
1659 Bprint(fdebug, "\t\"%s\",\n", sar);
1660 continue;
1661 }
1662 Bprint(fdebug, "\t0,\n");
1663 }
1664 Bprint(fdebug, "};\n");
1665 }
1666}
1667
1668char*
1669cstash(char *s)
1670{
1671 char *temp;
1672
1673 temp = cnamp;
1674 do {
1675 if(cnamp >= &cnames[cnamsz])
1676 error("too many characters in id's and literals");
1677 else
1678 *cnamp++ = *s;
1679 } while(*s++);
1680 return temp;
1681}
1682
1683long
1684gettok(void)
1685{
1686 long c;
1687 Rune rune;
1688 int i, base, match, reserve;
1689 static int peekline;
1690
1691begin:
1692 reserve = 0;
1693 lineno += peekline;
1694 peekline = 0;
1695 c = Bgetrune(finput);
1696 while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1697 if(c == '\n')
1698 lineno++;
1699 c = Bgetrune(finput);
1700 }
1701
1702 /* skip comment */
1703 if(c == '/') {
1704 lineno += skipcom();
1705 goto begin;
1706 }
1707 switch(c) {
1708 case Beof:
1709 return ENDFILE;
1710
1711 case '{':
1712 Bungetrune(finput);
1713 return '=';
1714
1715 case '<':
1716 /* get, and look up, a type name (union member name) */
1717 i = 0;
1718 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1719 rune = c;
1720 c = runetochar(&tokname[i], &rune);
1721 if(i < NAMESIZE)
1722 i += c;
1723 }
1724 if(c != '>')
1725 error("unterminated < ... > clause");
1726 tokname[i] = 0;
1727 for(i=1; i<=ntypes; i++)
1728 if(!strcmp(typeset[i], tokname)) {
1729 numbval = i;
1730 return TYPENAME;
1731 }
1732 ntypes++;
1733 numbval = ntypes;
1734 typeset[numbval] = cstash(tokname);
1735 return TYPENAME;
1736
1737 case '"':
1738 case '\'':
1739 match = c;
1740 tokname[0] = ' ';
1741 i = 1;
1742 for(;;) {
1743 c = Bgetrune(finput);
1744 if(c == '\n' || c <= 0)
1745 error("illegal or missing ' or \"" );
1746 if(c == '\\') {
1747 tokname[i] = '\\';
1748 if(i < NAMESIZE)
1749 i++;
1750 c = Bgetrune(finput);
1751 } else
1752 if(c == match)
1753 break;
1754 rune = c;
1755 c = runetochar(&tokname[i], &rune);
1756 if(i < NAMESIZE)
1757 i += c;
1758 }
1759 break;
1760
1761 case '%':
1762 case '\\':
1763 switch(c = Bgetrune(finput)) {
1764 case '0': return TERM;
1765 case '<': return LEFT;
1766 case '2': return BINARY;
1767 case '>': return RIGHT;
1768 case '%':
1769 case '\\': return MARK;
1770 case '=': return PREC;
1771 case '{': return LCURLY;
1772 default: reserve = 1;
1773 }
1774
1775 default:
1776 /* number */
1777 if(isdigit(c)) {
1778 numbval = c-'0';
1779 base = (c=='0')? 8: 10;
1780 for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1781 numbval = numbval*base + (c-'0');
1782 Bungetrune(finput);
1783 return NUMBER;
1784 }
1785 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') {
1786 i = 0;
1787 while(islower(c) || isupper(c) || isdigit(c) ||
1788 c == '-' || c=='_' || c=='.' || c=='$') {
1789 if(reserve && isupper(c))
1790 c += 'a'-'A';
1791 rune = c;
1792 c = runetochar(&tokname[i], &rune);
1793 if(i < NAMESIZE)
1794 i += c;
1795 c = Bgetrune(finput);
1796 }
1797 } else
1798 return c;
1799 Bungetrune(finput);
1800 }
1801 tokname[i] = 0;
1802
1803 /* find a reserved word */
1804 if(reserve) {
1805 for(c=0; resrv[c].name; c++)
1806 if(strcmp(tokname, resrv[c].name) == 0)
1807 return resrv[c].value;
1808 error("invalid escape, or illegal reserved word: %s", tokname);
1809 }
1810
1811 /* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1812 c = Bgetrune(finput);
1813 while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1814 if(c == '\n')
1815 peekline++;
1816 /* look for comments */
1817 if(c == '/')
1818 peekline += skipcom();
1819 c = Bgetrune(finput);
1820 }
1821 if(c == ':')
1822 return IDENTCOLON;
1823 Bungetrune(finput);
1824 return IDENTIFIER;
1825}
1826
1827/*
1828 * determine the type of a symbol
1829 */
1830int
1831fdtype(int t)
1832{
1833 int v;
1834
1835 if(t >= NTBASE)
1836 v = nontrst[t-NTBASE].value;
1837 else
1838 v = TYPE(toklev[t]);
1839 if(v <= 0)
1840 error("must specify type for %s", (t>=NTBASE)?
1841 nontrst[t-NTBASE].name: tokset[t].name);
1842 return v;
1843}
1844
1845int
1846chfind(int t, char *s)
1847{
1848 int i;
1849
1850 if(s[0] == ' ')
1851 t = 0;
1852 TLOOP(i)
1853 if(!strcmp(s, tokset[i].name))
1854 return i;
1855 NTLOOP(i)
1856 if(!strcmp(s, nontrst[i].name))
1857 return NTBASE+i;
1858
1859 /* cannot find name */
1860 if(t > 1)
1861 error("%s should have been defined earlier", s);
1862 return defin(t, s);
1863}
1864
1865/*
1866 * copy the union declaration to the output, and the define file if present
1867 */
1868void
1869cpyunion(void)
1870{
1871 long c;
1872 int level;
1873
wkjcae9bfe2005-02-14 20:27:13 +00001874 Bprint(ftable, "\n");
1875 if(yyline)
1876 Bprint(ftable, "#line\t%d\t\"%s\"\n", lineno, infile);
rsc39405062005-01-13 04:56:07 +00001877 Bprint(ftable, "typedef union ");
1878 if(fdefine != 0)
1879 Bprint(fdefine, "\ntypedef union ");
1880
1881 level = 0;
1882 for(;;) {
1883 if((c=Bgetrune(finput)) == Beof)
1884 error("EOF encountered while processing %%union");
1885 Bputrune(ftable, c);
1886 if(fdefine != 0)
1887 Bputrune(fdefine, c);
1888 switch(c) {
1889 case '\n':
1890 lineno++;
1891 break;
1892 case '{':
1893 level++;
1894 break;
1895 case '}':
1896 level--;
1897
1898 /* we are finished copying */
1899 if(level == 0) {
1900 Bprint(ftable, " YYSTYPE;\n");
wkjcae9bfe2005-02-14 20:27:13 +00001901 if(fdefine != 0){
1902 Bprint(fdefine, "\tYYSTYPE;\n");
1903 if(!yyarg)
1904 Bprint(fdefine, "extern\tYYSTYPE\tyylval;\n");
1905 }
rsc39405062005-01-13 04:56:07 +00001906 return;
1907 }
1908 }
1909 }
1910}
1911
1912/*
1913 * copies code between \{ and \}
1914 */
1915void
1916cpycode(void)
1917{
rsc39405062005-01-13 04:56:07 +00001918 long c;
1919
1920 c = Bgetrune(finput);
1921 if(c == '\n') {
1922 c = Bgetrune(finput);
1923 lineno++;
1924 }
wkjcae9bfe2005-02-14 20:27:13 +00001925 Bprint(ftable, "\n");
1926 if(yyline)
1927 Bprint(ftable, "#line\t%d\t\"%s\"\n", lineno, infile);
rsc39405062005-01-13 04:56:07 +00001928 while(c != Beof) {
1929 if(c == '\\') {
1930 if((c=Bgetrune(finput)) == '}')
1931 return;
1932 Bputc(ftable, '\\');
1933 }
1934 if(c == '%') {
1935 if((c=Bgetrune(finput)) == '}')
1936 return;
1937 Bputc(ftable, '%');
1938 }
1939 Bputrune(ftable, c);
1940 if(c == '\n')
1941 lineno++;
1942 c = Bgetrune(finput);
1943 }
1944 error("eof before %%}");
1945}
1946
1947/*
1948 * skip over comments
1949 * skipcom is called after reading a '/'
1950 */
1951int
1952skipcom(void)
1953{
1954 long c;
1955 int i;
1956
1957 /* i is the number of lines skipped */
1958 i = 0;
1959 if(Bgetrune(finput) != '*')
1960 error("illegal comment");
1961 c = Bgetrune(finput);
1962 while(c != Beof) {
1963 while(c == '*')
1964 if((c=Bgetrune(finput)) == '/')
1965 return i;
1966 if(c == '\n')
1967 i++;
1968 c = Bgetrune(finput);
1969 }
1970 error("EOF inside comment");
1971 return 0;
1972}
1973
1974/*
1975 * copy C action to the next ; or closing }
1976 */
1977void
1978cpyact(int offset)
1979{
1980 long c;
1981 int brac, match, j, s, fnd, tok;
1982
wkjcae9bfe2005-02-14 20:27:13 +00001983 Bprint(faction, "\n");
1984 if(yyline)
1985 Bprint(faction, "#line\t%d\t\"%s\"\n", lineno, infile);
rsc39405062005-01-13 04:56:07 +00001986 brac = 0;
1987
1988loop:
1989 c = Bgetrune(finput);
1990swt:
1991 switch(c) {
1992 case ';':
1993 if(brac == 0) {
1994 Bputrune(faction, c);
1995 return;
1996 }
1997 goto lcopy;
1998
1999 case '{':
2000 brac++;
2001 goto lcopy;
2002
2003 case '$':
2004 s = 1;
2005 tok = -1;
2006 c = Bgetrune(finput);
2007
2008 /* type description */
2009 if(c == '<') {
2010 Bungetrune(finput);
2011 if(gettok() != TYPENAME)
2012 error("bad syntax on $<ident> clause");
2013 tok = numbval;
2014 c = Bgetrune(finput);
2015 }
2016 if(c == '$') {
2017 Bprint(faction, "yyval");
2018
2019 /* put out the proper tag... */
2020 if(ntypes) {
2021 if(tok < 0)
2022 tok = fdtype(*prdptr[nprod]);
2023 Bprint(faction, ".%s", typeset[tok]);
2024 }
2025 goto loop;
2026 }
2027 if(c == '-') {
2028 s = -s;
2029 c = Bgetrune(finput);
2030 }
2031 if(isdigit(c)) {
2032 j = 0;
2033 while(isdigit(c)) {
2034 j = j*10 + (c-'0');
2035 c = Bgetrune(finput);
2036 }
2037 Bungetrune(finput);
2038 j = j*s - offset;
2039 if(j > 0)
2040 error("Illegal use of $%d", j+offset);
2041
2042 dollar:
2043 Bprint(faction, "yypt[-%d].yyv", -j);
2044
2045 /* put out the proper tag */
2046 if(ntypes) {
2047 if(j+offset <= 0 && tok < 0)
2048 error("must specify type of $%d", j+offset);
2049 if(tok < 0)
2050 tok = fdtype(prdptr[nprod][j+offset]);
2051 Bprint(faction, ".%s", typeset[tok]);
2052 }
2053 goto loop;
2054 }
2055 if(isupper(c) || islower(c) || c == '_' || c == '.') {
2056 int tok; /* tok used oustide for type info */
2057
2058 /* look for $name */
2059 Bungetrune(finput);
2060 if(gettok() != IDENTIFIER)
2061 error("$ must be followed by an identifier");
2062 tok = chfind(2, tokname);
2063 if((c = Bgetrune(finput)) != '#') {
2064 Bungetrune(finput);
2065 fnd = -1;
2066 } else
2067 if(gettok() != NUMBER) {
2068 error("# must be followed by number");
2069 fnd = -1;
2070 } else
2071 fnd = numbval;
2072 for(j=1; j<=offset; ++j)
2073 if(tok == prdptr[nprod][j]) {
2074 if(--fnd <= 0) {
2075 j -= offset;
2076 goto dollar;
2077 }
2078 }
2079 error("$name or $name#number not found");
2080 }
2081 Bputc(faction, '$');
2082 if(s < 0 )
2083 Bputc(faction, '-');
2084 goto swt;
2085
2086 case '}':
2087 brac--;
2088 if(brac)
2089 goto lcopy;
2090 Bputrune(faction, c);
2091 return;
2092
2093 case '/':
2094 /* look for comments */
2095 Bputrune(faction, c);
2096 c = Bgetrune(finput);
2097 if(c != '*')
2098 goto swt;
2099
2100 /* it really is a comment */
2101 Bputrune(faction, c);
2102 c = Bgetrune(finput);
2103 while(c >= 0) {
2104 while(c == '*') {
2105 Bputrune(faction, c);
2106 if((c=Bgetrune(finput)) == '/')
2107 goto lcopy;
2108 }
2109 Bputrune(faction, c);
2110 if(c == '\n')
2111 lineno++;
2112 c = Bgetrune(finput);
2113 }
2114 error("EOF inside comment");
2115
2116 case '\'':
2117 /* character constant */
2118 match = '\'';
2119 goto string;
2120
2121 case '"':
2122 /* character string */
2123 match = '"';
2124
2125 string:
2126 Bputrune(faction, c);
2127 while(c = Bgetrune(finput)) {
2128 if(c == '\\') {
2129 Bputrune(faction, c);
2130 c = Bgetrune(finput);
2131 if(c == '\n')
2132 lineno++;
2133 } else
2134 if(c == match)
2135 goto lcopy;
2136 if(c == '\n')
2137 error("newline in string or char. const.");
2138 Bputrune(faction, c);
2139 }
2140 error("EOF in string or character constant");
2141
2142 case Beof:
2143 error("action does not terminate");
2144
2145 case '\n':
2146 lineno++;
2147 goto lcopy;
2148 }
2149
2150lcopy:
2151 Bputrune(faction, c);
2152 goto loop;
2153}
2154
2155void
2156openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2157{
2158 char buf[256];
2159
2160 if(vflag) {
2161 sprint(buf, "%s.%s", stem, FILEU);
2162 foutput = Bopen(buf, OWRITE);
2163 if(foutput == 0)
2164 error("cannot open %s", buf);
2165 }
2166 if(yydebug) {
2167 sprint(buf, "%s.%s", stem, FILEDEBUG);
2168 if((fdebug = Bopen(buf, OWRITE)) == 0)
2169 error("can't open %s", buf);
2170 }
2171 if(dflag) {
2172 sprint(buf, "%s.%s", stem, FILED);
2173 fdefine = Bopen(buf, OWRITE);
2174 if(fdefine == 0)
2175 error("can't create %s", buf);
2176 }
2177 if(ytab == 0)
2178 sprint(buf, "%s.%s", stem, OFILE);
2179 else
2180 strcpy(buf, ytabc);
2181 ftable = Bopen(buf, OWRITE);
2182 if(ftable == 0)
2183 error("cannot open table file %s", buf);
2184}
2185
2186/*
2187 * print the output for the states
2188 */
2189void
2190output(void)
2191{
2192 int i, k, c;
2193 Wset *u, *v;
2194
wkjcae9bfe2005-02-14 20:27:13 +00002195 Bprint(ftable, "static\tconst\tshort yyexca[] =\n{");
rsc39405062005-01-13 04:56:07 +00002196 if(fdebug)
wkjcae9bfe2005-02-14 20:27:13 +00002197 Bprint(fdebug, "static\tconst\tchar* yystates[] =\n{\n");
rsc39405062005-01-13 04:56:07 +00002198
2199 /* output the stuff for state i */
2200 SLOOP(i) {
2201 nolook = tystate[i]!=MUSTLOOKAHEAD;
2202 closure(i);
2203
2204 /* output actions */
2205 nolook = 1;
2206 aryfil(temp1, ntokens+nnonter+1, 0);
2207 WSLOOP(wsets, u) {
2208 c = *(u->pitem);
2209 if(c > 1 && c < NTBASE && temp1[c] == 0) {
2210 WSLOOP(u, v)
2211 if(c == *(v->pitem))
2212 putitem(v->pitem+1, (Lkset*)0);
2213 temp1[c] = state(c);
2214 } else
2215 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2216 temp1[c+ntokens] = amem[indgo[i]+c];
2217 }
2218 if(i == 1)
2219 temp1[1] = ACCEPTCODE;
2220
2221 /* now, we have the shifts; look at the reductions */
2222 lastred = 0;
2223 WSLOOP(wsets, u) {
2224 c = *u->pitem;
2225
2226 /* reduction */
2227 if(c <= 0) {
2228 lastred = -c;
2229 TLOOP(k)
2230 if(BIT(u->ws.lset, k)) {
2231 if(temp1[k] == 0)
2232 temp1[k] = c;
2233 else
2234 if(temp1[k] < 0) { /* reduce/reduce conflict */
2235 if(foutput)
2236 Bprint(foutput,
2237 "\n%d: reduce/reduce conflict"
2238 " (red'ns %d and %d ) on %s",
2239 i, -temp1[k], lastred,
2240 symnam(k));
2241 if(-temp1[k] > lastred)
2242 temp1[k] = -lastred;
2243 zzrrconf++;
2244 } else
2245 /* potential shift/reduce conflict */
2246 precftn( lastred, k, i );
2247 }
2248 }
2249 }
2250 wract(i);
2251 }
2252
2253 if(fdebug)
2254 Bprint(fdebug, "};\n");
2255 Bprint(ftable, "};\n");
2256 Bprint(ftable, "#define YYNPROD %d\n", nprod);
2257 Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
2258 if(yydebug)
2259 Bprint(ftable, "#define yydebug %s\n", yydebug);
2260}
2261
2262/*
2263 * pack state i from temp1 into amem
2264 */
2265int
2266apack(int *p, int n)
2267{
2268 int *pp, *qq, *rr, off, *q, *r;
2269
2270 /* we don't need to worry about checking because
2271 * we will only look at entries known to be there...
2272 * eliminate leading and trailing 0's
2273 */
2274
2275 q = p+n;
2276 for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2277 ;
2278 /* no actions */
2279 if(pp > q)
2280 return 0;
2281 p = pp;
2282
2283 /* now, find a place for the elements from p to q, inclusive */
2284 r = &amem[ACTSIZE-1];
2285 for(rr = amem; rr <= r; rr++, off++) {
2286 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2287 if(*pp != 0)
2288 if(*pp != *qq && *qq != 0)
2289 goto nextk;
2290
2291 /* we have found an acceptable k */
2292 if(pkdebug && foutput != 0)
2293 Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2294 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2295 if(*pp) {
2296 if(qq > r)
2297 error("action table overflow");
2298 if(qq > memp)
2299 memp = qq;
2300 *qq = *pp;
2301 }
2302 if(pkdebug && foutput != 0)
2303 for(pp = amem; pp <= memp; pp += 10) {
2304 Bprint(foutput, "\t");
2305 for(qq = pp; qq <= pp+9; qq++)
2306 Bprint(foutput, "%d ", *qq);
2307 Bprint(foutput, "\n");
2308 }
2309 return(off);
2310 nextk:;
2311 }
2312 error("no space in action table");
2313 return 0;
2314}
2315
2316/*
2317 * output the gotos for the nontermninals
2318 */
2319void
2320go2out(void)
2321{
2322 int i, j, k, best, count, cbest, times;
2323
2324 /* mark begining of gotos */
2325 Bprint(ftemp, "$\n");
2326 for(i = 1; i <= nnonter; i++) {
2327 go2gen(i);
2328
2329 /* find the best one to make default */
2330 best = -1;
2331 times = 0;
2332
2333 /* is j the most frequent */
2334 for(j = 0; j <= nstate; j++) {
2335 if(tystate[j] == 0)
2336 continue;
2337 if(tystate[j] == best)
2338 continue;
2339
2340 /* is tystate[j] the most frequent */
2341 count = 0;
2342 cbest = tystate[j];
2343 for(k = j; k <= nstate; k++)
2344 if(tystate[k] == cbest)
2345 count++;
2346 if(count > times) {
2347 best = cbest;
2348 times = count;
2349 }
2350 }
2351
2352 /* best is now the default entry */
2353 zzgobest += times-1;
2354 for(j = 0; j <= nstate; j++)
2355 if(tystate[j] != 0 && tystate[j] != best) {
2356 Bprint(ftemp, "%d,%d,", j, tystate[j]);
2357 zzgoent++;
2358 }
2359
2360 /* now, the default */
2361 if(best == -1)
2362 best = 0;
2363 zzgoent++;
2364 Bprint(ftemp, "%d\n", best);
2365 }
2366}
2367
2368/*
2369 * output the gotos for nonterminal c
2370 */
2371void
2372go2gen(int c)
2373{
2374 int i, work, cc;
2375 Item *p, *q;
2376
2377
2378 /* first, find nonterminals with gotos on c */
2379 aryfil(temp1, nnonter+1, 0);
2380 temp1[c] = 1;
2381 work = 1;
2382 while(work) {
2383 work = 0;
2384 PLOOP(0, i)
2385
2386 /* cc is a nonterminal */
2387 if((cc=prdptr[i][1]-NTBASE) >= 0)
2388 /* cc has a goto on c */
2389 if(temp1[cc] != 0) {
2390
2391 /* thus, the left side of production i does too */
2392 cc = *prdptr[i]-NTBASE;
2393 if(temp1[cc] == 0) {
2394 work = 1;
2395 temp1[cc] = 1;
2396 }
2397 }
2398 }
2399
2400 /* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2401 if(g2debug && foutput != 0) {
2402 Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2403 NTLOOP(i)
2404 if(temp1[i])
2405 Bprint(foutput, "%s ", nontrst[i].name);
2406 Bprint(foutput, "\n");
2407 }
2408
2409 /* now, go through and put gotos into tystate */
2410 aryfil(tystate, nstate, 0);
2411 SLOOP(i)
2412 ITMLOOP(i, p, q)
2413 if((cc = *p->pitem) >= NTBASE)
2414 /* goto on c is possible */
2415 if(temp1[cc-NTBASE]) {
2416 tystate[i] = amem[indgo[i]+c];
2417 break;
2418 }
2419}
2420
2421/*
2422 * decide a shift/reduce conflict by precedence.
2423 * r is a rule number, t a token number
2424 * the conflict is in state s
2425 * temp1[t] is changed to reflect the action
2426 */
2427void
2428precftn(int r, int t, int s)
2429{
2430 int lp, lt, action;
2431
2432 lp = levprd[r];
2433 lt = toklev[t];
2434 if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2435
2436 /* conflict */
2437 if(foutput != 0)
2438 Bprint(foutput,
2439 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2440 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2441 zzsrconf++;
2442 return;
2443 }
2444 if(PLEVEL(lt) == PLEVEL(lp))
2445 action = ASSOC(lt);
2446 else
2447 if(PLEVEL(lt) > PLEVEL(lp))
2448 action = RASC; /* shift */
2449 else
2450 action = LASC; /* reduce */
2451 switch(action) {
2452 case BASC: /* error action */
2453 temp1[t] = ERRCODE;
2454 break;
2455 case LASC: /* reduce */
2456 temp1[t] = -r;
2457 break;
2458 }
2459}
2460
2461/*
2462 * output state i
2463 * temp1 has the actions, lastred the default
2464 */
2465void
2466wract(int i)
2467{
2468 int p, p0, p1, ntimes, tred, count, j, flag;
2469
2470 /* find the best choice for lastred */
2471 lastred = 0;
2472 ntimes = 0;
2473 TLOOP(j) {
2474 if(temp1[j] >= 0)
2475 continue;
2476 if(temp1[j]+lastred == 0)
2477 continue;
2478 /* count the number of appearances of temp1[j] */
2479 count = 0;
2480 tred = -temp1[j];
2481 levprd[tred] |= REDFLAG;
2482 TLOOP(p)
2483 if(temp1[p]+tred == 0)
2484 count++;
2485 if(count > ntimes) {
2486 lastred = tred;
2487 ntimes = count;
2488 }
2489 }
2490
2491 /*
2492 * for error recovery, arrange that, if there is a shift on the
2493 * error recovery token, `error', that the default be the error action
2494 */
2495 if(temp1[2] > 0)
2496 lastred = 0;
2497
2498 /* clear out entries in temp1 which equal lastred */
2499 TLOOP(p)
2500 if(temp1[p]+lastred == 0)
2501 temp1[p] = 0;
2502
2503 wrstate(i);
2504 defact[i] = lastred;
2505 flag = 0;
2506 TLOOP(p0)
2507 if((p1=temp1[p0]) != 0) {
2508 if(p1 < 0) {
2509 p1 = -p1;
2510 goto exc;
2511 }
2512 if(p1 == ACCEPTCODE) {
2513 p1 = -1;
2514 goto exc;
2515 }
2516 if(p1 == ERRCODE) {
2517 p1 = 0;
2518 exc:
2519 if(flag++ == 0)
2520 Bprint(ftable, "-1, %d,\n", i);
2521 Bprint(ftable, "\t%d, %d,\n", p0, p1);
2522 zzexcp++;
2523 continue;
2524 }
2525 Bprint(ftemp, "%d,%d,", p0, p1);
2526 zzacent++;
2527 }
2528 if(flag) {
2529 defact[i] = -2;
2530 Bprint(ftable, "\t-2, %d,\n", lastred);
2531 }
2532 Bprint(ftemp, "\n");
2533}
2534
2535/*
2536 * writes state i
2537 */
2538void
2539wrstate(int i)
2540{
2541 int j0, j1;
2542 Item *pp, *qq;
2543 Wset *u;
2544
2545 if(fdebug) {
2546 if(lastred) {
2547 Bprint(fdebug, " 0, /*%d*/\n", i);
2548 } else {
2549 Bprint(fdebug, " \"");
2550 ITMLOOP(i, pp, qq)
2551 Bprint(fdebug, "%s\\n", writem(pp->pitem));
2552 if(tystate[i] == MUSTLOOKAHEAD)
2553 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2554 if(*u->pitem < 0)
2555 Bprint(fdebug, "%s\\n", writem(u->pitem));
2556 Bprint(fdebug, "\", /*%d*/\n", i);
2557 }
2558 }
2559 if(foutput == 0)
2560 return;
2561 Bprint(foutput, "\nstate %d\n", i);
2562 ITMLOOP(i, pp, qq)
2563 Bprint(foutput, "\t%s\n", writem(pp->pitem));
2564 if(tystate[i] == MUSTLOOKAHEAD)
2565 /* print out empty productions in closure */
2566 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2567 if(*u->pitem < 0)
2568 Bprint(foutput, "\t%s\n", writem(u->pitem));
2569
2570 /* check for state equal to another */
2571 TLOOP(j0)
2572 if((j1=temp1[j0]) != 0) {
2573 Bprint(foutput, "\n\t%s ", symnam(j0));
2574 /* shift, error, or accept */
2575 if(j1 > 0) {
2576 if(j1 == ACCEPTCODE)
2577 Bprint(foutput, "accept");
2578 else
2579 if(j1 == ERRCODE)
2580 Bprint(foutput, "error");
2581 else
2582 Bprint(foutput, "shift %d", j1);
2583 } else
2584 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2585 }
2586
2587 /* output the final production */
2588 if(lastred)
2589 Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n",
2590 lastred, rlines[lastred]);
2591 else
2592 Bprint(foutput, "\n\t. error\n\n");
2593
2594 /* now, output nonterminal actions */
2595 j1 = ntokens;
2596 for(j0 = 1; j0 <= nnonter; j0++) {
2597 j1++;
2598 if(temp1[j1])
2599 Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2600 }
2601}
2602
2603void
2604warray(char *s, int *v, int n)
2605{
2606 int i;
2607
wkjcae9bfe2005-02-14 20:27:13 +00002608 Bprint(ftable, "static\tconst\tshort %s[] =\n{", s);
rsc39405062005-01-13 04:56:07 +00002609 for(i=0;;) {
2610 if(i%10 == 0)
2611 Bprint(ftable, "\n");
2612 Bprint(ftable, "%4d", v[i]);
2613 i++;
2614 if(i >= n) {
2615 Bprint(ftable, "\n};\n");
2616 break;
2617 }
2618 Bprint(ftable, ",");
2619 }
2620}
2621
2622/*
2623 * in order to free up the mem and amem arrays for the optimizer,
2624 * and still be able to output yyr1, etc., after the sizes of
2625 * the action array is known, we hide the nonterminals
2626 * derived by productions in levprd.
2627 */
2628
2629void
2630hideprod(void)
2631{
2632 int i, j;
2633
2634 j = 0;
2635 levprd[0] = 0;
2636 PLOOP(1, i) {
2637 if(!(levprd[i] & REDFLAG)) {
2638 j++;
2639 if(foutput != 0)
2640 Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i]));
2641 }
2642 levprd[i] = *prdptr[i] - NTBASE;
2643 }
2644 if(j)
2645 print("%d rules never reduced\n", j);
2646}
2647
2648void
2649callopt(void)
2650{
2651 int i, *p, j, k, *q;
2652
2653 /* read the arrays from tempfile and set parameters */
2654 finput = Bopen(tempname, OREAD);
2655 if(finput == 0)
2656 error("optimizer cannot open tempfile");
2657
2658 pgo[0] = 0;
2659 temp1[0] = 0;
2660 nstate = 0;
2661 nnonter = 0;
2662 for(;;) {
2663 switch(gtnm()) {
2664 case '\n':
2665 nstate++;
2666 pmem--;
2667 temp1[nstate] = pmem - mem0;
2668 case ',':
2669 continue;
2670 case '$':
2671 break;
2672 default:
2673 error("bad tempfile %s", tempname);
2674 }
2675 break;
2676 }
2677
2678 pmem--;
2679 temp1[nstate] = yypgo[0] = pmem - mem0;
2680 for(;;) {
2681 switch(gtnm()) {
2682 case '\n':
2683 nnonter++;
2684 yypgo[nnonter] = pmem-mem0;
2685 case ',':
2686 continue;
2687 case -1:
2688 break;
2689 default:
2690 error("bad tempfile");
2691 }
2692 break;
2693 }
2694 pmem--;
2695 yypgo[nnonter--] = pmem - mem0;
2696 for(i = 0; i < nstate; i++) {
2697 k = 32000;
2698 j = 0;
2699 q = mem0 + temp1[i+1];
2700 for(p = mem0 + temp1[i]; p < q ; p += 2) {
2701 if(*p > j)
2702 j = *p;
2703 if(*p < k)
2704 k = *p;
2705 }
2706 /* nontrivial situation */
2707 if(k <= j) {
2708 /* j is now the range */
2709/* j -= k; *//* call scj */
2710 if(k > maxoff)
2711 maxoff = k;
2712 }
2713 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2714 if(j > maxspr)
2715 maxspr = j;
2716 }
2717
2718 /* initialize ggreed table */
2719 for(i = 1; i <= nnonter; i++) {
2720 ggreed[i] = 1;
2721 j = 0;
2722
2723 /* minimum entry index is always 0 */
2724 q = mem0 + yypgo[i+1] - 1;
2725 for(p = mem0+yypgo[i]; p < q ; p += 2) {
2726 ggreed[i] += 2;
2727 if(*p > j)
2728 j = *p;
2729 }
2730 ggreed[i] = ggreed[i] + 2*j;
2731 if(j > maxoff)
2732 maxoff = j;
2733 }
2734
2735 /* now, prepare to put the shift actions into the amem array */
2736 for(i = 0; i < ACTSIZE; i++)
2737 amem[i] = 0;
2738 maxa = amem;
2739 for(i = 0; i < nstate; i++) {
2740 if(tystate[i] == 0 && adb > 1)
2741 Bprint(ftable, "State %d: null\n", i);
2742 indgo[i] = YYFLAG1;
2743 }
2744 while((i = nxti()) != NOMORE)
2745 if(i >= 0)
2746 stin(i);
2747 else
2748 gin(-i);
2749
2750 /* print amem array */
2751 if(adb > 2 )
2752 for(p = amem; p <= maxa; p += 10) {
2753 Bprint(ftable, "%4d ", (int)(p-amem));
2754 for(i = 0; i < 10; ++i)
2755 Bprint(ftable, "%4d ", p[i]);
2756 Bprint(ftable, "\n");
2757 }
2758
2759 /* write out the output appropriate to the language */
2760 aoutput();
2761 osummary();
2762 ZAPFILE(tempname);
2763}
2764
2765void
2766gin(int i)
2767{
2768 int *p, *r, *s, *q1, *q2;
2769
2770 /* enter gotos on nonterminal i into array amem */
2771 ggreed[i] = 0;
2772
2773 q2 = mem0+ yypgo[i+1] - 1;
2774 q1 = mem0 + yypgo[i];
2775
2776 /* now, find amem place for it */
2777 for(p = amem; p < &amem[ACTSIZE]; p++) {
2778 if(*p)
2779 continue;
2780 for(r = q1; r < q2; r += 2) {
2781 s = p + *r + 1;
2782 if(*s)
2783 goto nextgp;
2784 if(s > maxa)
2785 if((maxa = s) > &amem[ACTSIZE])
2786 error("a array overflow");
2787 }
2788 /* we have found amem spot */
2789 *p = *q2;
2790 if(p > maxa)
2791 if((maxa = p) > &amem[ACTSIZE])
2792 error("a array overflow");
2793 for(r = q1; r < q2; r += 2) {
2794 s = p + *r + 1;
2795 *s = r[1];
2796 }
2797 pgo[i] = p-amem;
2798 if(adb > 1)
2799 Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2800 return;
2801
2802 nextgp:;
2803 }
2804 error("cannot place goto %d\n", i);
2805}
2806
2807void
2808stin(int i)
2809{
2810 int *r, *s, n, flag, j, *q1, *q2;
2811
2812 tystate[i] = 0;
2813
2814 /* enter state i into the amem array */
2815 q2 = mem0+temp1[i+1];
2816 q1 = mem0+temp1[i];
2817 /* find an acceptable place */
2818 for(n = -maxoff; n < ACTSIZE; n++) {
2819 flag = 0;
2820 for(r = q1; r < q2; r += 2) {
2821 if((s = *r + n + amem) < amem)
2822 goto nextn;
2823 if(*s == 0)
2824 flag++;
2825 else
2826 if(*s != r[1])
2827 goto nextn;
2828 }
2829
2830 /* check that the position equals another only if the states are identical */
2831 for(j=0; j<nstate; j++) {
2832 if(indgo[j] == n) {
2833
2834 /* we have some disagreement */
2835 if(flag)
2836 goto nextn;
2837 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2838
2839 /* states are equal */
2840 indgo[i] = n;
2841 if(adb > 1)
2842 Bprint(ftable,
2843 "State %d: entry at %d equals state %d\n",
2844 i, n, j);
2845 return;
2846 }
2847
2848 /* we have some disagreement */
2849 goto nextn;
2850 }
2851 }
2852
2853 for(r = q1; r < q2; r += 2) {
2854 if((s = *r+n+amem) >= &amem[ACTSIZE])
2855 error("out of space in optimizer a array");
2856 if(s > maxa)
2857 maxa = s;
2858 if(*s != 0 && *s != r[1])
2859 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2860 *s = r[1];
2861 }
2862 indgo[i] = n;
2863 if(adb > 1)
2864 Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2865 return;
2866 nextn:;
2867 }
2868 error("Error; failure to place state %d\n", i);
2869}
2870
2871/*
2872 * finds the next i
2873 */
2874int
2875nxti(void)
2876{
2877 int i, max, maxi;
2878
2879 max = 0;
2880 maxi = 0;
2881 for(i = 1; i <= nnonter; i++)
2882 if(ggreed[i] >= max) {
2883 max = ggreed[i];
2884 maxi = -i;
2885 }
2886 for(i = 0; i < nstate; ++i)
2887 if(tystate[i] >= max) {
2888 max = tystate[i];
2889 maxi = i;
2890 }
2891 if(nxdb)
2892 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2893 if(max == 0)
2894 return NOMORE;
2895 return maxi;
2896}
2897
2898/*
2899 * write summary
2900 */
2901void
2902osummary(void)
2903{
2904
2905 int i, *p;
2906
2907 if(foutput == 0)
2908 return;
2909 i = 0;
2910 for(p = maxa; p >= amem; p--)
2911 if(*p == 0)
2912 i++;
2913
2914 Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2915 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2916 Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2917 Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2918}
2919
2920/*
2921 * this version is for C
2922 * write out the optimized parser
2923 */
2924void
2925aoutput(void)
2926{
2927 Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2928 arout("yyact", amem, (maxa-amem)+1);
2929 arout("yypact", indgo, nstate);
2930 arout("yypgo", pgo, nnonter+1);
2931}
2932
2933void
2934arout(char *s, int *v, int n)
2935{
2936 int i;
2937
wkjcae9bfe2005-02-14 20:27:13 +00002938 Bprint(ftable, "static\tconst\tshort %s[] =\n{", s);
rsc39405062005-01-13 04:56:07 +00002939 for(i = 0; i < n;) {
2940 if(i%10 == 0)
2941 Bprint(ftable, "\n");
2942 Bprint(ftable, "%4d", v[i]);
2943 i++;
2944 if(i == n)
2945 Bprint(ftable, "\n};\n");
2946 else
2947 Bprint(ftable, ",");
2948 }
2949}
2950
2951/*
2952 * read and convert an integer from the standard input
2953 * return the terminating character
2954 * blanks, tabs, and newlines are ignored
2955 */
2956int
2957gtnm(void)
2958{
2959 int sign, val, c;
2960
2961 sign = 0;
2962 val = 0;
2963 while((c=Bgetrune(finput)) != Beof) {
2964 if(isdigit(c)) {
2965 val = val*10 + c-'0';
2966 continue;
2967 }
2968 if(c == '-') {
2969 sign = 1;
2970 continue;
2971 }
2972 break;
2973 }
2974 if(sign)
2975 val = -val;
2976 *pmem++ = val;
2977 if(pmem >= &mem0[MEMSIZE])
2978 error("out of space");
2979 return c;
2980}