| %{ |
| #include "common.h" |
| #include "smtp.h" |
| #include <ctype.h> |
| |
| char *yylp; /* next character to be lex'd */ |
| int yydone; /* tell yylex to give up */ |
| char *yybuffer; /* first parsed character */ |
| char *yyend; /* end of buffer to be parsed */ |
| Node *root; |
| Field *firstfield; |
| Field *lastfield; |
| Node *usender; |
| Node *usys; |
| Node *udate; |
| char *startfield, *endfield; |
| int originator; |
| int destination; |
| int date; |
| int received; |
| int messageid; |
| %} |
| |
| %term WORD |
| %term DATE |
| %term RESENT_DATE |
| %term RETURN_PATH |
| %term FROM |
| %term SENDER |
| %term REPLY_TO |
| %term RESENT_FROM |
| %term RESENT_SENDER |
| %term RESENT_REPLY_TO |
| %term SUBJECT |
| %term TO |
| %term CC |
| %term BCC |
| %term RESENT_TO |
| %term RESENT_CC |
| %term RESENT_BCC |
| %term REMOTE |
| %term PRECEDENCE |
| %term MIMEVERSION |
| %term CONTENTTYPE |
| %term MESSAGEID |
| %term RECEIVED |
| %term MAILER |
| %term BADTOKEN |
| %start msg |
| %% |
| |
| msg : fields |
| | unixfrom '\n' fields |
| ; |
| fields : '\n' |
| { yydone = 1; } |
| | field '\n' |
| | field '\n' fields |
| ; |
| field : dates |
| { date = 1; } |
| | originator |
| { originator = 1; } |
| | destination |
| { destination = 1; } |
| | subject |
| | optional |
| | ignored |
| | received |
| | precedence |
| | error '\n' field |
| ; |
| unixfrom : FROM route_addr unix_date_time REMOTE FROM word |
| { freenode($1); freenode($4); freenode($5); |
| usender = $2; udate = $3; usys = $6; |
| } |
| ; |
| originator : REPLY_TO ':' address_list |
| { newfield(link3($1, $2, $3), 1); } |
| | RETURN_PATH ':' route_addr |
| { newfield(link3($1, $2, $3), 1); } |
| | FROM ':' mailbox_list |
| { newfield(link3($1, $2, $3), 1); } |
| | SENDER ':' mailbox |
| { newfield(link3($1, $2, $3), 1); } |
| | RESENT_REPLY_TO ':' address_list |
| { newfield(link3($1, $2, $3), 1); } |
| | RESENT_SENDER ':' mailbox |
| { newfield(link3($1, $2, $3), 1); } |
| | RESENT_FROM ':' mailbox |
| { newfield(link3($1, $2, $3), 1); } |
| ; |
| dates : DATE ':' date_time |
| { newfield(link3($1, $2, $3), 0); } |
| | RESENT_DATE ':' date_time |
| { newfield(link3($1, $2, $3), 0); } |
| ; |
| destination : TO ':' |
| { newfield(link2($1, $2), 0); } |
| | TO ':' address_list |
| { newfield(link3($1, $2, $3), 0); } |
| | RESENT_TO ':' |
| { newfield(link2($1, $2), 0); } |
| | RESENT_TO ':' address_list |
| { newfield(link3($1, $2, $3), 0); } |
| | CC ':' |
| { newfield(link2($1, $2), 0); } |
| | CC ':' address_list |
| { newfield(link3($1, $2, $3), 0); } |
| | RESENT_CC ':' |
| { newfield(link2($1, $2), 0); } |
| | RESENT_CC ':' address_list |
| { newfield(link3($1, $2, $3), 0); } |
| | BCC ':' |
| { newfield(link2($1, $2), 0); } |
| | BCC ':' address_list |
| { newfield(link3($1, $2, $3), 0); } |
| | RESENT_BCC ':' |
| { newfield(link2($1, $2), 0); } |
| | RESENT_BCC ':' address_list |
| { newfield(link3($1, $2, $3), 0); } |
| ; |
| subject : SUBJECT ':' things |
| { newfield(link3($1, $2, $3), 0); } |
| | SUBJECT ':' |
| { newfield(link2($1, $2), 0); } |
| ; |
| received : RECEIVED ':' things |
| { newfield(link3($1, $2, $3), 0); received++; } |
| | RECEIVED ':' |
| { newfield(link2($1, $2), 0); received++; } |
| ; |
| precedence : PRECEDENCE ':' things |
| { newfield(link3($1, $2, $3), 0); } |
| | PRECEDENCE ':' |
| { newfield(link2($1, $2), 0); } |
| ; |
| ignored : ignoredhdr ':' things |
| { newfield(link3($1, $2, $3), 0); } |
| | ignoredhdr ':' |
| { newfield(link2($1, $2), 0); } |
| ; |
| ignoredhdr : MIMEVERSION | CONTENTTYPE | MESSAGEID { messageid = 1; } | MAILER |
| ; |
| optional : fieldwords ':' things |
| { /* hack to allow same lex for field names and the rest */ |
| if(badfieldname($1)){ |
| freenode($1); |
| freenode($2); |
| freenode($3); |
| return 1; |
| } |
| newfield(link3($1, $2, $3), 0); |
| } |
| | fieldwords ':' |
| { /* hack to allow same lex for field names and the rest */ |
| if(badfieldname($1)){ |
| freenode($1); |
| freenode($2); |
| return 1; |
| } |
| newfield(link2($1, $2), 0); |
| } |
| ; |
| address_list : address |
| | address_list ',' address |
| { $$ = link3($1, $2, $3); } |
| ; |
| address : mailbox |
| | group |
| ; |
| group : phrase ':' address_list ';' |
| { $$ = link2($1, link3($2, $3, $4)); } |
| | phrase ':' ';' |
| { $$ = link3($1, $2, $3); } |
| ; |
| mailbox_list : mailbox |
| | mailbox_list ',' mailbox |
| { $$ = link3($1, $2, $3); } |
| ; |
| mailbox : route_addr |
| | phrase brak_addr |
| { $$ = link2($1, $2); } |
| | brak_addr |
| ; |
| brak_addr : '<' route_addr '>' |
| { $$ = link3($1, $2, $3); } |
| | '<' '>' |
| { $$ = nobody($2); freenode($1); } |
| ; |
| route_addr : route ':' at_addr |
| { $$ = address(concat($1, concat($2, $3))); } |
| | addr_spec |
| ; |
| route : '@' domain |
| { $$ = concat($1, $2); } |
| | route ',' '@' domain |
| { $$ = concat($1, concat($2, concat($3, $4))); } |
| ; |
| addr_spec : local_part |
| { $$ = address($1); } |
| | at_addr |
| ; |
| at_addr : local_part '@' domain |
| { $$ = address(concat($1, concat($2, $3)));} |
| | at_addr '@' domain |
| { $$ = address(concat($1, concat($2, $3)));} |
| ; |
| local_part : word |
| ; |
| domain : word |
| ; |
| phrase : word |
| | phrase word |
| { $$ = link2($1, $2); } |
| ; |
| things : thing |
| | things thing |
| { $$ = link2($1, $2); } |
| ; |
| thing : word | '<' | '>' | '@' | ':' | ';' | ',' |
| ; |
| date_time : things |
| ; |
| unix_date_time : word word word unix_time word word |
| { $$ = link3($1, $3, link3($2, $6, link2($4, $5))); } |
| ; |
| unix_time : word |
| | unix_time ':' word |
| { $$ = link3($1, $2, $3); } |
| ; |
| word : WORD | DATE | RESENT_DATE | RETURN_PATH | FROM | SENDER |
| | REPLY_TO | RESENT_FROM | RESENT_SENDER | RESENT_REPLY_TO |
| | TO | CC | BCC | RESENT_TO | RESENT_CC | RESENT_BCC | REMOTE | SUBJECT |
| | PRECEDENCE | MIMEVERSION | CONTENTTYPE | MESSAGEID | RECEIVED | MAILER |
| ; |
| fieldwords : fieldword |
| | WORD |
| | fieldwords fieldword |
| { $$ = link2($1, $2); } |
| | fieldwords word |
| { $$ = link2($1, $2); } |
| ; |
| fieldword : '<' | '>' | '@' | ';' | ',' |
| ; |
| %% |
| |
| /* |
| * Initialize the parsing. Done once for each header field. |
| */ |
| void |
| yyinit(char *p, int len) |
| { |
| yybuffer = p; |
| yylp = p; |
| yyend = p + len; |
| firstfield = lastfield = 0; |
| received = 0; |
| } |
| |
| /* |
| * keywords identifying header fields we care about |
| */ |
| typedef struct Keyword Keyword; |
| struct Keyword { |
| char *rep; |
| int val; |
| }; |
| |
| /* field names that we need to recognize */ |
| Keyword key[] = { |
| { "date", DATE }, |
| { "resent-date", RESENT_DATE }, |
| { "return_path", RETURN_PATH }, |
| { "from", FROM }, |
| { "sender", SENDER }, |
| { "reply-to", REPLY_TO }, |
| { "resent-from", RESENT_FROM }, |
| { "resent-sender", RESENT_SENDER }, |
| { "resent-reply-to", RESENT_REPLY_TO }, |
| { "to", TO }, |
| { "cc", CC }, |
| { "bcc", BCC }, |
| { "resent-to", RESENT_TO }, |
| { "resent-cc", RESENT_CC }, |
| { "resent-bcc", RESENT_BCC }, |
| { "remote", REMOTE }, |
| { "subject", SUBJECT }, |
| { "precedence", PRECEDENCE }, |
| { "mime-version", MIMEVERSION }, |
| { "content-type", CONTENTTYPE }, |
| { "message-id", MESSAGEID }, |
| { "received", RECEIVED }, |
| { "mailer", MAILER }, |
| { "who-the-hell-cares", WORD } |
| }; |
| |
| /* |
| * Lexical analysis for an rfc822 header field. Continuation lines |
| * are handled in yywhite() when skipping over white space. |
| * |
| */ |
| int |
| yylex(void) |
| { |
| String *t; |
| int quoting; |
| int escaping; |
| char *start; |
| Keyword *kp; |
| int c, d; |
| |
| /* print("lexing\n"); /**/ |
| if(yylp >= yyend) |
| return 0; |
| if(yydone) |
| return 0; |
| |
| quoting = escaping = 0; |
| start = yylp; |
| yylval = malloc(sizeof(Node)); |
| yylval->white = yylval->s = 0; |
| yylval->next = 0; |
| yylval->addr = 0; |
| yylval->start = yylp; |
| for(t = 0; yylp < yyend; yylp++){ |
| c = *yylp & 0xff; |
| |
| /* dump nulls, they can't be in header */ |
| if(c == 0) |
| continue; |
| |
| if(escaping) { |
| escaping = 0; |
| } else if(quoting) { |
| switch(c){ |
| case '\\': |
| escaping = 1; |
| break; |
| case '\n': |
| d = (*(yylp+1))&0xff; |
| if(d != ' ' && d != '\t'){ |
| quoting = 0; |
| yylp--; |
| continue; |
| } |
| break; |
| case '"': |
| quoting = 0; |
| break; |
| } |
| } else { |
| switch(c){ |
| case '\\': |
| escaping = 1; |
| break; |
| case '(': |
| case ' ': |
| case '\t': |
| case '\r': |
| goto out; |
| case '\n': |
| if(yylp == start){ |
| yylp++; |
| /* print("lex(c %c)\n", c); /**/ |
| yylval->end = yylp; |
| return yylval->c = c; |
| } |
| goto out; |
| case '@': |
| case '>': |
| case '<': |
| case ':': |
| case ',': |
| case ';': |
| if(yylp == start){ |
| yylp++; |
| yylval->white = yywhite(); |
| /* print("lex(c %c)\n", c); /**/ |
| yylval->end = yylp; |
| return yylval->c = c; |
| } |
| goto out; |
| case '"': |
| quoting = 1; |
| break; |
| default: |
| break; |
| } |
| } |
| if(t == 0) |
| t = s_new(); |
| s_putc(t, c); |
| } |
| out: |
| yylval->white = yywhite(); |
| if(t) { |
| s_terminate(t); |
| } else /* message begins with white-space! */ |
| return yylval->c = '\n'; |
| yylval->s = t; |
| for(kp = key; kp->val != WORD; kp++) |
| if(cistrcmp(s_to_c(t), kp->rep)==0) |
| break; |
| /* print("lex(%d) %s\n", kp->val-WORD, s_to_c(t)); /**/ |
| yylval->end = yylp; |
| return yylval->c = kp->val; |
| } |
| |
| void |
| yyerror(char *x) |
| { |
| USED(x); |
| |
| /*fprint(2, "parse err: %s\n", x);/**/ |
| } |
| |
| /* |
| * parse white space and comments |
| */ |
| String * |
| yywhite(void) |
| { |
| String *w; |
| int clevel; |
| int c; |
| int escaping; |
| |
| escaping = clevel = 0; |
| for(w = 0; yylp < yyend; yylp++){ |
| c = *yylp & 0xff; |
| |
| /* dump nulls, they can't be in header */ |
| if(c == 0) |
| continue; |
| |
| if(escaping){ |
| escaping = 0; |
| } else if(clevel) { |
| switch(c){ |
| case '\n': |
| /* |
| * look for multiline fields |
| */ |
| if(*(yylp+1)==' ' || *(yylp+1)=='\t') |
| break; |
| else |
| goto out; |
| case '\\': |
| escaping = 1; |
| break; |
| case '(': |
| clevel++; |
| break; |
| case ')': |
| clevel--; |
| break; |
| } |
| } else { |
| switch(c){ |
| case '\\': |
| escaping = 1; |
| break; |
| case '(': |
| clevel++; |
| break; |
| case ' ': |
| case '\t': |
| case '\r': |
| break; |
| case '\n': |
| /* |
| * look for multiline fields |
| */ |
| if(*(yylp+1)==' ' || *(yylp+1)=='\t') |
| break; |
| else |
| goto out; |
| default: |
| goto out; |
| } |
| } |
| if(w == 0) |
| w = s_new(); |
| s_putc(w, c); |
| } |
| out: |
| if(w) |
| s_terminate(w); |
| return w; |
| } |
| |
| /* |
| * link two parsed entries together |
| */ |
| Node* |
| link2(Node *p1, Node *p2) |
| { |
| Node *p; |
| |
| for(p = p1; p->next; p = p->next) |
| ; |
| p->next = p2; |
| return p1; |
| } |
| |
| /* |
| * link three parsed entries together |
| */ |
| Node* |
| link3(Node *p1, Node *p2, Node *p3) |
| { |
| Node *p; |
| |
| for(p = p2; p->next; p = p->next) |
| ; |
| p->next = p3; |
| |
| for(p = p1; p->next; p = p->next) |
| ; |
| p->next = p2; |
| |
| return p1; |
| } |
| |
| /* |
| * make a:b, move all white space after both |
| */ |
| Node* |
| colon(Node *p1, Node *p2) |
| { |
| if(p1->white){ |
| if(p2->white) |
| s_append(p1->white, s_to_c(p2->white)); |
| } else { |
| p1->white = p2->white; |
| p2->white = 0; |
| } |
| |
| s_append(p1->s, ":"); |
| if(p2->s) |
| s_append(p1->s, s_to_c(p2->s)); |
| |
| if(p1->end < p2->end) |
| p1->end = p2->end; |
| freenode(p2); |
| return p1; |
| } |
| |
| /* |
| * concatenate two fields, move all white space after both |
| */ |
| Node* |
| concat(Node *p1, Node *p2) |
| { |
| char buf[2]; |
| |
| if(p1->white){ |
| if(p2->white) |
| s_append(p1->white, s_to_c(p2->white)); |
| } else { |
| p1->white = p2->white; |
| p2->white = 0; |
| } |
| |
| if(p1->s == nil){ |
| buf[0] = p1->c; |
| buf[1] = 0; |
| p1->s = s_new(); |
| s_append(p1->s, buf); |
| } |
| |
| if(p2->s) |
| s_append(p1->s, s_to_c(p2->s)); |
| else { |
| buf[0] = p2->c; |
| buf[1] = 0; |
| s_append(p1->s, buf); |
| } |
| |
| if(p1->end < p2->end) |
| p1->end = p2->end; |
| freenode(p2); |
| return p1; |
| } |
| |
| /* |
| * look for disallowed chars in the field name |
| */ |
| int |
| badfieldname(Node *p) |
| { |
| for(; p; p = p->next){ |
| /* field name can't contain white space */ |
| if(p->white && p->next) |
| return 1; |
| } |
| return 0; |
| } |
| |
| /* |
| * mark as an address |
| */ |
| Node * |
| address(Node *p) |
| { |
| p->addr = 1; |
| return p; |
| } |
| |
| /* |
| * case independent string compare |
| */ |
| int |
| cistrcmp(char *s1, char *s2) |
| { |
| int c1, c2; |
| |
| for(; *s1; s1++, s2++){ |
| c1 = isupper(*s1) ? tolower(*s1) : *s1; |
| c2 = isupper(*s2) ? tolower(*s2) : *s2; |
| if (c1 != c2) |
| return -1; |
| } |
| return *s2; |
| } |
| |
| /* |
| * free a node |
| */ |
| void |
| freenode(Node *p) |
| { |
| Node *tp; |
| |
| while(p){ |
| tp = p->next; |
| if(p->s) |
| s_free(p->s); |
| if(p->white) |
| s_free(p->white); |
| free(p); |
| p = tp; |
| } |
| } |
| |
| |
| /* |
| * an anonymous user |
| */ |
| Node* |
| nobody(Node *p) |
| { |
| if(p->s) |
| s_free(p->s); |
| p->s = s_copy("pOsTmAsTeR"); |
| p->addr = 1; |
| return p; |
| } |
| |
| /* |
| * add anything that was dropped because of a parse error |
| */ |
| void |
| missing(Node *p) |
| { |
| Node *np; |
| char *start, *end; |
| Field *f; |
| String *s; |
| |
| start = yybuffer; |
| if(lastfield != nil){ |
| for(np = lastfield->node; np; np = np->next) |
| start = np->end+1; |
| } |
| |
| end = p->start-1; |
| |
| if(end <= start) |
| return; |
| |
| if(strncmp(start, "From ", 5) == 0) |
| return; |
| |
| np = malloc(sizeof(Node)); |
| np->start = start; |
| np->end = end; |
| np->white = nil; |
| s = s_copy("BadHeader: "); |
| np->s = s_nappend(s, start, end-start); |
| np->next = nil; |
| |
| f = malloc(sizeof(Field)); |
| f->next = 0; |
| f->node = np; |
| f->source = 0; |
| if(firstfield) |
| lastfield->next = f; |
| else |
| firstfield = f; |
| lastfield = f; |
| } |
| |
| /* |
| * create a new field |
| */ |
| void |
| newfield(Node *p, int source) |
| { |
| Field *f; |
| |
| missing(p); |
| |
| f = malloc(sizeof(Field)); |
| f->next = 0; |
| f->node = p; |
| f->source = source; |
| if(firstfield) |
| lastfield->next = f; |
| else |
| firstfield = f; |
| lastfield = f; |
| endfield = startfield; |
| startfield = yylp; |
| } |
| |
| /* |
| * fee a list of fields |
| */ |
| void |
| freefield(Field *f) |
| { |
| Field *tf; |
| |
| while(f){ |
| tf = f->next; |
| freenode(f->node); |
| free(f); |
| f = tf; |
| } |
| } |
| |
| /* |
| * add some white space to a node |
| */ |
| Node* |
| whiten(Node *p) |
| { |
| Node *tp; |
| |
| for(tp = p; tp->next; tp = tp->next) |
| ; |
| if(tp->white == 0) |
| tp->white = s_copy(" "); |
| return p; |
| } |
| |
| void |
| yycleanup(void) |
| { |
| Field *f, *fnext; |
| Node *np, *next; |
| |
| for(f = firstfield; f; f = fnext){ |
| for(np = f->node; np; np = next){ |
| if(np->s) |
| s_free(np->s); |
| if(np->white) |
| s_free(np->white); |
| next = np->next; |
| free(np); |
| } |
| fnext = f->next; |
| free(f); |
| } |
| firstfield = lastfield = 0; |
| } |