| /* join: relational join of files F1 and F2 on a chosen field of each */ |
| #include <u.h> |
| #include <libc.h> |
| #include <stdio.h> |
| #include <ctype.h> |
| #define F1 0 |
| #define F2 1 |
| #define F0 3 |
| #define NFLD 100 /* max field per line */ |
| #define comp() runecmp(ppi[F1][j1],ppi[F2][j2]) |
| FILE *f[2]; |
| Rune buf[2][BUFSIZ]; /*input lines */ |
| Rune *ppi[2][NFLD+1]; /* pointers to fields in lines */ |
| Rune *s1,*s2; |
| #define j1 joinj1 |
| #define j2 joinj2 |
| |
| int j1 = 1; /* join of this field of file 1 */ |
| int j2 = 1; /* join of this field of file 2 */ |
| int olist[2*NFLD]; /* output these fields */ |
| int olistf[2*NFLD]; /* from these files */ |
| int no; /* number of entries in olist */ |
| Rune sep1 = ' '; /* default field separator */ |
| Rune sep2 = '\t'; |
| char *sepstr=" "; |
| int discard; /* count of truncated lines */ |
| Rune null[BUFSIZ]/* = L""*/; |
| int a1; |
| int a2; |
| |
| char *getoptarg(int*, char***); |
| void output(int, int); |
| int input(int); |
| void oparse(char*); |
| void error(char*, char*); |
| void seek1(void), seek2(void); |
| Rune *strtorune(Rune *, char *); |
| |
| |
| void |
| main(int argc, char **argv) |
| { |
| int i; |
| |
| while (argc > 1 && argv[1][0] == '-') { |
| if (argv[1][1] == '\0') |
| break; |
| switch (argv[1][1]) { |
| case '-': |
| argc--; |
| argv++; |
| goto proceed; |
| case 'a': |
| switch(*getoptarg(&argc, &argv)) { |
| case '1': |
| a1++; |
| break; |
| case '2': |
| a2++; |
| break; |
| default: |
| error("incomplete option -a",""); |
| } |
| break; |
| case 'e': |
| strtorune(null, getoptarg(&argc, &argv)); |
| break; |
| case 't': |
| sepstr=getoptarg(&argc, &argv); |
| chartorune(&sep1, sepstr); |
| sep2 = sep1; |
| break; |
| case 'o': |
| if(argv[1][2]!=0 || |
| argc>2 && strchr(argv[2],',')!=0) |
| oparse(getoptarg(&argc, &argv)); |
| else for (no = 0; no<2*NFLD && argc>2; no++){ |
| if (argv[2][0] == '1' && argv[2][1] == '.') { |
| olistf[no] = F1; |
| olist[no] = atoi(&argv[2][2]); |
| } else if (argv[2][0] == '2' && argv[2][1] == '.') { |
| olist[no] = atoi(&argv[2][2]); |
| olistf[no] = F2; |
| } else if (argv[2][0] == '0') |
| olistf[no] = F0; |
| else |
| break; |
| argc--; |
| argv++; |
| } |
| break; |
| case 'j': |
| if(argc <= 2) |
| break; |
| if (argv[1][2] == '1') |
| j1 = atoi(argv[2]); |
| else if (argv[1][2] == '2') |
| j2 = atoi(argv[2]); |
| else |
| j1 = j2 = atoi(argv[2]); |
| argc--; |
| argv++; |
| break; |
| case '1': |
| j1 = atoi(getoptarg(&argc, &argv)); |
| break; |
| case '2': |
| j2 = atoi(getoptarg(&argc, &argv)); |
| break; |
| } |
| argc--; |
| argv++; |
| } |
| proceed: |
| for (i = 0; i < no; i++) |
| if (olist[i]-- > NFLD) /* 0 origin */ |
| error("field number too big in -o",""); |
| if (argc != 3) |
| error("usage: join [-1 x -2 y] [-o list] file1 file2",""); |
| j1--; |
| j2--; /* everyone else believes in 0 origin */ |
| s1 = ppi[F1][j1]; |
| s2 = ppi[F2][j2]; |
| if (strcmp(argv[1], "-") == 0) |
| f[F1] = stdin; |
| else if ((f[F1] = fopen(argv[1], "r")) == 0) |
| error("can't open %s", argv[1]); |
| if(strcmp(argv[2], "-") == 0) { |
| f[F2] = stdin; |
| } else if ((f[F2] = fopen(argv[2], "r")) == 0) |
| error("can't open %s", argv[2]); |
| |
| if(ftell(f[F2]) >= 0) |
| seek2(); |
| else if(ftell(f[F1]) >= 0) |
| seek1(); |
| else |
| error("neither file is randomly accessible",""); |
| if (discard) |
| error("some input line was truncated", ""); |
| exits(""); |
| } |
| int runecmp(Rune *a, Rune *b){ |
| while(*a==*b){ |
| if(*a=='\0') return 0; |
| a++; |
| b++; |
| } |
| if(*a<*b) return -1; |
| return 1; |
| } |
| char *runetostr(char *buf, Rune *r){ |
| char *s; |
| for(s=buf;*r;r++) s+=runetochar(s, r); |
| *s='\0'; |
| return buf; |
| } |
| Rune *strtorune(Rune *buf, char *s){ |
| Rune *r; |
| for(r=buf;*s;r++) s+=chartorune(r, s); |
| *r='\0'; |
| return buf; |
| } |
| /* lazy. there ought to be a clean way to combine seek1 & seek2 */ |
| #define get1() n1=input(F1) |
| #define get2() n2=input(F2) |
| void |
| seek2(void) |
| { |
| int n1, n2; |
| int top2=0; |
| int bot2 = ftell(f[F2]); |
| get1(); |
| get2(); |
| while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { |
| if(n1>0 && n2>0 && comp()>0 || n1==0) { |
| if(a2) output(0, n2); |
| bot2 = ftell(f[F2]); |
| get2(); |
| } else if(n1>0 && n2>0 && comp()<0 || n2==0) { |
| if(a1) output(n1, 0); |
| get1(); |
| } else /*(n1>0 && n2>0 && comp()==0)*/ { |
| while(n2>0 && comp()==0) { |
| output(n1, n2); |
| top2 = ftell(f[F2]); |
| get2(); |
| } |
| fseek(f[F2], bot2, 0); |
| get2(); |
| get1(); |
| for(;;) { |
| if(n1>0 && n2>0 && comp()==0) { |
| output(n1, n2); |
| get2(); |
| } else if(n1>0 && n2>0 && comp()<0 || n2==0) { |
| fseek(f[F2], bot2, 0); |
| get2(); |
| get1(); |
| } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{ |
| fseek(f[F2], top2, 0); |
| bot2 = top2; |
| get2(); |
| break; |
| } |
| } |
| } |
| } |
| } |
| void |
| seek1(void) |
| { |
| int n1, n2; |
| int top1=0; |
| int bot1 = ftell(f[F1]); |
| get1(); |
| get2(); |
| while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { |
| if(n1>0 && n2>0 && comp()>0 || n1==0) { |
| if(a2) output(0, n2); |
| get2(); |
| } else if(n1>0 && n2>0 && comp()<0 || n2==0) { |
| if(a1) output(n1, 0); |
| bot1 = ftell(f[F1]); |
| get1(); |
| } else /*(n1>0 && n2>0 && comp()==0)*/ { |
| while(n2>0 && comp()==0) { |
| output(n1, n2); |
| top1 = ftell(f[F1]); |
| get1(); |
| } |
| fseek(f[F1], bot1, 0); |
| get2(); |
| get1(); |
| for(;;) { |
| if(n1>0 && n2>0 && comp()==0) { |
| output(n1, n2); |
| get1(); |
| } else if(n1>0 && n2>0 && comp()>0 || n1==0) { |
| fseek(f[F1], bot1, 0); |
| get2(); |
| get1(); |
| } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{ |
| fseek(f[F1], top1, 0); |
| bot1 = top1; |
| get1(); |
| break; |
| } |
| } |
| } |
| } |
| } |
| |
| int |
| input(int n) /* get input line and split into fields */ |
| { |
| register int i, c; |
| Rune *bp; |
| Rune **pp; |
| char line[BUFSIZ]; |
| |
| bp = buf[n]; |
| pp = ppi[n]; |
| if (fgets(line, BUFSIZ, f[n]) == 0) |
| return(0); |
| strtorune(bp, line); |
| i = 0; |
| do { |
| i++; |
| if (sep1 == ' ') /* strip multiples */ |
| while ((c = *bp) == sep1 || c == sep2) |
| bp++; /* skip blanks */ |
| *pp++ = bp; /* record beginning */ |
| while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0') |
| bp++; |
| *bp++ = '\0'; /* mark end by overwriting blank */ |
| } while (c != '\n' && c != '\0' && i < NFLD-1); |
| if (c != '\n') |
| discard++; |
| |
| *pp = 0; |
| return(i); |
| } |
| |
| void |
| output(int on1, int on2) /* print items from olist */ |
| { |
| int i; |
| Rune *temp; |
| char buf[BUFSIZ]; |
| |
| if (no <= 0) { /* default case */ |
| printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2])); |
| for (i = 0; i < on1; i++) |
| if (i != j1) |
| printf("%s%s", sepstr, runetostr(buf, ppi[F1][i])); |
| for (i = 0; i < on2; i++) |
| if (i != j2) |
| printf("%s%s", sepstr, runetostr(buf, ppi[F2][i])); |
| printf("\n"); |
| } else { |
| for (i = 0; i < no; i++) { |
| if (olistf[i]==F0 && on1>j1) |
| temp = ppi[F1][j1]; |
| else if (olistf[i]==F0 && on2>j2) |
| temp = ppi[F2][j2]; |
| else { |
| temp = ppi[olistf[i]][olist[i]]; |
| if(olistf[i]==F1 && on1<=olist[i] || |
| olistf[i]==F2 && on2<=olist[i] || |
| *temp==0) |
| temp = null; |
| } |
| printf("%s", runetostr(buf, temp)); |
| if (i == no - 1) |
| printf("\n"); |
| else |
| printf("%s", sepstr); |
| } |
| } |
| } |
| |
/*
 * Report a fatal error and terminate.
 * s1 is used as a printf format that may consume the one string
 * argument s2; the message goes to stderr prefixed with "join: ".
 * The process then exits with s1 as its status, so this never returns.
 */
void
error(char *s1, char *s2)
{
	fputs("join: ", stderr);
	fprintf(stderr, s1, s2);
	fputc('\n', stderr);
	exits(s1);
}
| |
/*
 * Fetch the argument of the option at argv[1].
 *
 * If text follows the option letter in the same word, that text is the
 * argument and nothing is consumed.  Otherwise the next word is the
 * argument: it must exist and must not begin with '-', and *argcp and
 * *argvp are advanced by one so that the caller's usual single step
 * moves past both words.
 */
char *
getoptarg(int *argcp, char ***argvp)
{
	char **av = *argvp;
	int ac = *argcp;

	if (av[1][2] != '\0')
		return av[1] + 2;
	if (ac < 3 || av[2][0] == '-')
		error("incomplete option %s", av[1]);
	*argcp = ac - 1;
	*argvp = av + 1;
	return av[2];
}
| |
| void |
| oparse(char *s) |
| { |
| for (no = 0; no<2*NFLD && *s; no++, s++) { |
| switch(*s) { |
| case 0: |
| return; |
| case '0': |
| olistf[no] = F0; |
| break; |
| case '1': |
| case '2': |
| if(s[1] == '.' && isdigit((uchar)s[2])) { |
| olistf[no] = *s=='1'? F1: F2; |
| olist[no] = atoi(s += 2); |
| break; |
| } /* fall thru */ |
| default: |
| error("invalid -o list", ""); |
| } |
| if(s[1] == ',') |
| s++; |
| } |
| } |