|  | /*	join F1 F2 on stuff */ | 
|  | #include <u.h> | 
|  | #include <libc.h> | 
|  | #include <stdio.h> | 
|  | #include <ctype.h> | 
|  | #define F1 0 | 
|  | #define F2 1 | 
|  | #define F0 3 | 
|  | #define	NFLD	100	/* max field per line */ | 
|  | #define comp() runecmp(ppi[F1][j1],ppi[F2][j2]) | 
|  | FILE *f[2]; | 
|  | Rune buf[2][BUFSIZ];	/*input lines */ | 
|  | Rune *ppi[4][NFLD+1];	/* pointers to fields in lines */ | 
|  | Rune *s1,*s2; | 
|  | #define j1 joinj1 | 
|  | #define j2 joinj2 | 
|  |  | 
|  | int	j1	= 1;	/* join of this field of file 1 */ | 
|  | int	j2	= 1;	/* join of this field of file 2 */ | 
|  | int	olist[2*NFLD];	/* output these fields */ | 
|  | int	olistf[2*NFLD];	/* from these files */ | 
|  | int	no;		/* number of entries in olist */ | 
|  | Rune	sep1	= ' ';	/* default field separator */ | 
|  | Rune	sep2	= '\t'; | 
|  | char *sepstr=" "; | 
|  | int	discard;	/* count of truncated lines */ | 
|  | Rune	null[BUFSIZ]/*	= L""*/; | 
|  | int	a1; | 
|  | int 	a2; | 
|  |  | 
|  | char *getoptarg(int*, char***); | 
|  | void output(int, int); | 
|  | int input(int); | 
|  | void oparse(char*); | 
|  | void error(char*, char*); | 
|  | void seek1(void), seek2(void); | 
|  | Rune *strtorune(Rune *, char *); | 
|  |  | 
|  |  | 
|  | void | 
|  | main(int argc, char **argv) | 
|  | { | 
|  | int i; | 
|  |  | 
|  | while (argc > 1 && argv[1][0] == '-') { | 
|  | if (argv[1][1] == '\0') | 
|  | break; | 
|  | switch (argv[1][1]) { | 
|  | case '-': | 
|  | argc--; | 
|  | argv++; | 
|  | goto proceed; | 
|  | case 'a': | 
|  | switch(*getoptarg(&argc, &argv)) { | 
|  | case '1': | 
|  | a1++; | 
|  | break; | 
|  | case '2': | 
|  | a2++; | 
|  | break; | 
|  | default: | 
|  | error("incomplete option -a",""); | 
|  | } | 
|  | break; | 
|  | case 'e': | 
|  | strtorune(null, getoptarg(&argc, &argv)); | 
|  | break; | 
|  | case 't': | 
|  | sepstr=getoptarg(&argc, &argv); | 
|  | chartorune(&sep1, sepstr); | 
|  | sep2 = sep1; | 
|  | break; | 
|  | case 'o': | 
|  | if(argv[1][2]!=0 || | 
|  | argc>2 && strchr(argv[2],',')!=0) | 
|  | oparse(getoptarg(&argc, &argv)); | 
|  | else for (no = 0; no<2*NFLD && argc>2; no++){ | 
|  | if (argv[2][0] == '1' && argv[2][1] == '.') { | 
|  | olistf[no] = F1; | 
|  | olist[no] = atoi(&argv[2][2]); | 
|  | } else if (argv[2][0] == '2' && argv[2][1] == '.') { | 
|  | olist[no] = atoi(&argv[2][2]); | 
|  | olistf[no] = F2; | 
|  | } else if (argv[2][0] == '0') | 
|  | olistf[no] = F0; | 
|  | else | 
|  | break; | 
|  | argc--; | 
|  | argv++; | 
|  | } | 
|  | break; | 
|  | case 'j': | 
|  | if(argc <= 2) | 
|  | break; | 
|  | if (argv[1][2] == '1') | 
|  | j1 = atoi(argv[2]); | 
|  | else if (argv[1][2] == '2') | 
|  | j2 = atoi(argv[2]); | 
|  | else | 
|  | j1 = j2 = atoi(argv[2]); | 
|  | argc--; | 
|  | argv++; | 
|  | break; | 
|  | case '1': | 
|  | j1 = atoi(getoptarg(&argc, &argv)); | 
|  | break; | 
|  | case '2': | 
|  | j2 = atoi(getoptarg(&argc, &argv)); | 
|  | break; | 
|  | } | 
|  | argc--; | 
|  | argv++; | 
|  | } | 
|  | proceed: | 
|  | for (i = 0; i < no; i++) | 
|  | if (olist[i]-- > NFLD)	/* 0 origin */ | 
|  | error("field number too big in -o",""); | 
|  | if (argc != 3) | 
|  | error("usage: join [-1 x -2 y] [-o list] file1 file2",""); | 
|  | j1--; | 
|  | j2--;	/* everyone else believes in 0 origin */ | 
|  | s1 = ppi[F1][j1]; | 
|  | s2 = ppi[F2][j2]; | 
|  | if (strcmp(argv[1], "-") == 0) | 
|  | f[F1] = stdin; | 
|  | else if ((f[F1] = fopen(argv[1], "r")) == 0) | 
|  | error("can't open %s", argv[1]); | 
|  | if(strcmp(argv[2], "-") == 0) { | 
|  | f[F2] = stdin; | 
|  | } else if ((f[F2] = fopen(argv[2], "r")) == 0) | 
|  | error("can't open %s", argv[2]); | 
|  |  | 
|  | if(ftell(f[F2]) >= 0) | 
|  | seek2(); | 
|  | else if(ftell(f[F1]) >= 0) | 
|  | seek1(); | 
|  | else | 
|  | error("neither file is randomly accessible",""); | 
|  | if (discard) | 
|  | error("some input line was truncated", ""); | 
|  | exits(""); | 
|  | } | 
|  | int runecmp(Rune *a, Rune *b){ | 
|  | while(*a==*b){ | 
|  | if(*a=='\0') return 0; | 
|  | a++; | 
|  | b++; | 
|  | } | 
|  | if(*a<*b) return -1; | 
|  | return 1; | 
|  | } | 
|  | char *runetostr(char *buf, Rune *r){ | 
|  | char *s; | 
|  | for(s=buf;*r;r++) s+=runetochar(s, r); | 
|  | *s='\0'; | 
|  | return buf; | 
|  | } | 
|  | Rune *strtorune(Rune *buf, char *s){ | 
|  | Rune *r; | 
|  | for(r=buf;*s;r++) s+=chartorune(r, s); | 
|  | *r='\0'; | 
|  | return buf; | 
|  | } | 
|  | /* lazy.  there ought to be a clean way to combine seek1 & seek2 */ | 
|  | #define get1() n1=input(F1) | 
|  | #define get2() n2=input(F2) | 
|  | void | 
|  | seek2(void) | 
|  | { | 
|  | int n1, n2; | 
|  | int top2=0; | 
|  | int bot2 = ftell(f[F2]); | 
|  | get1(); | 
|  | get2(); | 
|  | while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { | 
|  | if(n1>0 && n2>0 && comp()>0 || n1==0) { | 
|  | if(a2) output(0, n2); | 
|  | bot2 = ftell(f[F2]); | 
|  | get2(); | 
|  | } else if(n1>0 && n2>0 && comp()<0 || n2==0) { | 
|  | if(a1) output(n1, 0); | 
|  | get1(); | 
|  | } else /*(n1>0 && n2>0 && comp()==0)*/ { | 
|  | while(n2>0 && comp()==0) { | 
|  | output(n1, n2); | 
|  | top2 = ftell(f[F2]); | 
|  | get2(); | 
|  | } | 
|  | fseek(f[F2], bot2, 0); | 
|  | get2(); | 
|  | get1(); | 
|  | for(;;) { | 
|  | if(n1>0 && n2>0 && comp()==0) { | 
|  | output(n1, n2); | 
|  | get2(); | 
|  | } else if(n1>0 && n2>0 && comp()<0 || n2==0) { | 
|  | fseek(f[F2], bot2, 0); | 
|  | get2(); | 
|  | get1(); | 
|  | } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{ | 
|  | fseek(f[F2], top2, 0); | 
|  | bot2 = top2; | 
|  | get2(); | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | void | 
|  | seek1(void) | 
|  | { | 
|  | int n1, n2; | 
|  | int top1=0; | 
|  | int bot1 = ftell(f[F1]); | 
|  | get1(); | 
|  | get2(); | 
|  | while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { | 
|  | if(n1>0 && n2>0 && comp()>0 || n1==0) { | 
|  | if(a2) output(0, n2); | 
|  | get2(); | 
|  | } else if(n1>0 && n2>0 && comp()<0 || n2==0) { | 
|  | if(a1) output(n1, 0); | 
|  | bot1 = ftell(f[F1]); | 
|  | get1(); | 
|  | } else /*(n1>0 && n2>0 && comp()==0)*/ { | 
|  | while(n2>0 && comp()==0) { | 
|  | output(n1, n2); | 
|  | top1 = ftell(f[F1]); | 
|  | get1(); | 
|  | } | 
|  | fseek(f[F1], bot1, 0); | 
|  | get2(); | 
|  | get1(); | 
|  | for(;;) { | 
|  | if(n1>0 && n2>0 && comp()==0) { | 
|  | output(n1, n2); | 
|  | get1(); | 
|  | } else if(n1>0 && n2>0 && comp()>0 || n1==0) { | 
|  | fseek(f[F1], bot1, 0); | 
|  | get2(); | 
|  | get1(); | 
|  | } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{ | 
|  | fseek(f[F1], top1, 0); | 
|  | bot1 = top1; | 
|  | get1(); | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | int | 
|  | input(int n)		/* get input line and split into fields */ | 
|  | { | 
|  | register int i, c; | 
|  | Rune *bp; | 
|  | Rune **pp; | 
|  | char line[BUFSIZ]; | 
|  |  | 
|  | bp = buf[n]; | 
|  | pp = ppi[n]; | 
|  | if (fgets(line, BUFSIZ, f[n]) == 0) | 
|  | return(0); | 
|  | strtorune(bp, line); | 
|  | i = 0; | 
|  | do { | 
|  | i++; | 
|  | if (sep1 == ' ')	/* strip multiples */ | 
|  | while ((c = *bp) == sep1 || c == sep2) | 
|  | bp++;	/* skip blanks */ | 
|  | *pp++ = bp;	/* record beginning */ | 
|  | while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0') | 
|  | bp++; | 
|  | *bp++ = '\0';	/* mark end by overwriting blank */ | 
|  | } while (c != '\n' && c != '\0' && i < NFLD-1); | 
|  | if (c != '\n') | 
|  | discard++; | 
|  |  | 
|  | *pp = 0; | 
|  | return(i); | 
|  | } | 
|  |  | 
|  | void | 
|  | output(int on1, int on2)	/* print items from olist */ | 
|  | { | 
|  | int i; | 
|  | Rune *temp; | 
|  | char buf[BUFSIZ]; | 
|  |  | 
|  | if (no <= 0) {	/* default case */ | 
|  | printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2])); | 
|  | for (i = 0; i < on1; i++) | 
|  | if (i != j1) | 
|  | printf("%s%s", sepstr, runetostr(buf, ppi[F1][i])); | 
|  | for (i = 0; i < on2; i++) | 
|  | if (i != j2) | 
|  | printf("%s%s", sepstr, runetostr(buf, ppi[F2][i])); | 
|  | printf("\n"); | 
|  | } else { | 
|  | for (i = 0; i < no; i++) { | 
|  | if (olistf[i]==F0 && on1>j1) | 
|  | temp = ppi[F1][j1]; | 
|  | else if (olistf[i]==F0 && on2>j2) | 
|  | temp = ppi[F2][j2]; | 
|  | else { | 
|  | temp = ppi[olistf[i]][olist[i]]; | 
|  | if(olistf[i]==F1 && on1<=olist[i] || | 
|  | olistf[i]==F2 && on2<=olist[i] || | 
|  | *temp==0) | 
|  | temp = null; | 
|  | } | 
|  | printf("%s", runetostr(buf, temp)); | 
|  | if (i == no - 1) | 
|  | printf("\n"); | 
|  | else | 
|  | printf("%s", sepstr); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
/*
 * Report a fatal error and terminate.
 * s1 is used as a printf format with s2 as its single argument; the
 * message is written to stderr as "join: <message>\n", then exits()
 * is called with s1 as the exit string.
 */
void
error(char *s1, char *s2)
{
	fputs("join: ", stderr);
	fprintf(stderr, s1, s2);
	fputc('\n', stderr);
	exits(s1);
}
|  |  | 
/*
 * Fetch the argument of the option word at (*argvp)[1].
 *
 * If text follows the option letter ("-tX"), that text is returned
 * and the caller's argc/argv are left alone.  Otherwise the next
 * word is the argument: *argcp and *argvp are advanced by one and the
 * word is returned.  A missing next word, or one starting with '-',
 * is reported through error().
 */
char *
getoptarg(int *argcp, char ***argvp)
{
	char **av = *argvp;
	char *opt = av[1];

	if (opt[2] != 0)
		return opt + 2;
	if (*argcp <= 2 || av[2][0] == '-')
		error("incomplete option %s", opt);
	*argcp = *argcp - 1;
	*argvp = av + 1;
	return av[2];
}
|  |  | 
|  | void | 
|  | oparse(char *s) | 
|  | { | 
|  | for (no = 0; no<2*NFLD && *s; no++, s++) { | 
|  | switch(*s) { | 
|  | case 0: | 
|  | return; | 
|  | case '0': | 
|  | olistf[no] = F0; | 
|  | break; | 
|  | case '1': | 
|  | case '2': | 
|  | if(s[1] == '.' && isdigit((uchar)s[2])) { | 
|  | olistf[no] = *s=='1'? F1: F2; | 
|  | olist[no] = atoi(s += 2); | 
|  | break; | 
|  | } /* fall thru */ | 
|  | default: | 
|  | error("invalid -o list", ""); | 
|  | } | 
|  | if(s[1] == ',') | 
|  | s++; | 
|  | } | 
|  | } |