/* join F1 F2 on stuff */
| 2 | #include <u.h> |
| 3 | #include <libc.h> |
| 4 | #include <stdio.h> |
| 5 | #include <ctype.h> |
/* Indices selecting file 1 or file 2 in f[], buf[] and ppi[]. */
#define F1 0
#define F2 1
/* Marks a "0" entry in an -o list: output() prints the join field for it. */
#define F0 3
#define NFLD 100	/* max field per line */
/* Compare the join fields of the current lines of the two files. */
#define comp() runecmp(ppi[F1][j1],ppi[F2][j2])
FILE *f[2];		/* the two input streams, indexed by F1/F2 */
Rune buf[2][BUFSIZ];	/*input lines */
Rune *ppi[2][NFLD+1];	/* pointers to fields in lines */
Rune *s1,*s2;		/* set once in main(); no other use is visible in this file */
/* NOTE(review): presumably renamed to avoid clashing with library symbols called j1/j2 - confirm */
#define j1 joinj1
#define j2 joinj2

int j1 = 1;		/* join of this field of file 1 */
int j2 = 1;		/* join of this field of file 2 */
int olist[2*NFLD];	/* output these fields */
int olistf[2*NFLD];	/* from these files */
int no;			/* number of entries in olist */
Rune sep1 = ' ';	/* default field separator */
Rune sep2 = '\t';	/* second separator; -t sets it equal to sep1 */
char *sepstr=" ";	/* string printed between output fields; replaced by the -t argument */
int discard;		/* count of truncated lines */
Rune null[BUFSIZ]/* = L""*/;	/* printed by output() for missing or empty -o fields; set by -e */
int a1;			/* nonzero after -a1: also print unpaired lines of file 1 */
int a2;			/* nonzero after -a2: also print unpaired lines of file 2 */

char *getoptarg(int*, char***);
void output(int, int);
int input(int);
void oparse(char*);
void error(char*, char*);
void seek1(void), seek2(void);
Rune *strtorune(Rune *, char *);
| 38 | |
| 39 | |
| 40 | void |
| 41 | main(int argc, char **argv) |
| 42 | { |
| 43 | int i; |
| 44 | |
| 45 | while (argc > 1 && argv[1][0] == '-') { |
| 46 | if (argv[1][1] == '\0') |
| 47 | break; |
| 48 | switch (argv[1][1]) { |
| 49 | case '-': |
| 50 | argc--; |
| 51 | argv++; |
| 52 | goto proceed; |
| 53 | case 'a': |
| 54 | switch(*getoptarg(&argc, &argv)) { |
| 55 | case '1': |
| 56 | a1++; |
| 57 | break; |
| 58 | case '2': |
| 59 | a2++; |
| 60 | break; |
| 61 | default: |
| 62 | error("incomplete option -a",""); |
| 63 | } |
| 64 | break; |
| 65 | case 'e': |
| 66 | strtorune(null, getoptarg(&argc, &argv)); |
| 67 | break; |
| 68 | case 't': |
| 69 | sepstr=getoptarg(&argc, &argv); |
| 70 | chartorune(&sep1, sepstr); |
| 71 | sep2 = sep1; |
| 72 | break; |
| 73 | case 'o': |
| 74 | if(argv[1][2]!=0 || |
| 75 | argc>2 && strchr(argv[2],',')!=0) |
| 76 | oparse(getoptarg(&argc, &argv)); |
| 77 | else for (no = 0; no<2*NFLD && argc>2; no++){ |
| 78 | if (argv[2][0] == '1' && argv[2][1] == '.') { |
| 79 | olistf[no] = F1; |
| 80 | olist[no] = atoi(&argv[2][2]); |
| 81 | } else if (argv[2][0] == '2' && argv[2][1] == '.') { |
| 82 | olist[no] = atoi(&argv[2][2]); |
| 83 | olistf[no] = F2; |
| 84 | } else if (argv[2][0] == '0') |
| 85 | olistf[no] = F0; |
| 86 | else |
| 87 | break; |
| 88 | argc--; |
| 89 | argv++; |
| 90 | } |
| 91 | break; |
| 92 | case 'j': |
| 93 | if(argc <= 2) |
| 94 | break; |
| 95 | if (argv[1][2] == '1') |
| 96 | j1 = atoi(argv[2]); |
| 97 | else if (argv[1][2] == '2') |
| 98 | j2 = atoi(argv[2]); |
| 99 | else |
| 100 | j1 = j2 = atoi(argv[2]); |
| 101 | argc--; |
| 102 | argv++; |
| 103 | break; |
| 104 | case '1': |
| 105 | j1 = atoi(getoptarg(&argc, &argv)); |
| 106 | break; |
| 107 | case '2': |
| 108 | j2 = atoi(getoptarg(&argc, &argv)); |
| 109 | break; |
| 110 | } |
| 111 | argc--; |
| 112 | argv++; |
| 113 | } |
| 114 | proceed: |
| 115 | for (i = 0; i < no; i++) |
| 116 | if (olist[i]-- > NFLD) /* 0 origin */ |
| 117 | error("field number too big in -o",""); |
| 118 | if (argc != 3) |
| 119 | error("usage: join [-1 x -2 y] [-o list] file1 file2",""); |
| 120 | j1--; |
| 121 | j2--; /* everyone else believes in 0 origin */ |
| 122 | s1 = ppi[F1][j1]; |
| 123 | s2 = ppi[F2][j2]; |
| 124 | if (strcmp(argv[1], "-") == 0) |
| 125 | f[F1] = stdin; |
| 126 | else if ((f[F1] = fopen(argv[1], "r")) == 0) |
| 127 | error("can't open %s", argv[1]); |
| 128 | if(strcmp(argv[2], "-") == 0) { |
| 129 | f[F2] = stdin; |
| 130 | } else if ((f[F2] = fopen(argv[2], "r")) == 0) |
| 131 | error("can't open %s", argv[2]); |
| 132 | |
| 133 | if(ftell(f[F2]) >= 0) |
| 134 | seek2(); |
| 135 | else if(ftell(f[F1]) >= 0) |
| 136 | seek1(); |
| 137 | else |
| 138 | error("neither file is randomly accessible",""); |
| 139 | if (discard) |
| 140 | error("some input line was truncated", ""); |
| 141 | exits(""); |
| 142 | } |
| 143 | int runecmp(Rune *a, Rune *b){ |
| 144 | while(*a==*b){ |
| 145 | if(*a=='\0') return 0; |
| 146 | a++; |
| 147 | b++; |
| 148 | } |
| 149 | if(*a<*b) return -1; |
| 150 | return 1; |
| 151 | } |
| 152 | char *runetostr(char *buf, Rune *r){ |
| 153 | char *s; |
| 154 | for(s=buf;*r;r++) s+=runetochar(s, r); |
| 155 | *s='\0'; |
| 156 | return buf; |
| 157 | } |
| 158 | Rune *strtorune(Rune *buf, char *s){ |
| 159 | Rune *r; |
| 160 | for(r=buf;*s;r++) s+=chartorune(r, s); |
| 161 | *r='\0'; |
| 162 | return buf; |
| 163 | } |
| 164 | /* lazy. there ought to be a clean way to combine seek1 & seek2 */ |
| 165 | #define get1() n1=input(F1) |
| 166 | #define get2() n2=input(F2) |
| 167 | void |
| 168 | seek2() |
| 169 | { |
| 170 | int n1, n2; |
| 171 | int top2=0; |
| 172 | int bot2 = ftell(f[F2]); |
| 173 | get1(); |
| 174 | get2(); |
| 175 | while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { |
| 176 | if(n1>0 && n2>0 && comp()>0 || n1==0) { |
| 177 | if(a2) output(0, n2); |
| 178 | bot2 = ftell(f[F2]); |
| 179 | get2(); |
| 180 | } else if(n1>0 && n2>0 && comp()<0 || n2==0) { |
| 181 | if(a1) output(n1, 0); |
| 182 | get1(); |
| 183 | } else /*(n1>0 && n2>0 && comp()==0)*/ { |
| 184 | while(n2>0 && comp()==0) { |
| 185 | output(n1, n2); |
| 186 | top2 = ftell(f[F2]); |
| 187 | get2(); |
| 188 | } |
| 189 | fseek(f[F2], bot2, 0); |
| 190 | get2(); |
| 191 | get1(); |
| 192 | for(;;) { |
| 193 | if(n1>0 && n2>0 && comp()==0) { |
| 194 | output(n1, n2); |
| 195 | get2(); |
| 196 | } else if(n1>0 && n2>0 && comp()<0 || n2==0) { |
| 197 | fseek(f[F2], bot2, 0); |
| 198 | get2(); |
| 199 | get1(); |
| 200 | } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{ |
| 201 | fseek(f[F2], top2, 0); |
| 202 | bot2 = top2; |
| 203 | get2(); |
| 204 | break; |
| 205 | } |
| 206 | } |
| 207 | } |
| 208 | } |
| 209 | } |
| 210 | void |
| 211 | seek1() |
| 212 | { |
| 213 | int n1, n2; |
| 214 | int top1=0; |
| 215 | int bot1 = ftell(f[F1]); |
| 216 | get1(); |
| 217 | get2(); |
| 218 | while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { |
| 219 | if(n1>0 && n2>0 && comp()>0 || n1==0) { |
| 220 | if(a2) output(0, n2); |
| 221 | get2(); |
| 222 | } else if(n1>0 && n2>0 && comp()<0 || n2==0) { |
| 223 | if(a1) output(n1, 0); |
| 224 | bot1 = ftell(f[F1]); |
| 225 | get1(); |
| 226 | } else /*(n1>0 && n2>0 && comp()==0)*/ { |
| 227 | while(n2>0 && comp()==0) { |
| 228 | output(n1, n2); |
| 229 | top1 = ftell(f[F1]); |
| 230 | get1(); |
| 231 | } |
| 232 | fseek(f[F1], bot1, 0); |
| 233 | get2(); |
| 234 | get1(); |
| 235 | for(;;) { |
| 236 | if(n1>0 && n2>0 && comp()==0) { |
| 237 | output(n1, n2); |
| 238 | get1(); |
| 239 | } else if(n1>0 && n2>0 && comp()>0 || n1==0) { |
| 240 | fseek(f[F1], bot1, 0); |
| 241 | get2(); |
| 242 | get1(); |
| 243 | } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{ |
| 244 | fseek(f[F1], top1, 0); |
| 245 | bot1 = top1; |
| 246 | get1(); |
| 247 | break; |
| 248 | } |
| 249 | } |
| 250 | } |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | int |
| 255 | input(int n) /* get input line and split into fields */ |
| 256 | { |
| 257 | register int i, c; |
| 258 | Rune *bp; |
| 259 | Rune **pp; |
| 260 | char line[BUFSIZ]; |
| 261 | |
| 262 | bp = buf[n]; |
| 263 | pp = ppi[n]; |
| 264 | if (fgets(line, BUFSIZ, f[n]) == 0) |
| 265 | return(0); |
| 266 | strtorune(bp, line); |
| 267 | i = 0; |
| 268 | do { |
| 269 | i++; |
| 270 | if (sep1 == ' ') /* strip multiples */ |
| 271 | while ((c = *bp) == sep1 || c == sep2) |
| 272 | bp++; /* skip blanks */ |
| 273 | *pp++ = bp; /* record beginning */ |
| 274 | while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0') |
| 275 | bp++; |
| 276 | *bp++ = '\0'; /* mark end by overwriting blank */ |
| 277 | } while (c != '\n' && c != '\0' && i < NFLD-1); |
| 278 | if (c != '\n') |
| 279 | discard++; |
| 280 | |
| 281 | *pp = 0; |
| 282 | return(i); |
| 283 | } |
| 284 | |
| 285 | void |
| 286 | output(int on1, int on2) /* print items from olist */ |
| 287 | { |
| 288 | int i; |
| 289 | Rune *temp; |
| 290 | char buf[BUFSIZ]; |
| 291 | |
| 292 | if (no <= 0) { /* default case */ |
| 293 | printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2])); |
| 294 | for (i = 0; i < on1; i++) |
| 295 | if (i != j1) |
| 296 | printf("%s%s", sepstr, runetostr(buf, ppi[F1][i])); |
| 297 | for (i = 0; i < on2; i++) |
| 298 | if (i != j2) |
| 299 | printf("%s%s", sepstr, runetostr(buf, ppi[F2][i])); |
| 300 | printf("\n"); |
| 301 | } else { |
| 302 | for (i = 0; i < no; i++) { |
| 303 | if (olistf[i]==F0 && on1>j1) |
| 304 | temp = ppi[F1][j1]; |
| 305 | else if (olistf[i]==F0 && on2>j2) |
| 306 | temp = ppi[F2][j2]; |
| 307 | else { |
| 308 | temp = ppi[olistf[i]][olist[i]]; |
| 309 | if(olistf[i]==F1 && on1<=olist[i] || |
| 310 | olistf[i]==F2 && on2<=olist[i] || |
| 311 | *temp==0) |
| 312 | temp = null; |
| 313 | } |
| 314 | printf("%s", runetostr(buf, temp)); |
| 315 | if (i == no - 1) |
| 316 | printf("\n"); |
| 317 | else |
| 318 | printf("%s", sepstr); |
| 319 | } |
| 320 | } |
| 321 | } |
| 322 | |
/*
 * Report a fatal error and terminate.
 * s1 is a printf-style format that may contain one %s, filled from s2.
 * The message is written to stderr prefixed with "join: " and followed
 * by a newline; s1 is then passed to exits().
 */
void
error(char *s1, char *s2)
{
	fputs("join: ", stderr);
	fprintf(stderr, s1, s2);
	fputc('\n', stderr);
	exits(s1);
}
| 331 | |
/*
 * Return the argument of the option currently at argv[1].
 * If text follows the option letter in the same word, that text is the
 * argument.  Otherwise the next word is taken and *argcp / *argvp are
 * advanced by one so the caller's usual shift skips it; a missing next
 * word, or one starting with '-', is reported through error().
 */
char *
getoptarg(int *argcp, char ***argvp)
{
	char **av;

	av = *argvp;
	if(av[1][2] != '\0')
		return av[1] + 2;
	if(*argcp <= 2 || av[2][0] == '-')
		error("incomplete option %s", av[1]);
	--*argcp;
	*argvp = av + 1;
	return av[2];
}
| 345 | |
| 346 | void |
| 347 | oparse(char *s) |
| 348 | { |
| 349 | for (no = 0; no<2*NFLD && *s; no++, s++) { |
| 350 | switch(*s) { |
| 351 | case 0: |
| 352 | return; |
| 353 | case '0': |
| 354 | olistf[no] = F0; |
| 355 | break; |
| 356 | case '1': |
| 357 | case '2': |
| 358 | if(s[1] == '.' && isdigit(s[2])) { |
| 359 | olistf[no] = *s=='1'? F1: F2; |
| 360 | olist[no] = atoi(s += 2); |
| 361 | break; |
| 362 | } /* fall thru */ |
| 363 | default: |
| 364 | error("invalid -o list", ""); |
| 365 | } |
| 366 | if(s[1] == ',') |
| 367 | s++; |
| 368 | } |
| 369 | } |