| /* |
| * Deal with duplicated lines in a file |
| */ |
| #include <u.h> |
| #include <libc.h> |
| #include <bio.h> |
| #include <ctype.h> |
| |
| #define SIZE 8000 /* bytes allocated for each of the two line buffers */ |
| |
| int fields = 0; /* -N: blank-separated fields skip() passes over before comparing */ |
| int letters = 0; /* +N: characters skip() passes over after the fields */ |
| int linec = 0; /* lines counted in the current run; printed by pline() in mode 'c' */ |
| char mode; /* option character taken from a -x argument; 0 if none given */ |
| int uniq; /* bumped by equal() on a full match, cleared by pline() */ |
| char *b1, *b2; /* the two line buffers, bsize bytes each */ |
| long bsize; /* size of b1 and b2; set to SIZE in main() */ |
| Biobuf fin; /* buffered input, initialised in main() */ |
| Biobuf fout; /* buffered output on file descriptor 1 */ |
| |
| int gline(char *buf); /* read one line into buf; returns 1 when no line is available */ |
| void pline(char *buf); /* print buf, subject to mode and uniq */ |
| int equal(char *b1, char *b2); /* nonzero if the two lines compare equal */ |
| char* skip(char *s); /* return s advanced past the ignored fields and letters */ |
| |
| void |
| main(int argc, char *argv[]) |
| { |
| int f; |
| |
| bsize = SIZE; |
| b1 = malloc(bsize); |
| b2 = malloc(bsize); |
| f = 0; |
| while(argc > 1) { |
| if(*argv[1] == '-') { |
| if(isdigit(argv[1][1])) |
| fields = atoi(&argv[1][1]); |
| else |
| mode = argv[1][1]; |
| argc--; |
| argv++; |
| continue; |
| } |
| if(*argv[1] == '+') { |
| letters = atoi(&argv[1][1]); |
| argc--; |
| argv++; |
| continue; |
| } |
| f = open(argv[1], 0); |
| if(f < 0) { |
| fprint(2, "cannot open %s\n", argv[1]); |
| exits("open"); |
| } |
| break; |
| } |
| if(argc > 2) { |
| fprint(2, "unexpected argument %s\n", argv[2]); |
| exits("arg"); |
| } |
| Binit(&fin, f, OREAD); |
| Binit(&fout, 1, OWRITE); |
| |
| if(gline(b1)) |
| exits(0); |
| for(;;) { |
| linec++; |
| if(gline(b2)) { |
| pline(b1); |
| exits(0); |
| } |
| if(!equal(b1, b2)) { |
| pline(b1); |
| linec = 0; |
| do { |
| linec++; |
| if(gline(b1)) { |
| pline(b2); |
| exits(0); |
| } |
| } while(equal(b2, b1)); |
| pline(b2); |
| linec = 0; |
| } |
| } |
| } |
| |
| int |
| gline(char *buf) |
| { |
| char *p; |
| |
| p = Brdline(&fin, '\n'); |
| if(p == 0) |
| return 1; |
| if(fin.rdline >= bsize-1) { |
| fprint(2, "line too long\n"); |
| exits("too long"); |
| } |
| memmove(buf, p, fin.rdline); |
| buf[fin.rdline-1] = 0; |
| return 0; |
| } |
| |
| void |
| pline(char *buf) |
| { |
| |
| switch(mode) { |
| |
| case 'u': |
| if(uniq) { |
| uniq = 0; |
| return; |
| } |
| break; |
| |
| case 'd': |
| if(uniq) |
| break; |
| return; |
| |
| case 'c': |
| Bprint(&fout, "%4d ", linec); |
| } |
| uniq = 0; |
| Bprint(&fout, "%s\n", buf); |
| } |
| |
| int |
| equal(char *b1, char *b2) |
| { |
| char c; |
| |
| if(fields || letters) { |
| b1 = skip(b1); |
| b2 = skip(b2); |
| } |
| for(;;) { |
| c = *b1++; |
| if(c != *b2++) { |
| if(c == 0 && mode == 's') |
| return 1; |
| return 0; |
| } |
| if(c == 0) { |
| uniq++; |
| return 1; |
| } |
| } |
| } |
| |
| char* |
| skip(char *s) |
| { |
| int nf, nl; |
| |
| nf = nl = 0; |
| while(nf++ < fields) { |
| while(*s == ' ' || *s == '\t') |
| s++; |
| while(!(*s == ' ' || *s == '\t' || *s == 0) ) |
| s++; |
| } |
| while(nl++ < letters && *s != 0) |
| s++; |
| return s; |
| } |