/*
 * Deal with duplicated lines in a file
 */
#include <u.h> | |
#include <libc.h> | |
#include <bio.h> | |
#include <ctype.h> | |
#define SIZE 8000 | |
int fields = 0; | |
int letters = 0; | |
int linec = 0; | |
char mode; | |
int uniq; | |
char *b1, *b2; | |
long bsize; | |
Biobuf fin; | |
Biobuf fout; | |
int gline(char *buf); | |
void pline(char *buf); | |
int equal(char *b1, char *b2); | |
char* skip(char *s); | |
void | |
main(int argc, char *argv[]) | |
{ | |
int f; | |
bsize = SIZE; | |
b1 = malloc(bsize); | |
b2 = malloc(bsize); | |
f = 0; | |
while(argc > 1) { | |
if(*argv[1] == '-') { | |
if(isdigit((uchar)argv[1][1])) | |
fields = atoi(&argv[1][1]); | |
else | |
mode = argv[1][1]; | |
argc--; | |
argv++; | |
continue; | |
} | |
if(*argv[1] == '+') { | |
letters = atoi(&argv[1][1]); | |
argc--; | |
argv++; | |
continue; | |
} | |
f = open(argv[1], 0); | |
if(f < 0) { | |
fprint(2, "cannot open %s\n", argv[1]); | |
exits("open"); | |
} | |
break; | |
} | |
if(argc > 2) { | |
fprint(2, "unexpected argument %s\n", argv[2]); | |
exits("arg"); | |
} | |
Binit(&fin, f, OREAD); | |
Binit(&fout, 1, OWRITE); | |
if(gline(b1)) | |
exits(0); | |
for(;;) { | |
linec++; | |
if(gline(b2)) { | |
pline(b1); | |
exits(0); | |
} | |
if(!equal(b1, b2)) { | |
pline(b1); | |
linec = 0; | |
do { | |
linec++; | |
if(gline(b1)) { | |
pline(b2); | |
exits(0); | |
} | |
} while(equal(b2, b1)); | |
pline(b2); | |
linec = 0; | |
} | |
} | |
} | |
int | |
gline(char *buf) | |
{ | |
char *p; | |
p = Brdline(&fin, '\n'); | |
if(p == 0) | |
return 1; | |
if(fin.rdline >= bsize-1) { | |
fprint(2, "line too long\n"); | |
exits("too long"); | |
} | |
memmove(buf, p, fin.rdline); | |
buf[fin.rdline-1] = 0; | |
return 0; | |
} | |
void | |
pline(char *buf) | |
{ | |
switch(mode) { | |
case 'u': | |
if(uniq) { | |
uniq = 0; | |
return; | |
} | |
break; | |
case 'd': | |
if(uniq) | |
break; | |
return; | |
case 'c': | |
Bprint(&fout, "%4d ", linec); | |
} | |
uniq = 0; | |
Bprint(&fout, "%s\n", buf); | |
} | |
int | |
equal(char *b1, char *b2) | |
{ | |
char c; | |
if(fields || letters) { | |
b1 = skip(b1); | |
b2 = skip(b2); | |
} | |
for(;;) { | |
c = *b1++; | |
if(c != *b2++) { | |
if(c == 0 && mode == 's') | |
return 1; | |
return 0; | |
} | |
if(c == 0) { | |
uniq++; | |
return 1; | |
} | |
} | |
} | |
char* | |
skip(char *s) | |
{ | |
int nf, nl; | |
nf = nl = 0; | |
while(nf++ < fields) { | |
while(*s == ' ' || *s == '\t') | |
s++; | |
while(!(*s == ' ' || *s == '\t' || *s == 0) ) | |
s++; | |
} | |
while(nl++ < letters && *s != 0) | |
s++; | |
return s; | |
} |