blob: 88fb9286481994a367b2327aaba04546fc2be554 [file] [log] [blame]
/*
 * Deal with duplicated lines in a file
 */
4#include <u.h>
5#include <libc.h>
6#include <bio.h>
7#include <ctype.h>
8
9#define SIZE 8000
10
11int fields = 0;
12int letters = 0;
13int linec = 0;
14char mode;
15int uniq;
16char *b1, *b2;
17long bsize;
18Biobuf fin;
19Biobuf fout;
20
21int gline(char *buf);
22void pline(char *buf);
23int equal(char *b1, char *b2);
24char* skip(char *s);
25
26void
27main(int argc, char *argv[])
28{
29 int f;
30
31 bsize = SIZE;
32 b1 = malloc(bsize);
33 b2 = malloc(bsize);
34 f = 0;
35 while(argc > 1) {
36 if(*argv[1] == '-') {
rsc1c171e32005-07-19 18:00:07 +000037 if(isdigit((uchar)argv[1][1]))
rscbc7cb1a2003-11-23 18:04:47 +000038 fields = atoi(&argv[1][1]);
39 else
40 mode = argv[1][1];
41 argc--;
42 argv++;
43 continue;
44 }
45 if(*argv[1] == '+') {
46 letters = atoi(&argv[1][1]);
47 argc--;
48 argv++;
49 continue;
50 }
51 f = open(argv[1], 0);
52 if(f < 0) {
53 fprint(2, "cannot open %s\n", argv[1]);
54 exits("open");
55 }
56 break;
57 }
58 if(argc > 2) {
59 fprint(2, "unexpected argument %s\n", argv[2]);
60 exits("arg");
61 }
62 Binit(&fin, f, OREAD);
63 Binit(&fout, 1, OWRITE);
64
65 if(gline(b1))
66 exits(0);
67 for(;;) {
68 linec++;
69 if(gline(b2)) {
70 pline(b1);
71 exits(0);
72 }
73 if(!equal(b1, b2)) {
74 pline(b1);
75 linec = 0;
76 do {
77 linec++;
78 if(gline(b1)) {
79 pline(b2);
80 exits(0);
81 }
82 } while(equal(b2, b1));
83 pline(b2);
84 linec = 0;
85 }
86 }
87}
88
89int
90gline(char *buf)
91{
92 char *p;
93
94 p = Brdline(&fin, '\n');
95 if(p == 0)
96 return 1;
97 if(fin.rdline >= bsize-1) {
98 fprint(2, "line too long\n");
99 exits("too long");
100 }
101 memmove(buf, p, fin.rdline);
102 buf[fin.rdline-1] = 0;
103 return 0;
104}
105
106void
107pline(char *buf)
108{
109
110 switch(mode) {
111
112 case 'u':
113 if(uniq) {
114 uniq = 0;
115 return;
116 }
117 break;
118
119 case 'd':
120 if(uniq)
121 break;
122 return;
123
124 case 'c':
125 Bprint(&fout, "%4d ", linec);
126 }
127 uniq = 0;
128 Bprint(&fout, "%s\n", buf);
129}
130
131int
132equal(char *b1, char *b2)
133{
134 char c;
135
136 if(fields || letters) {
137 b1 = skip(b1);
138 b2 = skip(b2);
139 }
140 for(;;) {
141 c = *b1++;
142 if(c != *b2++) {
143 if(c == 0 && mode == 's')
144 return 1;
145 return 0;
146 }
147 if(c == 0) {
148 uniq++;
149 return 1;
150 }
151 }
152}
153
154char*
155skip(char *s)
156{
157 int nf, nl;
158
159 nf = nl = 0;
160 while(nf++ < fields) {
161 while(*s == ' ' || *s == '\t')
162 s++;
163 while(!(*s == ' ' || *s == '\t' || *s == 0) )
164 s++;
165 }
166 while(nl++ < letters && *s != 0)
167 s++;
168 return s;
169}