rsc | 892de79 | 2004-04-19 05:54:21 +0000 | [diff] [blame] | 1 | #include <u.h> |
| 2 | #include <libc.h> |
| 3 | #include <bio.h> |
| 4 | #include <ctype.h> |
| 5 | |
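/*
 * fmt: block up paragraphs -- fold and (optionally) join input lines so
 * they fit a maximum line length, keeping each paragraph's indentation
 * and possibly adding extra indentation to every line
 */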
| 9 | |
| 10 | int extraindent = 0; /* how many spaces to indent all lines */ |
| 11 | int indent = 0; /* current value of indent, before extra indent */ |
| 12 | int length = 70; /* how many columns per output line */ |
| 13 | int join = 1; /* can lines be joined? */ |
| 14 | int maxtab = 8; |
| 15 | Biobuf bin; |
| 16 | Biobuf bout; |
| 17 | |
/*
 * One word of input text, or a placeholder standing for a blank line.
 */
typedef struct Word {
	int bol;	/* nonzero if this was the first word on its input line */
	int indent;	/* indentation of the word's input line; -1 marks a blank line */
	char text[1];	/* NUL-terminated word; addword allocates room past the struct */
} Word;
| 24 | |
| 25 | void fmt(void); |
| 26 | |
| 27 | void |
| 28 | usage(void) |
| 29 | { |
| 30 | fprint(2, "usage: %s [-j] [-i indent] [-l length] [file...]\n", argv0); |
| 31 | exits("usage"); |
| 32 | } |
| 33 | |
| 34 | void |
| 35 | main(int argc, char **argv) |
| 36 | { |
| 37 | int i, f; |
| 38 | char *s, *err; |
| 39 | |
| 40 | ARGBEGIN{ |
| 41 | case 'i': |
| 42 | extraindent = atoi(EARGF(usage())); |
| 43 | break; |
| 44 | case 'j': |
| 45 | join = 0; |
| 46 | break; |
| 47 | case 'w': |
| 48 | case 'l': |
| 49 | length = atoi(EARGF(usage())); |
| 50 | break; |
| 51 | default: |
| 52 | usage(); |
| 53 | }ARGEND |
| 54 | |
| 55 | if(length <= indent){ |
| 56 | fprint(2, "%s: line length<=indentation\n", argv0); |
| 57 | exits("length"); |
| 58 | } |
| 59 | |
| 60 | s=getenv("tabstop"); |
| 61 | if(s!=nil && atoi(s)>0) |
| 62 | maxtab=atoi(s); |
| 63 | err = nil; |
| 64 | Binit(&bout, 1, OWRITE); |
| 65 | if(argc <= 0){ |
| 66 | Binit(&bin, 0, OREAD); |
| 67 | fmt(); |
| 68 | }else{ |
| 69 | for(i=0; i<argc; i++){ |
| 70 | f = open(argv[i], OREAD); |
| 71 | if(f < 0){ |
| 72 | fprint(2, "%s: can't open %s: %r\n", argv0, argv[i]); |
| 73 | err = "open"; |
| 74 | }else{ |
| 75 | Binit(&bin, f, OREAD); |
| 76 | fmt(); |
| 77 | Bterm(&bin); |
| 78 | if(i != argc-1) |
| 79 | Bputc(&bout, '\n'); |
| 80 | } |
| 81 | } |
| 82 | } |
| 83 | exits(err); |
| 84 | } |
| 85 | |
| 86 | int |
| 87 | indentof(char **linep) |
| 88 | { |
| 89 | int i, ind; |
| 90 | char *line; |
| 91 | |
| 92 | ind = 0; |
| 93 | line = *linep; |
| 94 | for(i=0; line[i]; i++) |
| 95 | switch(line[i]){ |
| 96 | default: |
| 97 | *linep = line; |
| 98 | return ind; |
| 99 | case ' ': |
| 100 | ind++; |
| 101 | break; |
| 102 | case '\t': |
| 103 | ind += maxtab; |
| 104 | ind -= ind%maxtab; |
| 105 | break; |
| 106 | } |
| 107 | |
| 108 | /* plain white space doesn't change the indent */ |
| 109 | *linep = ""; |
| 110 | return indent; |
| 111 | } |
| 112 | |
| 113 | Word** |
| 114 | addword(Word **words, int *nwordp, char *s, int l, int indent, int bol) |
| 115 | { |
| 116 | Word *w; |
| 117 | |
| 118 | w = malloc(sizeof(Word)+l+1); |
| 119 | memmove(w->text, s, l); |
| 120 | w->text[l] = '\0'; |
| 121 | w->indent = indent; |
| 122 | w->bol = bol; |
| 123 | words = realloc(words, (*nwordp+1)*sizeof(Word*)); |
| 124 | words[(*nwordp)++] = w; |
| 125 | return words; |
| 126 | } |
| 127 | |
| 128 | Word** |
| 129 | parseline(char *line, Word **words, int *nwordp) |
| 130 | { |
| 131 | int ind, l, bol; |
| 132 | |
| 133 | ind = indentof(&line); |
| 134 | indent = ind; |
| 135 | bol = 1; |
| 136 | for(;;){ |
| 137 | /* find next word */ |
| 138 | while(*line==' ' || *line=='\t') |
| 139 | line++; |
| 140 | if(*line == '\0'){ |
| 141 | if(bol) |
| 142 | return addword(words, nwordp, "", 0, -1, bol); |
| 143 | break; |
| 144 | } |
| 145 | /* how long is this word? */ |
| 146 | for(l=0; line[l]; l++) |
| 147 | if(line[l]==' ' || line[l]=='\t') |
| 148 | break; |
| 149 | words = addword(words, nwordp, line, l, indent, bol); |
| 150 | bol = 0; |
| 151 | line += l; |
| 152 | } |
| 153 | return words; |
| 154 | } |
| 155 | |
| 156 | void |
| 157 | printindent(int w) |
| 158 | { |
| 159 | while(w >= maxtab){ |
| 160 | Bputc(&bout, '\t'); |
| 161 | w -= maxtab; |
| 162 | } |
| 163 | while(w > 0){ |
| 164 | Bputc(&bout, ' '); |
| 165 | w--; |
| 166 | } |
| 167 | } |
| 168 | |
/*
 * Give extra space if word ends with period, etc.
 *
 * Returns the number of spaces to put after word s: 2 if its last byte
 * is '.', '!' or '?', otherwise 1.  Words shorter than two bytes, and
 * words shorter than four bytes that begin with an upper-case letter
 * (e.g. "Mr."), always get a single space.
 */
int
nspaceafter(char *s)
{
	int n;

	n = strlen(s);
	if(n < 2)
		return 1;
	/*
	 * Cast before calling isupper: a plain char may be negative for
	 * bytes >= 0x80 (e.g. UTF-8 text), which is not a valid argument.
	 */
	if(n < 4 && isupper((unsigned char)s[0]))
		return 1;
	switch(s[n-1]){
	case '.':
	case '!':
	case '?':
		return 2;
	}
	return 1;
}
| 184 | |
| 185 | |
| 186 | void |
| 187 | printwords(Word **w, int nw) |
| 188 | { |
| 189 | int i, j, n, col, nsp; |
| 190 | |
| 191 | /* one output line per loop */ |
| 192 | for(i=0; i<nw; ){ |
| 193 | /* if it's a blank line, print it */ |
| 194 | if(w[i]->indent == -1){ |
| 195 | Bputc(&bout, '\n'); |
| 196 | if(++i == nw) /* out of words */ |
| 197 | break; |
| 198 | } |
| 199 | /* emit leading indent */ |
| 200 | col = extraindent+w[i]->indent; |
| 201 | printindent(col); |
| 202 | /* emit words until overflow; always emit at least one word */ |
| 203 | for(n=0;; n++){ |
| 204 | Bprint(&bout, "%s", w[i]->text); |
| 205 | col += utflen(w[i]->text); |
| 206 | if(++i == nw) |
| 207 | break; /* out of words */ |
| 208 | if(w[i]->indent != w[i-1]->indent) |
| 209 | break; /* indent change */ |
| 210 | nsp = nspaceafter(w[i-1]->text); |
| 211 | if(col+nsp+utflen(w[i]->text) > extraindent+length) |
| 212 | break; /* fold line */ |
| 213 | if(!join && n != 0 && w[i]->bol) |
| 214 | break; |
| 215 | for(j=0; j<nsp; j++) |
| 216 | Bputc(&bout, ' '); /* emit space; another word will follow */ |
| 217 | col += nsp; |
| 218 | } |
| 219 | /* emit newline */ |
| 220 | Bputc(&bout, '\n'); |
| 221 | } |
| 222 | } |
| 223 | |
| 224 | void |
| 225 | fmt(void) |
| 226 | { |
| 227 | char *s; |
| 228 | int i, nw; |
| 229 | Word **w; |
| 230 | |
| 231 | nw = 0; |
| 232 | w = nil; |
| 233 | while((s = Brdstr(&bin, '\n', 1)) != nil){ |
| 234 | w = parseline(s, w, &nw); |
| 235 | free(s); |
| 236 | } |
| 237 | printwords(w, nw); |
| 238 | for(i=0; i<nw; i++) |
| 239 | free(w[i]); |
| 240 | free(w); |
| 241 | } |