blob: e758786ce8c65abd7d7572ab9ae2f042a5b43970 [file] [log] [blame]
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#include <regexp.h>
/*
 * File-wide state shared by main() and the file-opening helpers.
 */
char digit[] = "0123456789";	/* decimal digits; not referenced by the code visible in this file */
char *suffix = "";	/* appended to a name taken from an -e subexpression match; set by -s */
char *stem = "x";	/* prefix of the sequentially generated names; set by -f */
char suff[] = "aa";	/* two-letter sequence suffix for the next generated name; advanced by nextfile() */
char name[200];	/* name of the file openf() will create */
Biobuf bout;	/* buffer behind the current output file */
Biobuf *output = &bout;	/* where split data is written; re-initialised on each new file by openf() */
/* Helpers defined later in this file. */
extern int nextfile(void);
extern int matchfile(Resub*);
extern void openf(void);
extern char *fold(char*,int);
extern void usage(void);
extern void badexp(void);
/*
 * split: copy the input (the single file argument, or standard input
 * when none is given) into a series of output files.
 *
 * Options:
 *	-l num, -n num	lines per output file (default 1000)
 *	-e exp		start a new file at each line matching exp; if the
 *			first parenthesised subexpression matched, its text
 *			(plus the -s suffix) names the file
 *	-f stem		prefix for sequentially named files (default "x")
 *	-s suff		suffix for names taken from a subexpression
 *	-x		do not copy the matching line itself
 *	-i		fold pattern and lines to lower case before matching
 *
 * Without -e the input is cut every n lines.  A trailing partial line
 * is appended to the last file.
 */
void
main(int argc, char *argv[])
{
	Reprog *prog;
	char *pattern = 0;
	int n = 1000;
	char *line;
	int xflag = 0;
	int iflag = 0;
	int opened = 0;		/* set once an output file has been opened */
	Biobuf bin;
	Biobuf *b = &bin;
	char buf[256];

	ARGBEGIN {
	case 'l':
	case 'n':
		n=atoi(EARGF(usage()));
		break;
	case 'e':
		pattern = strdup(EARGF(usage()));
		break;
	case 'f':
		stem = strdup(EARGF(usage()));
		break;
	case 's':
		suffix = strdup(EARGF(usage()));
		break;
	case 'x':
		xflag++;
		break;
	case 'i':
		iflag++;
		break;
	default:
		usage();
		break;
	} ARGEND;

	if(argc < 0 || argc > 1)
		usage();
	if(argc != 0) {
		b = Bopen(argv[0], OREAD);
		if(b == nil) {
			fprint(2, "split: can't open %s: %r\n", argv[0]);
			exits("open");
		}
	} else
		Binit(b, 0, OREAD);

	if(pattern) {
		if(!(prog = regcomp(iflag? fold(pattern,strlen(pattern)): pattern)))
			badexp();
		while((line=Brdline(b,'\n')) != 0) {
			Resub match[2];
			int len = Blinelen(b)-1;	/* line length without the newline */

			memset(match, 0, sizeof match);
			line[len] = 0;	/* replace the newline so regexec sees a string */
			if(regexec(prog,iflag?fold(line,len):line,match,2)) {
				/* matchfile either opens a file or exits */
				opened = 1;
				if(matchfile(match) && xflag)
					continue;
			} else if(!opened) {
				/*
				 * Lines ahead of the first match need somewhere to go.
				 * output always points at bout, so it cannot be
				 * tested against 0 to detect "nothing opened yet";
				 * track that explicitly.  Happens at most once.
				 */
				nextfile();
				opened = 1;
			}
			Bwrite(output, line, len);
			Bputc(output, '\n');
		}
	} else {
		int linecnt = n;	/* forces a new file on the first line */

		while((line=Brdline(b,'\n')) != 0) {
			if(++linecnt > n) {
				nextfile();
				opened = 1;
				linecnt = 1;
			}
			Bwrite(output, line, Blinelen(b));
		}
		/*
		 * in case we didn't end with a newline, tack whatever's
		 * left onto the last file
		 */
		while((n = Bread(b, buf, sizeof(buf))) > 0) {
			/* input with no newline at all: there is no last file yet */
			if(!opened) {
				nextfile();
				opened = 1;
			}
			Bwrite(output, buf, n);
		}
	}
	if(b != nil)
		Bterm(b);
	exits(0);
}
/*
 * Switch output to the next sequentially named file: stem followed by
 * a two-letter suffix running from "aa" to "zz".
 *
 * Returns 1 when a new file was opened.  Once every suffix has been
 * used it prints a warning (the first time only), opens nothing and
 * returns 0, so the caller goes on writing to the file already open.
 * Exits if the resulting name would not fit in name[].
 */
int
nextfile(void)
{
	static int canopen = 1;

	if(suff[0] > 'z') {
		if(canopen)
			fprint(2, "split: file %szz not split\n",stem);
		canopen = 0;
		return canopen;
	}

	/* stem comes from the command line (-f); don't let it overrun name[] */
	if(strlen(stem) + strlen(suff) >= sizeof name) {
		fprint(2, "split: file name %s%s too long\n", stem, suff);
		exits("name");
	}
	strcpy(name, stem);
	strcat(name, suff);

	/* step the suffix for the following call: ..., "az", "ba", ... */
	if(++suff[1] > 'z') {
		suff[1] = 'a';
		++suff[0];
	}
	openf();
	return canopen;
}
/*
 * Open the output file for a line that matched the -e expression.
 *
 * match is the Resub array filled in by regexec.  If the first
 * subexpression (match[1]) matched, the file is named by the matched
 * text followed by suffix and 1 is returned.  Otherwise the next
 * sequentially named file is used and nextfile()'s result is returned.
 * Exits if the name taken from the match would not fit in name[].
 */
int
matchfile(Resub *match)
{
	long len;

	if(match[1].s.sp == nil)
		return nextfile();

	len = match[1].e.ep - match[1].s.sp;
	/* the matched text comes from the input; don't let it overrun name[] */
	if(len < 0 || len + strlen(suffix) >= sizeof name) {
		fprint(2, "split: file name too long\n");
		exits("name");
	}
	strncpy(name, match[1].s.sp, len);
	strcpy(name+len, suffix);	/* also supplies the terminating NUL */
	openf();
	return 1;
}
/*
 * Finish the current output file and start writing to the file named
 * by the global name[].
 *
 * Flushes and terminates the output Biobuf, closes the descriptor
 * opened by the previous call (if any), creates name with mode 0666
 * for writing and re-initialises output on it.  Prints a diagnostic
 * and exits with status "create" if the file cannot be created.
 */
void
openf(void)
{
	static int fd = 0;	/* descriptor of the file opened by the previous call */

	Bflush(output);
	Bterm(output);
	if(fd > 0)
		close(fd);
	fd = create(name,OWRITE,0666);
	if(fd < 0) {
		/* report under this program's own name, not "grep" */
		fprint(2, "split: can't create %s: %r\n", name);
		exits("create");
	}
	Binit(output, fd, OWRITE);
}
char *
fold(char *s, int n)
{
static char *fline;
static int linesize = 0;
char *t;
if(linesize < n+1){
fline = realloc(fline,n+1);
linesize = n+1;
}
for(t=fline; *t++ = tolower((uchar)*s++); )
continue;
/* we assume the 'A'-'Z' only appear as themselves
* in a utf encoding.
*/
return fline;
}
/*
 * Print the command synopsis on standard error and exit with
 * status "usage".
 */
void
usage(void)
{
	static char synopsis[] =
		"usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n";

	fprint(2, "%s", synopsis);
	exits("usage");
}
/*
 * Report that the -e expression failed to compile and exit with
 * status "bad regular expression".
 */
void
badexp(void)
{
	static char status[] = "bad regular expression";

	fprint(2, "split: %s\n", status);
	exits(status);
}