| #include "common.h" |
| #include <ctype.h> |
| |
| void doalldirs(void); |
| void dodir(char*); |
| void dofile(Dir*); |
| void rundir(char*); |
| char* file(char*, char); |
| void warning(char*, void*); |
| void error(char*, void*); |
| int returnmail(char**, char*, char*); |
| void logit(char*, char*, char**); |
| void doload(int); |
| |
| #define HUNK 32 |
| char *cmd; |
| char *root; |
| int debug; |
| int giveup = 2*24*60*60; |
| int load; |
| int limit; |
| |
| /* the current directory */ |
| Dir *dirbuf; |
| long ndirbuf = 0; |
| int nfiles; |
| char *curdir; |
| |
| char *runqlog = "runq"; |
| |
| int *pidlist; |
| char **badsys; /* array of recalcitrant systems */ |
| int nbad; |
| int npid = 50; |
| int sflag; /* single thread per directory */ |
| int aflag; /* all directories */ |
| int Eflag; /* ignore E.xxxxxx dates */ |
| int Rflag; /* no giving up, ever */ |
| |
| void |
| usage(void) |
| { |
| fprint(2, "usage: runq [-adsE] [-q dir] [-l load] [-t time] [-r nfiles] [-n nprocs] q-root cmd\n"); |
| exits(""); |
| } |
| |
| void |
| main(int argc, char **argv) |
| { |
| char *qdir, *x; |
| |
| qdir = 0; |
| |
| ARGBEGIN{ |
| case 'l': |
| x = ARGF(); |
| if(x == 0) |
| usage(); |
| load = atoi(x); |
| if(load < 0) |
| load = 0; |
| break; |
| case 'E': |
| Eflag++; |
| break; |
| case 'R': /* no giving up -- just leave stuff in the queue */ |
| Rflag++; |
| break; |
| case 'a': |
| aflag++; |
| break; |
| case 'd': |
| debug++; |
| break; |
| case 'r': |
| limit = atoi(ARGF()); |
| break; |
| case 's': |
| sflag++; |
| break; |
| case 't': |
| giveup = 60*60*atoi(ARGF()); |
| break; |
| case 'q': |
| qdir = ARGF(); |
| if(qdir == 0) |
| usage(); |
| break; |
| case 'n': |
| npid = atoi(ARGF()); |
| if(npid == 0) |
| usage(); |
| break; |
| }ARGEND; |
| |
| if(argc != 2) |
| usage(); |
| |
| pidlist = malloc(npid*sizeof(*pidlist)); |
| if(pidlist == 0) |
| error("can't malloc", 0); |
| |
| if(aflag == 0 && qdir == 0) { |
| qdir = getuser(); |
| if(qdir == 0) |
| error("unknown user", 0); |
| } |
| root = argv[0]; |
| cmd = argv[1]; |
| |
| if(chdir(root) < 0) |
| error("can't cd to %s", root); |
| |
| doload(1); |
| if(aflag) |
| doalldirs(); |
| else |
| dodir(qdir); |
| doload(0); |
| exits(0); |
| } |
| |
| int |
| emptydir(char *name) |
| { |
| int fd; |
| long n; |
| char buf[2048]; |
| |
| fd = open(name, OREAD); |
| if(fd < 0) |
| return 1; |
| n = read(fd, buf, sizeof(buf)); |
| close(fd); |
| if(n <= 0) { |
| if(debug) |
| fprint(2, "removing directory %s\n", name); |
| syslog(0, runqlog, "rmdir %s", name); |
| sysremove(name); |
| return 1; |
| } |
| return 0; |
| } |
| |
| int |
| forkltd(void) |
| { |
| int i; |
| int pid; |
| |
| for(i = 0; i < npid; i++){ |
| if(pidlist[i] <= 0) |
| break; |
| } |
| |
| while(i >= npid){ |
| pid = waitpid(); |
| if(pid < 0){ |
| syslog(0, runqlog, "forkltd confused"); |
| exits(0); |
| } |
| |
| for(i = 0; i < npid; i++) |
| if(pidlist[i] == pid) |
| break; |
| } |
| pidlist[i] = fork(); |
| return pidlist[i]; |
| } |
| |
| /* |
| * run all user directories, must be bootes (or root on unix) to do this |
| */ |
| void |
| doalldirs(void) |
| { |
| Dir *db; |
| int fd; |
| long i, n; |
| |
| |
| fd = open(".", OREAD); |
| if(fd == -1){ |
| warning("reading %s", root); |
| return; |
| } |
| n = sysdirreadall(fd, &db); |
| if(n > 0){ |
| for(i=0; i<n; i++){ |
| if(db[i].qid.type & QTDIR){ |
| if(emptydir(db[i].name)) |
| continue; |
| switch(forkltd()){ |
| case -1: |
| syslog(0, runqlog, "out of procs"); |
| doload(0); |
| exits(0); |
| case 0: |
| if(sysdetach() < 0) |
| error("%r", 0); |
| dodir(db[i].name); |
| exits(0); |
| default: |
| break; |
| } |
| } |
| } |
| free(db); |
| } |
| close(fd); |
| } |
| |
| /* |
| * cd to a user directory and run it |
| */ |
| void |
| dodir(char *name) |
| { |
| curdir = name; |
| |
| if(chdir(name) < 0){ |
| warning("cd to %s", name); |
| return; |
| } |
| if(debug) |
| fprint(2, "running %s\n", name); |
| rundir(name); |
| chdir(".."); |
| } |
| |
| /* |
| * run the current directory |
| */ |
| void |
| rundir(char *name) |
| { |
| int fd; |
| long i; |
| |
| if(aflag && sflag) |
| fd = sysopenlocked(".", OREAD); |
| else |
| fd = open(".", OREAD); |
| if(fd == -1){ |
| warning("reading %s", name); |
| return; |
| } |
| nfiles = sysdirreadall(fd, &dirbuf); |
| if(nfiles > 0){ |
| for(i=0; i<nfiles; i++){ |
| if(dirbuf[i].name[0]!='C' || dirbuf[i].name[1]!='.') |
| continue; |
| dofile(&dirbuf[i]); |
| } |
| free(dirbuf); |
| } |
| if(aflag && sflag) |
| sysunlockfile(fd); |
| else |
| close(fd); |
| } |
| |
| /* |
| * free files matching name in the current directory |
| */ |
| void |
| remmatch(char *name) |
| { |
| long i; |
| |
| syslog(0, runqlog, "removing %s/%s", curdir, name); |
| |
| for(i=0; i<nfiles; i++){ |
| if(strcmp(&dirbuf[i].name[1], &name[1]) == 0) |
| sysremove(dirbuf[i].name); |
| } |
| |
| /* error file (may have) appeared after we read the directory */ |
| /* stomp on data file in case of phase error */ |
| sysremove(file(name, 'D')); |
| sysremove(file(name, 'E')); |
| } |
| |
| /* |
| * like trylock, but we've already got the lock on fd, |
| * and don't want an L. lock file. |
| */ |
| static Mlock * |
| keeplockalive(char *path, int fd) |
| { |
| char buf[1]; |
| Mlock *l; |
| |
| l = malloc(sizeof(Mlock)); |
| if(l == 0) |
| return 0; |
| l->fd = fd; |
| l->name = s_new(); |
| s_append(l->name, path); |
| |
| /* fork process to keep lock alive until sysunlock(l) */ |
| switch(l->pid = rfork(RFPROC)){ |
| default: |
| break; |
| case 0: |
| fd = l->fd; |
| for(;;){ |
| sleep(1000*60); |
| if(pread(fd, buf, 1, 0) < 0) |
| break; |
| } |
| _exits(0); |
| } |
| return l; |
| } |
| |
| /* |
| * try a message |
| */ |
| void |
| dofile(Dir *dp) |
| { |
| Dir *d; |
| int dfd, ac, dtime, efd, pid, i, etime; |
| char *buf, *cp, **av; |
| Waitmsg *wm; |
| Biobuf *b; |
| Mlock *l = nil; |
| |
| if(debug) |
| fprint(2, "dofile %s\n", dp->name); |
| /* |
| * if no data file or empty control or data file, just clean up |
| * the empty control file must be 15 minutes old, to minimize the |
| * chance of a race. |
| */ |
| d = dirstat(file(dp->name, 'D')); |
| if(d == nil){ |
| syslog(0, runqlog, "no data file for %s", dp->name); |
| remmatch(dp->name); |
| return; |
| } |
| if(dp->length == 0){ |
| if(time(0)-dp->mtime > 15*60){ |
| syslog(0, runqlog, "empty ctl file for %s", dp->name); |
| remmatch(dp->name); |
| } |
| return; |
| } |
| dtime = d->mtime; |
| free(d); |
| |
| /* |
| * retry times depend on the age of the errors file |
| */ |
| if(!Eflag && (d = dirstat(file(dp->name, 'E'))) != nil){ |
| etime = d->mtime; |
| free(d); |
| if(etime - dtime < 60*60){ |
| /* up to the first hour, try every 15 minutes */ |
| if(time(0) - etime < 15*60) |
| return; |
| } else { |
| /* after the first hour, try once an hour */ |
| if(time(0) - etime < 60*60) |
| return; |
| } |
| |
| } |
| |
| /* |
| * open control and data |
| */ |
| b = sysopen(file(dp->name, 'C'), "rl", 0660); |
| if(b == 0) { |
| if(debug) |
| fprint(2, "can't open %s: %r\n", file(dp->name, 'C')); |
| return; |
| } |
| dfd = open(file(dp->name, 'D'), OREAD); |
| if(dfd < 0){ |
| if(debug) |
| fprint(2, "can't open %s: %r\n", file(dp->name, 'D')); |
| Bterm(b); |
| sysunlockfile(Bfildes(b)); |
| return; |
| } |
| |
| /* |
| * make arg list |
| * - read args into (malloc'd) buffer |
| * - malloc a vector and copy pointers to args into it |
| */ |
| buf = malloc(dp->length+1); |
| if(buf == 0){ |
| warning("buffer allocation", 0); |
| Bterm(b); |
| sysunlockfile(Bfildes(b)); |
| close(dfd); |
| return; |
| } |
| if(Bread(b, buf, dp->length) != dp->length){ |
| warning("reading control file %s\n", dp->name); |
| Bterm(b); |
| sysunlockfile(Bfildes(b)); |
| close(dfd); |
| free(buf); |
| return; |
| } |
| buf[dp->length] = 0; |
| av = malloc(2*sizeof(char*)); |
| if(av == 0){ |
| warning("argv allocation", 0); |
| close(dfd); |
| free(buf); |
| Bterm(b); |
| sysunlockfile(Bfildes(b)); |
| return; |
| } |
| for(ac = 1, cp = buf; *cp; ac++){ |
| while(isspace(*cp)) |
| *cp++ = 0; |
| if(*cp == 0) |
| break; |
| |
| av = realloc(av, (ac+2)*sizeof(char*)); |
| if(av == 0){ |
| warning("argv allocation", 0); |
| close(dfd); |
| free(buf); |
| Bterm(b); |
| sysunlockfile(Bfildes(b)); |
| return; |
| } |
| av[ac] = cp; |
| while(*cp && !isspace(*cp)){ |
| if(*cp++ == '"'){ |
| while(*cp && *cp != '"') |
| cp++; |
| if(*cp) |
| cp++; |
| } |
| } |
| } |
| av[0] = cmd; |
| av[ac] = 0; |
| |
| if(!Eflag &&time(0) - dtime > giveup){ |
| if(returnmail(av, dp->name, "Giveup") != 0) |
| logit("returnmail failed", dp->name, av); |
| remmatch(dp->name); |
| goto done; |
| } |
| |
| for(i = 0; i < nbad; i++){ |
| if(strcmp(av[3], badsys[i]) == 0) |
| goto done; |
| } |
| |
| /* |
| * Ken's fs, for example, gives us 5 minutes of inactivity before |
| * the lock goes stale, so we have to keep reading it. |
| */ |
| l = keeplockalive(file(dp->name, 'C'), Bfildes(b)); |
| |
| /* |
| * transfer |
| */ |
| pid = fork(); |
| switch(pid){ |
| case -1: |
| sysunlock(l); |
| sysunlockfile(Bfildes(b)); |
| syslog(0, runqlog, "out of procs"); |
| exits(0); |
| case 0: |
| if(debug) { |
| fprint(2, "Starting %s", cmd); |
| for(ac = 0; av[ac]; ac++) |
| fprint(2, " %s", av[ac]); |
| fprint(2, "\n"); |
| } |
| logit("execing", dp->name, av); |
| close(0); |
| dup(dfd, 0); |
| close(dfd); |
| close(2); |
| efd = open(file(dp->name, 'E'), OWRITE); |
| if(efd < 0){ |
| if(debug) syslog(0, "runq", "open %s as %s: %r", file(dp->name,'E'), getuser()); |
| efd = create(file(dp->name, 'E'), OWRITE, 0666); |
| if(efd < 0){ |
| if(debug) syslog(0, "runq", "create %s as %s: %r", file(dp->name, 'E'), getuser()); |
| exits("could not open error file - Retry"); |
| } |
| } |
| seek(efd, 0, 2); |
| exec(cmd, av); |
| error("can't exec %s", cmd); |
| break; |
| default: |
| for(;;){ |
| wm = wait(); |
| if(wm == nil) |
| error("wait failed: %r", ""); |
| if(wm->pid == pid) |
| break; |
| free(wm); |
| } |
| if(debug) |
| fprint(2, "wm->pid %d wm->msg == %s\n", wm->pid, wm->msg); |
| |
| if(wm->msg[0]){ |
| if(debug) |
| fprint(2, "[%d] wm->msg == %s\n", getpid(), wm->msg); |
| if(!Rflag && atoi(wm->msg) != RetryCode){ |
| /* return the message and remove it */ |
| if(returnmail(av, dp->name, wm->msg) != 0) |
| logit("returnmail failed", dp->name, av); |
| remmatch(dp->name); |
| } else { |
| /* add sys to bad list and try again later */ |
| nbad++; |
| badsys = realloc(badsys, nbad*sizeof(char*)); |
| badsys[nbad-1] = strdup(av[3]); |
| } |
| } else { |
| /* it worked remove the message */ |
| remmatch(dp->name); |
| } |
| free(wm); |
| |
| } |
| done: |
| if (l) |
| sysunlock(l); |
| Bterm(b); |
| sysunlockfile(Bfildes(b)); |
| free(buf); |
| free(av); |
| close(dfd); |
| } |
| |
| |
| /* |
| * return a name starting with the given character |
| */ |
| char* |
| file(char *name, char type) |
| { |
| static char nname[Elemlen+1]; |
| |
| strncpy(nname, name, Elemlen); |
| nname[Elemlen] = 0; |
| nname[0] = type; |
| return nname; |
| } |
| |
| /* |
| * send back the mail with an error message |
| * |
| * return 0 if successful |
| */ |
| int |
| returnmail(char **av, char *name, char *msg) |
| { |
| int pfd[2]; |
| Waitmsg *wm; |
| int fd; |
| char buf[256]; |
| char attachment[256]; |
| int i; |
| long n; |
| String *s; |
| char *sender; |
| |
| if(av[1] == 0 || av[2] == 0){ |
| logit("runq - dumping bad file", name, av); |
| return 0; |
| } |
| |
| s = unescapespecial(s_copy(av[2])); |
| sender = s_to_c(s); |
| |
| if(!returnable(sender) || strcmp(sender, "postmaster") == 0) { |
| logit("runq - dumping p to p mail", name, av); |
| return 0; |
| } |
| |
| if(pipe(pfd) < 0){ |
| logit("runq - pipe failed", name, av); |
| return -1; |
| } |
| |
| switch(rfork(RFFDG|RFPROC|RFENVG)){ |
| case -1: |
| logit("runq - fork failed", name, av); |
| return -1; |
| case 0: |
| logit("returning", name, av); |
| close(pfd[1]); |
| close(0); |
| dup(pfd[0], 0); |
| close(pfd[0]); |
| putenv("upasname", "/dev/null"); |
| snprint(buf, sizeof(buf), "%s/marshal", UPASBIN); |
| snprint(attachment, sizeof(attachment), "%s", file(name, 'D')); |
| execl(buf, "send", "-A", attachment, "-s", "permanent failure", sender, nil); |
| error("can't exec", 0); |
| break; |
| default: |
| break; |
| } |
| |
| close(pfd[0]); |
| fprint(pfd[1], "\n"); /* get out of headers */ |
| if(av[1]){ |
| fprint(pfd[1], "Your request ``%.20s ", av[1]); |
| for(n = 3; av[n]; n++) |
| fprint(pfd[1], "%s ", av[n]); |
| } |
| fprint(pfd[1], "'' failed (code %s).\nThe symptom was:\n\n", msg); |
| fd = open(file(name, 'E'), OREAD); |
| if(fd >= 0){ |
| for(;;){ |
| n = read(fd, buf, sizeof(buf)); |
| if(n <= 0) |
| break; |
| if(write(pfd[1], buf, n) != n){ |
| close(fd); |
| goto out; |
| } |
| } |
| close(fd); |
| } |
| close(pfd[1]); |
| out: |
| wm = wait(); |
| if(wm == nil){ |
| syslog(0, "runq", "wait: %r"); |
| logit("wait failed", name, av); |
| return -1; |
| } |
| i = 0; |
| if(wm->msg[0]){ |
| i = -1; |
| syslog(0, "runq", "returnmail child: %s", wm->msg); |
| logit("returnmail child failed", name, av); |
| } |
| free(wm); |
| return i; |
| } |
| |
| /* |
| * print a warning and continue |
| */ |
| void |
| warning(char *f, void *a) |
| { |
| char err[65]; |
| char buf[256]; |
| |
| rerrstr(err, sizeof(err)); |
| snprint(buf, sizeof(buf), f, a); |
| fprint(2, "runq: %s: %s\n", buf, err); |
| } |
| |
| /* |
| * print an error and die |
| */ |
| void |
| error(char *f, void *a) |
| { |
| char err[Errlen]; |
| char buf[256]; |
| |
| rerrstr(err, sizeof(err)); |
| snprint(buf, sizeof(buf), f, a); |
| fprint(2, "runq: %s: %s\n", buf, err); |
| exits(buf); |
| } |
| |
| void |
| logit(char *msg, char *file, char **av) |
| { |
| int n, m; |
| char buf[256]; |
| |
| n = snprint(buf, sizeof(buf), "%s/%s: %s", curdir, file, msg); |
| for(; *av; av++){ |
| m = strlen(*av); |
| if(n + m + 4 > sizeof(buf)) |
| break; |
| sprint(buf + n, " '%s'", *av); |
| n += m + 3; |
| } |
| syslog(0, runqlog, "%s", buf); |
| } |
| |
| char *loadfile = ".runqload"; |
| |
| /* |
| * load balancing |
| */ |
| void |
| doload(int start) |
| { |
| int fd; |
| char buf[32]; |
| int i, n; |
| Mlock *l; |
| Dir *d; |
| |
| if(load <= 0) |
| return; |
| |
| if(chdir(root) < 0){ |
| load = 0; |
| return; |
| } |
| |
| l = syslock(loadfile); |
| fd = open(loadfile, ORDWR); |
| if(fd < 0){ |
| fd = create(loadfile, 0666, ORDWR); |
| if(fd < 0){ |
| load = 0; |
| sysunlock(l); |
| return; |
| } |
| } |
| |
| /* get current load */ |
| i = 0; |
| n = read(fd, buf, sizeof(buf)-1); |
| if(n >= 0){ |
| buf[n] = 0; |
| i = atoi(buf); |
| } |
| if(i < 0) |
| i = 0; |
| |
| /* ignore load if file hasn't been changed in 30 minutes */ |
| d = dirfstat(fd); |
| if(d != nil){ |
| if(d->mtime + 30*60 < time(0)) |
| i = 0; |
| free(d); |
| } |
| |
| /* if load already too high, give up */ |
| if(start && i >= load){ |
| sysunlock(l); |
| exits(0); |
| } |
| |
| /* increment/decrement load */ |
| if(start) |
| i++; |
| else |
| i--; |
| seek(fd, 0, 0); |
| fprint(fd, "%d\n", i); |
| sysunlock(l); |
| close(fd); |
| } |