new tar import
diff --git a/src/cmd/tar.c b/src/cmd/tar.c
index c468fcb..576db67 100644
--- a/src/cmd/tar.c
+++ b/src/cmd/tar.c
@@ -16,9 +16,7 @@
  * keyletters and options.
  */
 #define	TARGBEGIN {\
-	if (argv0 == nil)\
-		argv0 = *argv;\
-	argv++, argc--;\
+	(argv0 || (argv0 = *argv)), argv++, argc--;\
 	if (argv[0]) {\
 		char *_args, *_argt;\
 		Rune _argc;\
@@ -34,6 +32,12 @@
 #define ROUNDUP(a, b)	(((a) + (b) - 1)/(b))
 #define BYTES2TBLKS(bytes) ROUNDUP(bytes, Tblock)
 
+/* read big-endian binary integers; args must be (uchar *); shifts are done unsigned to avoid overflow */
+#define	G2BEBYTE(x)	(((x)[0]<<8)  |  (x)[1])
+#define	G3BEBYTE(x)	(((x)[0]<<16) | ((x)[1]<<8)  |  (x)[2])
+#define	G4BEBYTE(x)	((int)(((u32int)(x)[0]<<24) | ((x)[1]<<16) | ((x)[2]<<8) | (x)[3]))
+#define	G8BEBYTE(x)	((vlong)(((uvlong)(u32int)G4BEBYTE(x)<<32) | (u32int)G4BEBYTE((x)+4)))
+
 typedef vlong Off;
 typedef char *(*Refill)(int ar, char *bufs, int justhdr);
 
@@ -44,11 +48,14 @@
 enum { Alldata, Justnxthdr };
 enum {
 	Tblock = 512,
-	Nblock = 40,		/* maximum blocksize */
-	Dblock = 20,		/* default blocksize */
 	Namsiz = 100,
 	Maxpfx = 155,		/* from POSIX */
 	Maxname = Namsiz + 1 + Maxpfx,
+	Binsize = 0x80,		/* flag in size[0], from gnu: positive binary size */
+	Binnegsz = 0xff,	/* flag in size[0]: negative binary size; hdrsize rejects it */
+
+	Nblock = 40,		/* maximum blocksize */
+	Dblock = 20,		/* default blocksize */
 	DEBUG = 0,
 };
 
@@ -58,7 +65,7 @@
 	LF_PLAIN2 =	'0',
 	LF_LINK =	'1',
 	LF_SYMLINK1 =	'2',
-	LF_SYMLINK2 =	's',
+	LF_SYMLINK2 =	's',		/* 4BSD used this */
 	LF_CHR =	'3',
 	LF_BLK =	'4',
 	LF_DIR =	'5',
@@ -71,25 +78,28 @@
 #define isreallink(lf)	((lf) == LF_LINK)
 #define issymlink(lf)	((lf) == LF_SYMLINK1 || (lf) == LF_SYMLINK2)
 
-typedef struct {
-	char	name[Namsiz];
-	char	mode[8];
-	char	uid[8];
-	char	gid[8];
-	char	size[12];
-	char	mtime[12];
-	char	chksum[8];
-	char	linkflag;
-	char	linkname[Namsiz];
+typedef union {
+	uchar	data[Tblock];	/* the same block viewed as Tblock raw bytes */
+	struct {		/* anonymous: fields are reached directly as hp->name etc. */
+		char	name[Namsiz];
+		char	mode[8];
+		char	uid[8];
+		char	gid[8];
+		char	size[12];	/* octal text, or binary when size[0] is Binsize */
+		char	mtime[12];
+		char	chksum[8];
+		char	linkflag;
+		char	linkname[Namsiz];
 
-	/* rest are defined by POSIX's ustar format; see p1003.2b */
-	char	magic[6];	/* "ustar" */
-	char	version[2];
-	char	uname[32];
-	char	gname[32];
-	char	devmajor[8];
-	char	devminor[8];
-	char	prefix[Maxpfx]; /* if non-null, path= prefix "/" name */
+		/* rest are defined by POSIX's ustar format; see p1003.2b */
+		char	magic[6];	/* "ustar" */
+		char	version[2];
+		char	uname[32];
+		char	gname[32];
+		char	devmajor[8];
+		char	devminor[8];
+		char	prefix[Maxpfx]; /* if non-null, path= prefix "/" name */
+	};
 } Hdr;
 
 typedef struct {
@@ -247,9 +257,9 @@
 
 	if (first)
 		seekable = seek(ar, 0, 1) >= 0;
+	blkoff = seek(ar, 0, 1);		/* note position for `tar r' */
 	/* try to size non-pipe input at first read */
 	if (first && usefile) {
-		blkoff = seek(ar, 0, 1);	/* note position */
 		n = read(ar, bufs, bytes);
 		if (n <= 0)
 			sysfatal("error reading archive: %r");
@@ -317,7 +327,7 @@
 	Hdr *hp = getblke(ar);
 
 	if (hp != nil)
-		memset(hp, 0, Tblock);
+		memset(hp->data, 0, Tblock);
 	return hp;
 }
 
@@ -385,12 +395,12 @@
  * modifies hp->chksum but restores it; important for the last block of the
  * old archive when updating with `tar rf archive'
  */
-long
+static long
 chksum(Hdr *hp)
 {
 	int n = Tblock;
 	long i = 0;
-	uchar *cp = (uchar*)hp;
+	uchar *cp = hp->data;
 	char oldsum[sizeof hp->chksum];
 
 	memmove(oldsum, hp->chksum, sizeof oldsum);
@@ -458,14 +468,62 @@
 	return name(hp)[0] == '\0';
 }
 
-Off
+/*
+static uvlong
+getbe(uchar *src, int size)
+{
+	uvlong vl = 0;
+
+	while (size-- > 0) {
+		vl <<= 8;
+		vl |= *src++;
+	}
+	return vl;
+}
+ */
+
+static void
+putbe(uchar *dest, uvlong vl, int size)
+{
+	for (; --size >= 0; vl >>= 8)	/* big-endian: fill from the last byte backward */
+		dest[size] = vl;	/* keeps only the low 8 bits of vl */
+}
+
+/*
+ * return the nominal size from the header block, which is not always the
+ * size in the archive (the archive size may be zero for some file types
+ * regardless of the nominal size).
+ *
+ * gnu and freebsd tars are now recording vlongs as big-endian binary
+ * with a flag in byte 0 to indicate this, which permits file sizes up to
+ * 2^64-1 (actually 2^80-1 but our file sizes are vlongs) rather than 2^33-1.
+ */
+static Off
 hdrsize(Hdr *hp)
 {
-	Off bytes = strtoull(hp->size, nil, 8);
+	uchar *p;
 
-	if(isdir(hp))
-		bytes = 0;
-	return bytes;
+	if((uchar)hp->size[0] == Binnegsz) {
+		fprint(2, "%s: %s: negative length, which is insane\n",
+			argv0, name(hp));
+		return 0;	/* complain and treat the entry as empty */
+	} else if((uchar)hp->size[0] == Binsize) {
+		p = (uchar *)hp->size + sizeof hp->size - 1 -
+			sizeof(vlong);		/* -1 for terminating space */
+		return G8BEBYTE(p);	/* NOTE(review): reads size[3..10]; GNU tar appears to put the low byte in size[11] with no space -- confirm */
+	} else
+		return strtoull(hp->size, nil, 8);	/* traditional octal text */
+}
+
+/*
+ * number of bytes of file data stored in the archive for this entry:
+ * directories and links are counted as carrying none, whatever size the
+ * header claims; everything else occupies its nominal hdrsize().
+ */
+static Off
+arsize(Hdr *hp)
+{
+	return (isdir(hp) || islink(hp->linkflag))? 0: hdrsize(hp);
 }
 
 static Hdr *
@@ -483,7 +541,7 @@
 	if (chksum(hp) != hdrcksum)
 		sysfatal("bad archive header checksum: name %.64s...",
 			hp->name);
-	nexthdr += Tblock*(1 + BYTES2TBLKS(hdrsize(hp)));
+	nexthdr += Tblock*(1 + BYTES2TBLKS(arsize(hp)));
 	return hp;
 }
 
@@ -561,16 +619,19 @@
 	sprint(hp->mode, "%6lo ", dir->mode & 0777);
 	sprint(hp->uid, "%6o ", aruid);
 	sprint(hp->gid, "%6o ", argid);
-	/*
-	 * files > 2⁳⁳ bytes can't be described
-	 * (unless we resort to xustar or exustar formats).
-	 */
-	if (dir->length >= (Off)1<<33) {
-		fprint(2, "%s: %s: too large for tar header format\n",
-			argv0, file);
-		return -1;
-	}
-	sprint(hp->size, "%11lluo ", dir->length);
+	if (dir->length >= (Off)1<<32) {
+		static int printed;
+
+		if (!printed) {
+			printed = 1;
+			fprint(2, "%s: storing large sizes in \"base 256\"\n", argv0);
+		}
+		hp->size[0] = Binsize;
+		/* emit so-called `base 256' representation of size */
+		putbe((uchar *)hp->size+1, dir->length, sizeof hp->size - 2);
+		hp->size[sizeof hp->size - 1] = ' ';
+	} else
+		sprint(hp->size, "%11lluo ", dir->length);
 	sprint(hp->mtime, "%11luo ", dir->mtime);
 	hp->linkflag = (dir->mode&DMDIR? LF_DIR: LF_PLAIN1);
 	putfullname(hp, file);
@@ -633,10 +694,20 @@
 	ulong blksleft, blksread;
 	Hdr *hbp;
 	Dir *dir;
+	String *name = nil;
+
+	if (shortf[0] == '#') {
+		name = s_new();
+		s_append(name, "./");
+		s_append(name, shortf);
+		shortf = s_to_c(name);
+	}
 
 	fd = open(shortf, OREAD);
 	if (fd < 0) {
 		fprint(2, "%s: can't open %s: %r\n", argv0, file);
+		if (name)
+			s_free(name);
 		return;
 	}
 	dir = dirfstat(fd);
@@ -649,6 +720,8 @@
 		putbackblk(ar);
 		free(dir);
 		close(fd);
+		if (name)
+			s_free(name);
 		return;
 	}
 	putblk(ar);
@@ -663,7 +736,7 @@
 			hbp = getblke(ar);
 			blksread = gothowmany(blksleft);
 			bytes = blksread * Tblock;
-			n = readn(fd, hbp, bytes);
+			n = readn(fd, hbp->data, bytes);
 			if (n < 0)
 				sysfatal("error reading %s: %r", file);
 			/*
@@ -671,13 +744,15 @@
 			 * compression and emergency recovery of data.
 			 */
 			if (n < Tblock)
-				memset((uchar*)hbp + n, 0, bytes - n);
+				memset(hbp->data + n, 0, bytes - n);
 			putblkmany(ar, blksread);
 		}
 		close(fd);
 		if (verbose)
 			fprint(2, "%s\n", file);
 	}
+	if (name)
+		s_free(name);
 }
 
 static char *
@@ -706,7 +781,7 @@
 	if (usefile && !docreate) {
 		/* skip quickly to the end */
 		while ((hp = readhdr(ar)) != nil) {
-			bytes = hdrsize(hp);
+			bytes = arsize(hp);
 			for (blksleft = BYTES2TBLKS(bytes);
 			     blksleft > 0 && getblkrd(ar, Justnxthdr) != nil;
 			     blksleft -= blksread) {
@@ -777,6 +852,34 @@
 	return 0;
 }
 
+static void
+cantcreate(char *s, int mode)
+{
+	int len;
+	static char *last;	/* directory most recently complained about, or nil */
+
+	/*
+	 * Always print about files.  Only print about directories we
+	 * haven't printed about.  (Assumes archive is ordered nicely.)
+	 */
+	if(mode&DMDIR){
+		if(last){
+			/* already printed this directory, or one beneath it */
+			len = strlen(s);
+			if(strncmp(s, last, len) == 0 &&
+			    (last[len] == '/' || last[len] == '\0'))
+				return;
+			/* printed a higher directory, so printed this one */
+			len = strlen(last);
+			if(strncmp(s, last, len) == 0 && s[len] == '/')
+				return;
+		}
+		free(last);
+		last = strdup(s);	/* if this fails, last is nil and nothing is suppressed */
+	}
+	fprint(2, "%s: can't create %s: %r\n", argv0, s);
+}
+
 static int
 makedir(char *s)
 {
@@ -787,20 +890,39 @@
 	f = create(s, OREAD, DMDIR | 0777);
 	if (f >= 0)
 		close(f);
+	else
+		cantcreate(s, DMDIR);
 	return f;
 }
 
-static void
+static int
 mkpdirs(char *s)
 {
-	int done = 0;
-	char *p = s;
+	int err;
+	char *p;
 
-	while (!done && (p = strchr(p + 1, '/')) != nil) {
+	p = s;
+	err = 0;
+	while (!err && (p = strchr(p+1, '/')) != nil) {	/* visit each '/' after s[0]; stop at first failure */
 		*p = '\0';
-		done = (access(s, AEXIST) < 0 && makedir(s) < 0);
+		err = (access(s, AEXIST) < 0 && makedir(s) < 0);	/* prefix is missing and can't be made */
 		*p = '/';
 	}
+	return -err;	/* 0 if every parent exists or was made, -1 otherwise */
+}
+
+/* Call access but preserve the error string, so a later %r still reports the earlier failure. */
+static int
+xaccess(char *name, int mode)
+{
+	char err[ERRMAX];
+	int rv;
+
+	err[0] = 0;
+	errstr(err, sizeof err);	/* errstr exchanges buffers: stash the current error string in err */
+	rv = access(name, mode);	/* may set its own error string */
+	errstr(err, sizeof err);	/* exchange again, putting the stashed string back */
+	return rv;	/* access's result, unchanged */
 }
 
 /* copy a file from the archive into the filesystem */
@@ -811,8 +933,8 @@
 	int wrbytes, fd = -1, dir = 0;
 	long mtime = strtol(hp->mtime, nil, 8);
 	ulong mode = strtoul(hp->mode, nil, 8) & 0777;
-	Off bytes  = strtoll(hp->size, nil, 8);		/* for printing */
-	ulong blksread, blksleft = BYTES2TBLKS(hdrsize(hp));
+	Off bytes = hdrsize(hp);		/* for printing */
+	ulong blksread, blksleft = BYTES2TBLKS(arsize(hp));
 	Hdr *hbp;
 
 	if (isdir(hp)) {
@@ -857,9 +979,8 @@
 					fd = create(fname, rw, mode);
 				}
 				if (fd < 0 &&
-				    (!dir || access(fname, AEXIST) < 0))
-					fprint(2, "%s: can't create %s: %r\n",
-						argv0, fname);
+				    (!dir || xaccess(fname, AEXIST) < 0))
+				    	cantcreate(fname, mode);
 			}
 			if (fd >= 0 && verbose)
 				fprint(2, "%s\n", fname);
@@ -873,20 +994,29 @@
 	} else
 		print("%s\n", fname);
 
+	if (blksleft == 0)
+		bytes = 0;
 	for (; blksleft > 0; blksleft -= blksread) {
 		hbp = getblkrd(ar, (fd >= 0? Alldata: Justnxthdr));
 		if (hbp == nil)
 			sysfatal("unexpected EOF on archive extracting %s",
 				fname);
 		blksread = gothowmany(blksleft);
+		if (blksread <= 0)
+			fprint(2, "%s: got %ld blocks reading %s!\n",
+				argv0, blksread, fname);
 		wrbytes = Tblock*blksread;
 		if(wrbytes > bytes)
 			wrbytes = bytes;
-		if (fd >= 0 && write(fd, hbp, wrbytes) != wrbytes)
+		if (fd >= 0 && write(fd, hbp->data, wrbytes) != wrbytes)
 			sysfatal("write error on %s: %r", fname);
 		putreadblks(ar, blksread);
 		bytes -= wrbytes;
 	}
+	if (bytes > 0)
+		fprint(2,
+		    "%s: %lld bytes uncopied at eof; %s not fully extracted\n",
+			argv0, bytes, fname);
 	if (fd >= 0) {
 		/*
 		 * directories should be wstated after we're done
@@ -897,9 +1027,12 @@
 
 			nulldir(&nd);
 			nd.mtime = mtime;
-			if (isustar(hp))
-				nd.gid = hp->gname;
 			dirfwstat(fd, &nd);
+			if (isustar(hp)) {
+				nulldir(&nd);
+				nd.gid = hp->gname;
+				dirfwstat(fd, &nd);
+			}
 		}
 		close(fd);
 	}
@@ -911,7 +1044,7 @@
 	ulong blksleft, blksread;
 	Hdr *hbp;
 
-	for (blksleft = BYTES2TBLKS(hdrsize(hp)); blksleft > 0;
+	for (blksleft = BYTES2TBLKS(arsize(hp)); blksleft > 0;
 	     blksleft -= blksread) {
 		hbp = getblkrd(ar, Justnxthdr);
 		if (hbp == nil)
@@ -962,7 +1095,6 @@
 	int errflg = 0;
 	char *ret = nil;
 
-	quotefmtinstall();
 	fmtinstall('M', dirmodefmt);
 
 	TARGBEGIN {