xd: accept -S for 8-byte swap

R=rsc
https://codereview.appspot.com/7565045
diff --git a/man/man1/xd.1 b/man/man1/xd.1
index 10307aa..9d43a75 100644
--- a/man/man1/xd.1
+++ b/man/man1/xd.1
@@ -75,6 +75,9 @@
 .B -s
 Reverse (swab) the order of bytes in each group of 4 before printing.
 .TP
+.B -S
+Reverse the order of bytes in each group of 8 before printing.
+.TP
 .B -r
 Print repeating groups of identical 16-byte sequences as the first group
 followed by an asterisk.
diff --git a/src/cmd/xd.c b/src/cmd/xd.c
index 3e8ba5d..7be55b2 100644
--- a/src/cmd/xd.c
+++ b/src/cmd/xd.c
@@ -8,6 +8,7 @@
 unsigned long	addr;
 int		repeats;
 int		swizzle;
+int		swizzle8;
 int		flush;
 int		abase=2;
 int		xd(char *, int);
@@ -83,6 +84,12 @@
 				goto Usage;
 			continue;
 		}
+		if(argv[0][0] == 'S'){
+			swizzle8 = 1;
+			if(argv[0][1])
+				goto Usage;
+			continue;
+		}
 		if(argv[0][0] == 'u'){
 			flush = 1;
 			if(argv[0][1])
@@ -215,6 +222,8 @@
 				data[i] = 0;
 		if(swizzle)
 			swizz();
+		if(swizzle8)
+			swizz8();
 		if(ndata==16 && repeats){
 			if(addr>0 && data[0]==odata[0]){
 				for(i=1; i<16; i++)
@@ -277,6 +286,33 @@
 }
 
 void
+swizz8(void)	/* reverse, in place, the bytes of each 8-byte group in the 16 bytes at data */
+{
+	uchar *p, *q;
+	int i;
+	uchar swdata[16];
+
+	p = data;
+	q = swdata;
+	for(i=0; i<16; i++)	/* snapshot the 16 bytes so the swap below can overwrite data */
+		*q++ = *p++;
+	p = data;
+	q = swdata;
+	for(i=0; i<2; i++){	/* 16 bytes = 2 groups of 8; more passes would run past swdata[15] */
+		p[0] = q[7];
+		p[1] = q[6];
+		p[2] = q[5];
+		p[3] = q[4];
+		p[4] = q[3];
+		p[5] = q[2];
+		p[6] = q[1];
+		p[7] = q[0];
+		p += 8;
+		q += 8;
+	}
+}
+
+void
 fmt0(char *f)
 {
 	int i;