libmach: more mach-o support

R=rsc
http://codereview.appspot.com/2277041
diff --git a/src/libmach/LICENSE b/src/libmach/LICENSE
index 916fb12..bf30f8e 100644
--- a/src/libmach/LICENSE
+++ b/src/libmach/LICENSE
@@ -11,6 +11,7 @@
 	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).
 	Revisions Copyright © 2000-2004 Lucent Technologies Inc. and others.
 	Portions Copyright © 2001-2007 Russ Cox.
+	Portions Copyright © 2008-2010 Google Inc.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/src/libmach/crackmacho.c b/src/libmach/crackmacho.c
index c19d81e..bb521fa 100644
--- a/src/libmach/crackmacho.c
+++ b/src/libmach/crackmacho.c
@@ -115,8 +115,8 @@
 		if(m->cmd[i].type == MachoCmdSymtab)
 			break;
 	if(i < m->ncmd){
-		fp->stabs.stabbase = load(fp->fd, m->cmd[i].sym.symoff, m->cmd[i].sym.nsyms*16);
-		fp->stabs.stabsize = m->cmd[i].sym.nsyms*16;
+		fp->stabs.stabbase = load(fp->fd, m->cmd[i].sym.symoff, m->cmd[i].sym.nsym*16);
+		fp->stabs.stabsize = m->cmd[i].sym.nsym*16;
 		fp->stabs.strbase = (char*)load(fp->fd, m->cmd[i].sym.stroff, m->cmd[i].sym.strsize);
 		if(fp->stabs.stabbase == nil || fp->stabs.strbase == nil){
 			fp->stabs.stabbase = nil;
diff --git a/src/libmach/macho.c b/src/libmach/macho.c
index 9d9a123..7f12735 100644
--- a/src/libmach/macho.c
+++ b/src/libmach/macho.c
@@ -6,6 +6,27 @@
 /*
 http://www.channelu.com/NeXT/NeXTStep/3.3/nd/DevTools/14_MachO/MachO.htmld/ 
 */
+/*
+ * preadn reads up to ulen bytes from fd at offset into vdata, calling pread
+ * again after short reads.  It stops early once pread returns 0 or fails, and returns the bytes stored.
+ */
+static long
+preadn(int fd, void *vdata, uint32 ulen, uint64 offset)
+{
+	uchar *dst;
+	long left, got;
+
+	dst = vdata;
+	for(left = ulen; left > 0; left -= got){
+		got = pread(fd, dst, left, offset);
+		if(got <= 0)
+			break;
+		dst += got;
+		offset += got;
+	}
+	return dst-(uchar*)vdata;
+}
+
 
 Macho*
 machoopen(char *name)
@@ -22,11 +43,15 @@
 }
 
 static int
-unpackseg(uchar *p, Macho *m, MachoCmd *c, uint type, uint sz)
+unpackcmd(uchar *p, Macho *m, MachoCmd *c, uint type, uint sz)	// decode the load command at p into c; -1 if too short or out of memory
 {
-	u32int (*e4)(uchar*);
+	uint32 (*e4)(uchar*);
+	uint64 (*e8)(uchar*);
+	MachoSect *s;
+	int i;
 
 	e4 = m->e4;
+	e8 = m->e8;
 
 	c->type = type;
 	c->size = sz;
@@ -45,59 +70,257 @@
 		c->seg.initprot = e4(p+44);
 		c->seg.nsect = e4(p+48);
 		c->seg.flags = e4(p+52);
+		if(sz < 56+(uint64)c->seg.nsect*68)	// check (without 32-bit wraparound) before allocating, so failure has nothing to free
+			return -1;
+		c->seg.sect = mallocz(c->seg.nsect * sizeof c->seg.sect[0], 1);
+		if(c->seg.sect == nil)
+			return -1;
+		p += 56;	// section headers follow the 56-byte segment header
+		for(i=0; i<c->seg.nsect; i++) {
+			s = &c->seg.sect[i];
+			strecpy(s->name, s->name+sizeof s->name, (char*)p+0);
+			strecpy(s->segname, s->segname+sizeof s->segname, (char*)p+16);
+			s->addr = e4(p+32);
+			s->size = e4(p+36);
+			s->offset = e4(p+40);
+			s->align = e4(p+44);
+			s->reloff = e4(p+48);
+			s->nreloc = e4(p+52);
+			s->flags = e4(p+56);
+			// p+60 and p+64 are reserved
+			p += 68;
+		}
+		break;
+	case MachoCmdSegment64:
+		if(sz < 72)
+			return -1;
+		strecpy(c->seg.name, c->seg.name+sizeof c->seg.name, (char*)p+8);
+		c->seg.vmaddr = e8(p+24);
+		c->seg.vmsize = e8(p+32);
+		c->seg.fileoff = e8(p+40);	// NOTE(review): MachoSeg.fileoff and filesz are uint32, so these 64-bit values are truncated
+		c->seg.filesz = e8(p+48);
+		c->seg.maxprot = e4(p+56);
+		c->seg.initprot = e4(p+60);
+		c->seg.nsect = e4(p+64);
+		c->seg.flags = e4(p+68);
+		if(sz < 72+(uint64)c->seg.nsect*80)	// check (without 32-bit wraparound) before allocating, so failure has nothing to free
+			return -1;
+		c->seg.sect = mallocz(c->seg.nsect * sizeof c->seg.sect[0], 1);
+		if(c->seg.sect == nil)
+			return -1;
+		p += 72;	// section headers follow the 72-byte segment header
+		for(i=0; i<c->seg.nsect; i++) {
+			s = &c->seg.sect[i];
+			strecpy(s->name, s->name+sizeof s->name, (char*)p+0);
+			strecpy(s->segname, s->segname+sizeof s->segname, (char*)p+16);
+			s->addr = e8(p+32);
+			s->size = e8(p+40);
+			s->offset = e4(p+48);
+			s->align = e4(p+52);
+			s->reloff = e4(p+56);
+			s->nreloc = e4(p+60);
+			s->flags = e4(p+64);
+			// p+68, p+72, and p+76 are reserved
+			p += 80;
+		}
 		break;
 	case MachoCmdSymtab:
 		if(sz < 24)
 			return -1;
 		c->sym.symoff = e4(p+8);
-		c->sym.nsyms = e4(p+12);
+		c->sym.nsym = e4(p+12);
 		c->sym.stroff = e4(p+16);
 		c->sym.strsize = e4(p+20);
 		break;
+	case MachoCmdDysymtab:
+		if(sz < 80)
+			return -1;
+		c->dsym.ilocalsym = e4(p+8);
+		c->dsym.nlocalsym = e4(p+12);
+		c->dsym.iextdefsym = e4(p+16);
+		c->dsym.nextdefsym = e4(p+20);
+		c->dsym.iundefsym = e4(p+24);
+		c->dsym.nundefsym = e4(p+28);
+		c->dsym.tocoff = e4(p+32);
+		c->dsym.ntoc = e4(p+36);
+		c->dsym.modtaboff = e4(p+40);
+		c->dsym.nmodtab = e4(p+44);
+		c->dsym.extrefsymoff = e4(p+48);
+		c->dsym.nextrefsyms = e4(p+52);
+		c->dsym.indirectsymoff = e4(p+56);
+		c->dsym.nindirectsyms = e4(p+60);
+		c->dsym.extreloff = e4(p+64);
+		c->dsym.nextrel = e4(p+68);
+		c->dsym.locreloff = e4(p+72);
+		c->dsym.nlocrel = e4(p+76);
+		break;
 	}
 	return 0;
 }
 
+/*
+ * macholoadrel reads the relocation entries of sect from m->fd, decodes them, and stores the array in sect->rel.
+ * It returns 0 on success or when there is nothing to load, and -1 if the count is too large or allocation or I/O fails.
+ */
+int
+macholoadrel(Macho *m, MachoSect *sect)
+{
+	MachoRel *rel, *r;
+	uchar *buf, *p;
+	int i, n;
+	uint32 v;
+
+	if(sect->rel != nil || sect->nreloc == 0)
+		return 0;
+	if(sect->nreloc > 0x7FFFFFFF/8)	// nreloc*8 must fit in n, or the loop below would read past buf
+		return -1;
+	n = sect->nreloc * 8;
+	rel = mallocz(sect->nreloc * sizeof r[0], 1);
+	buf = mallocz(n, 1);
+	if(rel == nil || buf == nil || seek(m->fd, sect->reloff, 0) < 0 || readn(m->fd, buf, n) != n) {
+		free(rel);
+		free(buf);
+		return -1;
+	}
+	for(i=0; i<sect->nreloc; i++) {
+		r = &rel[i];
+		p = buf+i*8;
+		r->addr = m->e4(p);
+
+		// TODO(rsc): Wrong interpretation for big-endian bitfields?
+		v = m->e4(p+4);
+		r->symnum = v & 0xFFFFFF;	// low 24 bits
+		v >>= 24;
+		r->pcrel = v&1;
+		v >>= 1;
+		r->length = 1<<(v&3);	// 2-bit field is a power-of-two exponent
+		v >>= 2;
+		r->extrn = v&1;
+		v >>= 1;
+		r->type = v;
+	}
+	sect->rel = rel;
+	free(buf);
+	return 0;
+}
+
+/*
+ * macholoadsym reads the string table and symbol entries described by symtab
+ * from m->fd, storing them in symtab->str and symtab->sym; each symbol name
+ * points into symtab->str.  It returns 0 on success or if the symbols are
+ * already loaded, and -1 on failure, in which case symtab is left unchanged.
+ */
+int
+macholoadsym(Macho *m, MachoSymtab *symtab)
+{
+	char *strbuf;
+	uchar *symbuf, *p;
+	int i, n, symsize;
+	MachoSym *sym, *s;
+	uint32 v;
+
+	if(symtab->sym != nil)
+		return 0;
+
+	// Each symbol entry is 12 bytes, or 16 when m->is64 is set.
+	symsize = 12;
+	if(m->is64)
+		symsize = 16;
+	// Reject counts whose byte sizes would not fit the int lengths used below.
+	if(symtab->nsym > 0x7FFFFFFF/symsize || symtab->strsize > 0x7FFFFFFE)
+		return -1;
+	n = symtab->nsym * symsize;
+
+	// One extra zeroed byte guarantees that the last name is NUL-terminated.
+	strbuf = mallocz(symtab->strsize+1, 1);
+	symbuf = mallocz(n, 1);
+	sym = mallocz(symtab->nsym * sizeof sym[0], 1);
+	if(strbuf == nil || symbuf == nil || sym == nil)
+		goto bad;
+	if(seek(m->fd, symtab->stroff, 0) < 0 || readn(m->fd, strbuf, symtab->strsize) != symtab->strsize)
+		goto bad;
+	if(seek(m->fd, symtab->symoff, 0) < 0 || readn(m->fd, symbuf, n) != n)
+		goto bad;
+
+	p = symbuf;
+	for(i=0; i<symtab->nsym; i++) {
+		s = &sym[i];
+		v = m->e4(p);	// offset of the name within the string table
+		if(v >= symtab->strsize)
+			goto bad;
+		s->name = strbuf + v;
+		s->type = p[4];
+		s->sectnum = p[5];
+		s->desc = m->e2(p+6);
+		if(m->is64)
+			s->value = m->e8(p+8);
+		else
+			s->value = m->e4(p+8);
+		p += symsize;
+	}
+	symtab->str = strbuf;
+	symtab->sym = sym;
+	free(symbuf);
+	return 0;
+
+bad:
+	free(strbuf);
+	free(symbuf);
+	free(sym);
+	return -1;
+}
 
 Macho*
 machoinit(int fd)
 {
-	int i;
+	int i, is64;
 	uchar hdr[7*4], *cmdp;
-	u32int (*e4)(uchar*);
+	uchar tmp[4];
+	uint16 (*e2)(uchar*);
+	uint32 (*e4)(uchar*);
+	uint64 (*e8)(uchar*);
 	ulong ncmd, cmdsz, ty, sz, off;
 	Macho *m;
 
 	if(seek(fd, 0, 0) < 0 || readn(fd, hdr, sizeof hdr) != sizeof hdr)
 		return nil;
 
-	if(beload4(hdr) == 0xFEEDFACE)
+	if((beload4(hdr)&~1) == 0xFEEDFACE){	// &~1 also accepts the 64-bit magic 0xFEEDFACF
+		e2 = beload2;
 		e4 = beload4;
-	else if(leload4(hdr) == 0xFEEDFACE)
+		e8 = beload8;
+	}else if((leload4(hdr)&~1) == 0xFEEDFACE){
+		e2 = leload2;
 		e4 = leload4;
-	else{
+		e8 = leload8;
+	}else{
 		werrstr("bad magic - not mach-o file");
 		return nil;
 	}
-
+	is64 = e4(hdr) == 0xFEEDFACF;
 	ncmd = e4(hdr+4*4);
 	cmdsz = e4(hdr+5*4);
 	if(ncmd > 0x10000 || cmdsz >= 0x01000000){
 		werrstr("implausible mach-o header ncmd=%lud cmdsz=%lud", ncmd, cmdsz);
 		return nil;
 	}
+	if(is64 && readn(fd, tmp, sizeof tmp) != sizeof tmp)	// skip reserved word in header; fail on a truncated file
+		return nil;
 
 	m = mallocz(sizeof(*m)+ncmd*sizeof(MachoCmd)+cmdsz, 1);
 	if(m == nil)
 		return nil;
 
 	m->fd = fd;
+	m->e2 = e2;
 	m->e4 = e4;
+	m->e8 = e8;
 	m->cputype = e4(hdr+1*4);
 	m->subcputype = e4(hdr+2*4);
 	m->filetype = e4(hdr+3*4);
 	m->ncmd = ncmd;
 	m->flags = e4(hdr+6*4);
+	m->is64 = is64;
 
 	m->cmd = (MachoCmd*)(m+1);
-	off = sizeof hdr;
+	off = sizeof hdr + (is64 ? sizeof tmp : 0);	// commands start after the reserved word in 64-bit files
@@ -112,11 +335,10 @@
 		ty = e4(cmdp);
 		sz = e4(cmdp+4);
 		m->cmd[i].off = off;
-		unpackseg(cmdp, m, &m->cmd[i], ty, sz);
+		unpackcmd(cmdp, m, &m->cmd[i], ty, sz);	// NOTE(review): result ignored; a rejected command keeps only its type and size
 		cmdp += sz;
 		off += sz;
 	}
-
 	return m;
 }
 
diff --git a/src/libmach/macho.h b/src/libmach/macho.h
index d2a1a2e..2b449f0 100644
--- a/src/libmach/macho.h
+++ b/src/libmach/macho.h
@@ -1,11 +1,18 @@
 typedef struct Macho Macho;
 typedef struct MachoCmd MachoCmd;
+typedef struct MachoSeg MachoSeg;
+typedef struct MachoSect MachoSect;
+typedef struct MachoRel MachoRel;
+typedef struct MachoSymtab MachoSymtab;
+typedef struct MachoSym MachoSym;
+typedef struct MachoDysymtab MachoDysymtab;
 
 enum
 {
 	MachoCpuVax = 1,
 	MachoCpu68000 = 6,
 	MachoCpu386 = 7,
+	MachoCpuAmd64 = 0x1000007,
 	MachoCpuMips = 8,
 	MachoCpu98000 = 10,
 	MachoCpuHppa = 11,
@@ -20,6 +27,8 @@
 	MachoCmdSymtab = 2,
 	MachoCmdSymseg = 3,
 	MachoCmdThread = 4,
+	MachoCmdDysymtab = 11,
+	MachoCmdSegment64 = 25,
 
 	MachoFileObject = 1,
 	MachoFileExecutable = 2,
@@ -28,40 +37,111 @@
 	MachoFilePreload = 5
 };
 
+struct MachoSeg	/* segment load command (MachoCmdSegment or MachoCmdSegment64) as decoded by unpackcmd */
+{
+	char name[16+1];	/* NUL-terminated copy made with strecpy */
+	uint64 vmaddr;
+	uint64 vmsize;
+	uint32 fileoff;	/* NOTE(review): filled from a 64-bit field for MachoCmdSegment64, so the value is truncated */
+	uint32 filesz;	/* NOTE(review): same truncation as fileoff */
+	uint32 maxprot;
+	uint32 initprot;
+	uint32 nsect;	/* number of entries in sect */
+	uint32 flags;
+	MachoSect *sect;	/* array of nsect section headers allocated by unpackcmd */
+};
+
+struct MachoSect	/* one section header of a segment, as decoded by unpackcmd */
+{
+	char	name[16+1];	/* NUL-terminated copy made with strecpy */
+	char	segname[16+1];	/* NUL-terminated copy made with strecpy */
+	uint64 addr;	/* read as 32 bits for MachoCmdSegment, 64 bits for MachoCmdSegment64 */
+	uint64 size;	/* read as 32 bits for MachoCmdSegment, 64 bits for MachoCmdSegment64 */
+	uint32 offset;
+	uint32 align;
+	uint32 reloff;	/* file offset that macholoadrel seeks to */
+	uint32 nreloc;	/* number of 8-byte relocation entries macholoadrel reads */
+	uint32 flags;
+	
+	MachoRel *rel;	/* decoded relocations; nil until macholoadrel fills it in */
+};
+
+struct MachoRel	/* one relocation entry as decoded by macholoadrel */
+{
+	uint32 addr;	/* first 32-bit word of the entry */
+	uint32 symnum;	/* low 24 bits of the second word */
+	uint8 pcrel;	/* bit 24 of the second word */
+	uint8 length;	/* 1<<n, where n is bits 25-26 of the second word */
+	uint8 extrn;	/* bit 27 of the second word */
+	uint8 type;	/* bits 28-31 of the second word */
+};
+
+struct MachoSymtab	/* symbol table load command (MachoCmdSymtab) */
+{
+	uint32 symoff;	/* file offset macholoadsym seeks to for the symbol entries */
+	uint32 nsym;	/* number of symbol entries */
+	uint32 stroff;	/* file offset macholoadsym seeks to for the string table */
+	uint32 strsize;	/* number of string table bytes read */
+	
+	char *str;	/* string table buffer; set by macholoadsym */
+	MachoSym *sym;	/* decoded symbols; nil until macholoadsym fills it in */
+};
+
+struct MachoSym	/* one symbol table entry as decoded by macholoadsym */
+{
+	char *name;	/* points into the owning MachoSymtab's str buffer */
+	uint8 type;
+	uint8 sectnum;
+	uint16 desc;
+	char kind;	/* not assigned by macholoadsym; left zero by mallocz */
+	uint64 value;	/* read as 32 bits unless Macho.is64 is set */
+};
+
+struct MachoDysymtab	/* MachoCmdDysymtab load command; unpackcmd reads these as consecutive 32-bit words from offset 8 */
+{
+	uint32 ilocalsym;
+	uint32 nlocalsym;
+	uint32 iextdefsym;
+	uint32 nextdefsym;
+	uint32 iundefsym;
+	uint32 nundefsym;
+	uint32 tocoff;
+	uint32 ntoc;
+	uint32 modtaboff;
+	uint32 nmodtab;
+	uint32 extrefsymoff;
+	uint32 nextrefsyms;
+	uint32 indirectsymoff;
+	uint32 nindirectsyms;
+	uint32 extreloff;
+	uint32 nextrel;
+	uint32 locreloff;
+	uint32 nlocrel;
+};
+
 struct MachoCmd
 {
 	int type;
-	ulong off;
-	ulong size;
-	struct {
-		char name[16+1];
-		ulong vmaddr;
-		ulong vmsize;
-		ulong fileoff;
-		ulong filesz;
-		ulong maxprot;
-		ulong initprot;
-		ulong nsect;
-		ulong flags;
-	} seg;
-	struct {
-		ulong symoff;
-		ulong nsyms;
-		ulong stroff;
-		ulong strsize;
-	} sym;
+	uint32 off;	/* offset of the command as accumulated by machoinit */
+	uint32 size;	/* size word read from the command header */
+	MachoSeg seg;	/* filled for MachoCmdSegment and MachoCmdSegment64 */
+	MachoSymtab sym;	/* filled for MachoCmdSymtab */
+	MachoDysymtab dsym;	/* filled for MachoCmdDysymtab */
 };
 
 struct Macho
 {
 	int fd;
+	int is64;	/* nonzero when the header magic is 0xFEEDFACF */
 	uint cputype;
 	uint subcputype;
-	ulong filetype;
-	ulong flags;
+	uint32 filetype;
+	uint32 flags;
 	MachoCmd *cmd;
 	uint ncmd;
-	u32int (*e4)(uchar*);
+	uint16 (*e2)(uchar*);	/* e2/e4/e8: big- or little-endian loaders chosen by machoinit from the magic */
+	uint32 (*e4)(uchar*);
+	uint64 (*e8)(uchar*);
 	int (*coreregs)(Macho*, uchar**);
 };
 
@@ -69,3 +149,5 @@
 Macho *machoinit(int);
 void machoclose(Macho*);
 int coreregsmachopower(Macho*, uchar**);
+int macholoadrel(Macho*, MachoSect*);
+int macholoadsym(Macho*, MachoSymtab*);
diff --git a/src/libmach/mkfile b/src/libmach/mkfile
index c218c2c..3d3a678 100644
--- a/src/libmach/mkfile
+++ b/src/libmach/mkfile
@@ -71,6 +71,9 @@
 demangler: demangler.o $LIBDIR/$LIB
 	$LD -o $target $prereq -l9
 
+machodump: machodump.o $LIBDIR/$LIB
+	$LD -o $target $prereq -l9
+
 
 SunOS.$O: nosys.c
 Darwin.$O: nosys.c