src/cmd/index/hierarchy - plan9 - Git at Google

 #!/bin/sh

 # input:
 #	key (tab) string (tab) page numbers
 #		command	command	123
 #		command, data	command, [data]	11
 #		command, display	command, [display]	11, 54, 63, 75
 #		command, model	command, [model]	11
 #		command, quit	command, [quit]	5, 16
 # output:
 #	key (tab) string (tab) page numbers
 #		key	command  123
 #		key	   [data]  11
 #		key	   [display] ...
 #		key	   [model] ...
 #		key	   [quit] ...

 awk '
 # Fields are tab separated on input and output, so keys and strings
 # may themselves contain spaces.
 BEGIN	{ FS = OFS = "\t" }

 # Buffer every record for the END pass:
 #	y[NR] is the sort key (field 1)
 #	x[NR] is "string<tab>page numbers" (fields 2 and 3)
 # The whole-record copy that used to be kept in line[] was never read,
 # so it is no longer stored.
 {	x[NR] = $2 "\t" $3; y[NR] = $1 }

 # find a sequence of entries that share the same prefix
 # dump prefix, then each instance with spaces instead of prefix
 END {
 	# Walk the buffered records one run at a time; each pass of the
 	# loop consumes records first..last.
 	first = 1
 	while (first <= NR) {
 		last = findrun(first)	# index of the final record of this run
 		if (last <= first)	# lone entry: emit it unchanged
 			print y[first], x[first]
 		else			# several entries: emit them as a hierarchy
 			printrun(first, last)
 		first = last + 1
 	}
 }

 # findrun(s): return the index of the last record of the run starting at s.
 # Record j > s belongs to the run when its key y[j] either equals the
 # first word of y[s] (as computed by prefix) or begins with that word
 # followed by a space or a comma.  Returns s when no later record fits.
 # j, p, np and c are locals; c used to leak into the global namespace.
 function findrun(s,	j, p, np, c) {
 	p = prefix(y[s])
 	np = length(p)
 	for (j = s+1; j <= NR; j++) {
 		if (y[j] == p)			# same, so include
 			continue
 		if (index(y[j], p) != 1)	# key does not start with p
 			break
 		c = substr(y[j], np+1, 1)	# character right after the prefix
 		if (c != " " && c != ",")	# has to be whole word prefix
 			break
 	}
 	return j-1	# j is the first record outside the run (or NR+1)
 }

 # prefix(s): strip every comma from s, then return the text before the
 # first space; when no space is left, return the comma-free string whole.
 # n is a local.
 function prefix(s,	n) {
 	gsub(/,/, "", s)
 	n = index(s, " ")
 	return (n > 0) ? substr(s, 1, n-1) : s
 }

 # printrun(s, e): print records s..e (inclusive) of one run, reordered.
 # Each record is put into exactly one bucket according to its x[] text:
 #	sx/sy	contains "{see"			("see", "see also")
 #	px/py	starts with "["			(prefix word is [...])
 #	ex/ey	contains "[...]" elsewhere
 #	ix/iy	none of the above
 # Output order is: see entries (one per line), then the plain bucket,
 # then the "[...] elsewhere" bucket, then the "starts with [" bucket.
 # A bucket holding more than one entry is handed to printgroup.
 # i and the four counters are locals; the bucket arrays stay global and
 # are simply overwritten from index 0 on every call.
 function printrun(s, e,		i, s1, p1, e1, i1) {
 	s1 = 0; e1 = 0; p1 = 0; i1 = 0
 	for (i = s; i <= e; i++) {
 		# "[{]" rather than a bare leading "{": an ERE that starts
 		# with a left brace has undefined meaning in POSIX.
 		if (x[i] ~ /[{]see/) {		# see, see also
 			sx[s1] = x[i]
 			sy[s1] = y[i]
 			s1++
 		} else if (x[i] ~ /^\[/) {	# prefix word is [...]
 			px[p1] = x[i]
 			py[p1] = y[i]
 			p1++
 		} else if (x[i] ~ /\[.*\]/) {	# [...] somewhere else
 			ex[e1] = x[i]
 			ey[e1] = y[i]
 			e1++
 		} else {			# none of the above
 			ix[i1] = x[i]
 			iy[i1] = y[i]
 			i1++
 		}
 	}
 	# Sanity check: every record must have landed in exactly one bucket.
 	if (e-s+1 != s1 + p1 + i1 + e1)
 		print ("hierarchy: internal error: classified " (s1 + p1 + i1 + e1) " of " (e-s+1) " entries") >"/dev/stderr"

 	for (i = 0; i < s1; i++)	# "see", one/line
 		print sy[i], sx[i]
 	if (i1 > 1)
 		printgroup(ix,iy,0,i1)	# non [...] items
 	else if (i1 == 1)
 		print iy[0], ix[0]
 	if (e1 > 1)
 		printgroup(ex,ey,0,e1)	# items with [...] after the first word
 	else if (e1 == 1)
 		print ey[0], ex[0]
 	if (p1 > 1)
 		printgroup(px,py,0,p1)	# [prefix] ... items
 	else if (p1 == 1)
 		print py[0], px[0]
 }

 # printgroup(x, y, s, e): print entries x[s] .. x[e-1] as one group, all
 # under the key y[s].  Each x[] element is "string<tab>page numbers".
 #
 # If the first string has at least two words and every other entry begins
 # with those same two words (followed by a space or a comma), the first
 # space of every entry is temporarily replaced by a marker so that the
 # two words are treated as one heading word.  Note that this rewrites the
 # elements of the array passed as x.
 #
 # The heading is the first string cut at its first run of spaces/tildes
 # (and an optional comma before it); it is printed on a line of its own
 # only when that cut removed something.  Every entry is then printed
 # with its leading word and the following commas/spaces replaced by
 # three spaces.
 #
 # The marker is a control character instead of "@": the marker is turned
 # back into a space on every printed entry, so with "@" any entry that
 # really contained an at sign had it silently replaced by a space.
 # Everything after e in the parameter list is a local.
 function printgroup(x, y, s, e,		i, j, f23, temp, pfx, plen, c, n, mark) {
 	mark = "\001"
 	split(x[s], f23)		# f23[1] = string, f23[2] = page numbers
 	if (split(f23[1], temp, " ") > 1) {
 		pfx = temp[1] " " temp[2]	# 2-word prefix
 		plen = length(pfx)
 		for (i = s+1; i < e; i++) {
 			if (index(x[i], pfx) != 1)
 				break
 			c = substr(x[i], plen+1, 1)
 			if (c != " " && c != ",")	# has to be whole word prefix
 				break
 		}
 		if (i == e) {		# every entry shares the 2-word prefix
 			sub(/ /, mark, f23[1])
 			for (i = s; i < e; i++)
 				sub(/ /, mark, x[i])	# marker is taken out again below
 		}
 	}
 	n = sub(/,?[ ~]+.*/, "", f23[1]) # zap rest of line

 	sub(/,$/, "", f23[1])
 	if (n > 0) {	# some change, so not a single word
 		sub(mark, " ", f23[1])
 		print y[s], f23[1]	# print main entry
 	}
 	for (j = s; j < e; j++) {
 		split(x[j], f23)
 		sub(/^[^, ]+[, ]+/, "   ", f23[1])	# indent in place of heading word
 		sub(mark, " ", f23[1])
 		print y[s], f23[1], f23[2]
 	}
 }

 ' "$@"	# quoted "$@" keeps each file name argument intact (spaces, globs)
	#!/bin/sh

	# input:
	#	key (tab) string (tab) page numbers
	#		command	command	123
	#		command, data	command, [data]	11
	#		command, display	command, [display]	11, 54, 63, 75
	#		command, model	command, [model]	11
	#		command, quit	command, [quit]	5, 16
	# output:
	#	key (tab) string (tab) page numbers
	#		key	command  123
	#		key	   [data]  11
	#		key	   [display] ...
	#		key	   [model] ...
	#		key	   [quit] ...

	awk '
	# Fields are tab separated on input and output, so keys and strings
	# may themselves contain spaces.
	BEGIN { FS = OFS = "\t" }

	# Buffer every record for the END pass:
	#	y[NR] is the sort key (field 1)
	#	x[NR] is "string<tab>page numbers" (fields 2 and 3)
	# The whole-record copy that used to be kept in line[] was never read,
	# so it is no longer stored.
	{ x[NR] = $2 "\t" $3; y[NR] = $1 }

	# find a sequence of entries that share the same prefix
	# dump prefix, then each instance with spaces instead of prefix
	END {
		# Walk the buffered records one run at a time; each pass of the
		# loop consumes records first..last.
		first = 1
		while (first <= NR) {
			last = findrun(first)	# index of the final record of this run
			if (last <= first)	# lone entry: emit it unchanged
				print y[first], x[first]
			else			# several entries: emit them as a hierarchy
				printrun(first, last)
			first = last + 1
		}
	}

	# findrun(s): return the index of the last record of the run starting at s.
	# Record j > s belongs to the run when its key y[j] either equals the
	# first word of y[s] (as computed by prefix) or begins with that word
	# followed by a space or a comma.  Returns s when no later record fits.
	# j, p, np and c are locals; c used to leak into the global namespace.
	function findrun(s, j, p, np, c) {
		p = prefix(y[s])
		np = length(p)
		for (j = s+1; j <= NR; j++) {
			if (y[j] == p)			# same, so include
				continue
			if (index(y[j], p) != 1)	# key does not start with p
				break
			c = substr(y[j], np+1, 1)	# character right after the prefix
			if (c != " " && c != ",")	# has to be whole word prefix
				break
		}
		return j-1	# j is the first record outside the run (or NR+1)
	}

	# prefix(s): strip every comma from s, then return the text before the
	# first space; when no space is left, return the comma-free string whole.
	# n is a local.
	function prefix(s, n) {
		gsub(/,/, "", s)
		n = index(s, " ")
		return (n > 0) ? substr(s, 1, n-1) : s
	}

	# printrun(s, e): print records s..e (inclusive) of one run, reordered.
	# Each record is put into exactly one bucket according to its x[] text:
	#	sx/sy	contains "{see"			("see", "see also")
	#	px/py	starts with "["			(prefix word is [...])
	#	ex/ey	contains "[...]" elsewhere
	#	ix/iy	none of the above
	# Output order is: see entries (one per line), then the plain bucket,
	# then the "[...] elsewhere" bucket, then the "starts with [" bucket.
	# A bucket holding more than one entry is handed to printgroup.
	# i and the four counters are locals; the bucket arrays stay global and
	# are simply overwritten from index 0 on every call.
	function printrun(s, e, i, s1, p1, e1, i1) {
		s1 = 0; e1 = 0; p1 = 0; i1 = 0
		for (i = s; i <= e; i++) {
			# "[{]" rather than a bare leading "{": an ERE that starts
			# with a left brace has undefined meaning in POSIX.
			if (x[i] ~ /[{]see/) {		# see, see also
				sx[s1] = x[i]
				sy[s1] = y[i]
				s1++
			} else if (x[i] ~ /^\[/) {	# prefix word is [...]
				px[p1] = x[i]
				py[p1] = y[i]
				p1++
			} else if (x[i] ~ /\[.*\]/) {	# [...] somewhere else
				ex[e1] = x[i]
				ey[e1] = y[i]
				e1++
			} else {			# none of the above
				ix[i1] = x[i]
				iy[i1] = y[i]
				i1++
			}
		}
		# Sanity check: every record must have landed in exactly one bucket.
		if (e-s+1 != s1 + p1 + i1 + e1)
			print ("hierarchy: internal error: classified " (s1 + p1 + i1 + e1) " of " (e-s+1) " entries") >"/dev/stderr"

		for (i = 0; i < s1; i++)	# "see", one/line
			print sy[i], sx[i]
		if (i1 > 1)
			printgroup(ix,iy,0,i1)	# non [...] items
		else if (i1 == 1)
			print iy[0], ix[0]
		if (e1 > 1)
			printgroup(ex,ey,0,e1)	# items with [...] after the first word
		else if (e1 == 1)
			print ey[0], ex[0]
		if (p1 > 1)
			printgroup(px,py,0,p1)	# [prefix] ... items
		else if (p1 == 1)
			print py[0], px[0]
	}

	# printgroup(x, y, s, e): print entries x[s] .. x[e-1] as one group, all
	# under the key y[s].  Each x[] element is "string<tab>page numbers".
	#
	# If the first string has at least two words and every other entry begins
	# with those same two words (followed by a space or a comma), the first
	# space of every entry is temporarily replaced by a marker so that the
	# two words are treated as one heading word.  Note that this rewrites the
	# elements of the array passed as x.
	#
	# The heading is the first string cut at its first run of spaces/tildes
	# (and an optional comma before it); it is printed on a line of its own
	# only when that cut removed something.  Every entry is then printed
	# with its leading word and the following commas/spaces replaced by
	# three spaces, the indent shown in the sample output of the first
	# listing of this script.
	#
	# The marker is a control character instead of "@": the marker is turned
	# back into a space on every printed entry, so with "@" any entry that
	# really contained an at sign had it silently replaced by a space.
	# Everything after e in the parameter list is a local.
	function printgroup(x, y, s, e, i, j, f23, temp, pfx, plen, c, n, mark) {
		mark = "\001"
		split(x[s], f23)		# f23[1] = string, f23[2] = page numbers
		if (split(f23[1], temp, " ") > 1) {
			pfx = temp[1] " " temp[2]	# 2-word prefix
			plen = length(pfx)
			for (i = s+1; i < e; i++) {
				if (index(x[i], pfx) != 1)
					break
				c = substr(x[i], plen+1, 1)
				if (c != " " && c != ",")	# has to be whole word prefix
					break
			}
			if (i == e) {		# every entry shares the 2-word prefix
				sub(/ /, mark, f23[1])
				for (i = s; i < e; i++)
					sub(/ /, mark, x[i])	# marker is taken out again below
			}
		}
		n = sub(/,?[ ~]+.*/, "", f23[1]) # zap rest of line

		sub(/,$/, "", f23[1])
		if (n > 0) {	# some change, so not a single word
			sub(mark, " ", f23[1])
			print y[s], f23[1]	# print main entry
		}
		for (j = s; j < e; j++) {
			split(x[j], f23)
			sub(/^[^, ]+[, ]+/, "   ", f23[1])	# indent in place of heading word
			sub(mark, " ", f23[1])
			print y[s], f23[1], f23[2]
		}
	}

	' "$@"	# quoted "$@" keeps each file name argument intact (spaces, globs)