| #!/bin/sh | 
 |  | 
 | # input: | 
 | #	key (tab) string (tab) page numbers | 
 | #		command	command	123 | 
 | #		command, data	command, [data]	11 | 
 | #		command, display	command, [display]	11, 54, 63, 75 | 
 | #		command, model	command, [model]	11 | 
 | #		command, quit	command, [quit]	5, 16 | 
 | # output: | 
 | #	key (tab) string (tab) page numbers | 
 | #		key	command  123 | 
 | #		key	   [data]  11 | 
 | #		key	   [display] ... | 
 | #		key	   [model] ... | 
 | #		key	   [quit] ... | 
 |  | 
 | awk ' | 
 | BEGIN	{ FS = OFS = "\t" }	# tab-separated fields on input and output | 
 |  | 
 | # Save every record for the END pass: | 
 | #	y[NR] = sort key ($1) | 
 | #	x[NR] = display string and page list ($2 and $3), rejoined with a tab | 
 | # The raw record itself is not kept; nothing downstream reads it. | 
 | {	x[NR] = $2 "\t" $3; y[NR] = $1 } | 
 |  | 
 | # Walk the saved keys looking for runs of consecutive entries that share | 
 | # the same first word.  A run of two or more is handed to printrun for | 
 | # regrouping; a lone entry is printed as key, string, pages. | 
 | END { | 
 | 	first = 1 | 
 | 	while (first <= NR) { | 
 | 		last = findrun(first)	# index of the final entry of this run | 
 | 		if (last <= first) | 
 | 			print y[first], x[first] | 
 | 		else | 
 | 			printrun(first, last) | 
 | 		first = last + 1 | 
 | 	} | 
 | } | 
 |  | 
 | # findrun(s): return the index of the last entry of the run that starts at s. | 
 | # The run is y[s] plus the consecutive following keys that either equal the | 
 | # first word of y[s] (as computed by prefix) or begin with that word | 
 | # followed by a space or a comma. | 
 | # j, p, np and c are locals. | 
 | function findrun(s,	j, p, np, c) { | 
 | 	p = prefix(y[s]) | 
 | 	np = length(p) | 
 | 	for (j = s+1; j <= NR; j++) { | 
 | 		if (y[j] == p)			# key is exactly the prefix word | 
 | 			continue | 
 | 		if (index(y[j], p) != 1)	# key does not start with the prefix | 
 | 			break | 
 | 		c = substr(y[j], np+1, 1) | 
 | 		if (c != " " && c != ",")	# prefix must end on a word boundary | 
 | 			break | 
 | 	} | 
 | 	return j-1	# j stopped one past the run | 
 | } | 
 |  | 
 | # prefix(s): return the first word of sort key s after dropping every comma. | 
 | # A key with no space in it comes back whole (minus commas). | 
 | # sp is a local. | 
 | function prefix(s,	sp) { | 
 | 	gsub(/,/, "", s) | 
 | 	sp = index(s, " ") | 
 | 	return (sp > 0) ? substr(s, 1, sp-1) : s | 
 | } | 
 |  | 
 | # printrun(s, e): print entries s..e (which share a first word) regrouped as | 
 | #	1. "{see" cross-references, one per line | 
 | #	2. entries with no [...] in the string | 
 | #	3. entries with [...] somewhere after the start of the string | 
 | #	4. entries whose string begins with [ | 
 | # Classes 2-4 go through printgroup when they hold more than one entry; | 
 | # a class with exactly one entry is printed directly. | 
 | # Everything after e in the parameter list is local scratch: the four | 
 | # counters and the per-class copies of x[] and y[] (indexed from 0). | 
 | function printrun(s, e,		i, s1, e1, p1, i1, sx, sy, px, py, ex, ey, ix, iy) { | 
 | 	s1 = 0; e1 = 0; p1 = 0; i1 = 0 | 
 | 	for (i = s; i <= e; i++) { | 
 | 		# [{] rather than a bare { so the brace is a literal in every awk | 
 | 		if (x[i] ~ /[{]see/) {		# see, see also | 
 | 			sx[s1] = x[i] | 
 | 			sy[s1] = y[i] | 
 | 			s1++ | 
 | 		} else if (x[i] ~ /^\[/) {	# string starts with [...] | 
 | 			px[p1] = x[i] | 
 | 			py[p1] = y[i] | 
 | 			p1++ | 
 | 		} else if (x[i] ~ /\[.*\]/) {	# [...] somewhere else | 
 | 			ex[e1] = x[i] | 
 | 			ey[e1] = y[i] | 
 | 			e1++ | 
 | 		} else {			# none of the above | 
 | 			ix[i1] = x[i] | 
 | 			iy[i1] = y[i] | 
 | 			i1++ | 
 | 		} | 
 | 	} | 
 | 	# sanity check: every entry must have landed in exactly one class | 
 | 	if (e-s+1 != s1 + p1 + i1 + e1) print "oh shit" >"/dev/stderr" | 
 |  | 
 | 	for (i = 0; i < s1; i++)	# "see", one/line | 
 | 		print sy[i], sx[i] | 
 | 	if (i1 > 1) | 
 | 		printgroup(ix,iy,0,i1)	# non [...] items | 
 | 	else if (i1 == 1) | 
 | 		print iy[0], ix[0] | 
 | 	if (e1 > 1) | 
 | 		printgroup(ex,ey,0,e1)	# items with [...] after the start | 
 | 	else if (e1 == 1) | 
 | 		print ey[0], ex[0] | 
 | 	if (p1 > 1) | 
 | 		printgroup(px,py,0,p1)	# [prefix] ... items | 
 | 	else if (p1 == 1) | 
 | 		print py[0], px[0] | 
 | } | 
 |  | 
 | # printgroup(x, y, s, e): print entries x[s] .. x[e-1] as one group, all | 
 | # under the key y[s].  Each x[] element is "string TAB pages". | 
 | # | 
 | # If the first string has more than one word, a main-entry line holding just | 
 | # its lead is printed first.  The lead is the first word, or the first two | 
 | # words when every entry in the group starts with that same pair.  Each | 
 | # entry is then printed with its lead and the separators after it replaced | 
 | # by three spaces; an entry that consists of the lead alone prints as is. | 
 | # | 
 | # Side effect: when a two-word lead is found, the first space of every | 
 | # x[s..e-1] is overwritten with "@" in the array passed in. | 
 | # Everything after e in the parameter list is local scratch. | 
 | function printgroup(x, y, s, e,		i, j, f23, temp, pfx, n, c) { | 
 | 	split(x[s], f23)	# FS is tab: f23[1] = string, f23[2] = pages | 
 | 	if (split(f23[1], temp, " ") > 1) { | 
 | 		pfx = temp[1] " " temp[2]	# 2-word prefix | 
 | 		for (i = s+1; i < e; i++) { | 
 | 			if (index(x[i], pfx) != 1) | 
 | 				break | 
 | 			c = substr(x[i], length(pfx)+1, 1) | 
 | 			if (c != " " && c != ",")	# has to be whole word prefix | 
 | 				break | 
 | 		} | 
 | 		if (i == e) {	# loop ran to the end: all entries share pfx | 
 | 			# join the two words with @ so the code below sees one word | 
 | 			sub(/ /, "@", f23[1]) | 
 | 			for (i = s; i < e; i++) | 
 | 				sub(/ /, "@", x[i])	# take @ out later | 
 | 		} | 
 | 	} | 
 | 	# cut the first string down to its lead; n is 1 if anything was removed | 
 | 	# NOTE(review): ~ presumably stands for a tie/unbreakable space -- confirm | 
 | 	n = sub(/,?[ ~]+.*/, "", f23[1]) # zap rest of line | 
 |  | 
 | 	sub(/,$/, "", f23[1]) | 
 | 	if (n > 0) {	# some change, so not a single word | 
 | 		sub(/@/, " ", f23[1]) | 
 | 		print y[s], f23[1]	# print main entry | 
 | 	} | 
 | 	for (j = s; j < e; j++) { | 
 | 		split(x[j], f23) | 
 | 		sub(/^[^, ]+[, ]+/, "   ", f23[1])	# lead + separators -> indent | 
 | 		sub(/@/, " ", f23[1])	# restore the space inside a surviving lead | 
 | 		print y[s], f23[1], f23[2] | 
 | 	} | 
 | } | 
 |  | 
 | ' "$@"	# quoted so each file operand reaches awk as one word |