awk ' # gen.key
#   Generate the sort key for each index entry.
#
#   Input: Each input line has one of the following two forms:
#       string (tab) numlist
#       string " %key " sort.key (tab) numlist
#   Output: Each output line has the form:
#       sort.key (tab) string (tab) numlist
#
#   Caveat: the literal text QQ0QQ .. QQ5QQ is used internally as a
#   placeholder for quoted characters, so a string that already contains
#   one of those sequences will have it rewritten in the generated key.

BEGIN { FS = OFS = "\t" }

$1 ~ / %key / { # use sort.key if it is provided
    # Only $1 is tested: index() below searches $1, so a " %key " that
    # occurs solely in the numlist must not select this rule.
    i = index($1, " %key ")
    # " %key " is 6 characters long: the key starts at i+6 and the
    # display string is everything before position i.
    print substr($1, i+6), substr($1, 1, i-1), $2
    next
}

{ # generate sort.key (in $2, by modifying string) if it is not provided
    $3 = $2     # shift numlist out of the way
    $2 = $1     # start the key as a copy of the string

    # Modify sort.key
    # Remove some troff commands: \f(XX and \fX font changes, and
    # \s+N, \s-N, \sN, \sNN point-size changes.
    gsub(/\\f\(..|\\f.|\\s[+-][0-9]|\\s[0-9][0-9]?/, "", $2)

    # underscore -> 0, so "foo_gorp" sorts before "food"
    gsub(/_/, "0", $2)

    # quote character is %, space character is ~
    quoted = 0
    if ($2 ~ /%/) { # hide quoted literals in Q
        quoted = 1
        gsub(/%%/,  "QQ0QQ", $2)
        gsub(/%\[/, "QQ1QQ", $2)
        gsub(/%\]/, "QQ2QQ", $2)
        gsub(/%\{/, "QQ3QQ", $2)
        gsub(/%\}/, "QQ4QQ", $2)
        gsub(/%~/,  "QQ5QQ", $2)
    }
    gsub(/%e/, "\\", $2)            # implement troff escape
    gsub(/~/, " ", $2)              # unquoted tildes become spaces
    gsub(/[%\[\]\{\}]/, "", $2)     # remove % and font-changing []{}
    if (quoted) { # restore literals but without escape character
        gsub(/QQ0QQ/, "%", $2)
        gsub(/QQ1QQ/, "[", $2)
        gsub(/QQ2QQ/, "]", $2)
        gsub(/QQ3QQ/, "{", $2)
        gsub(/QQ4QQ/, "}", $2)
        gsub(/QQ5QQ/, "~", $2)
    }

    # The two classes below need DIFFERENT prefixes.  Two spaces compare
    # lower than one space followed by a digit, so every purely
    # nonalphabetic key lands ahead of every leading-digit key no matter
    # which punctuation it starts with.
    # NOTE(review): assumes final.sort does not ignore leading blanks.
    if ($2 ~ /^[^a-zA-Z]+$/)        # purely nonalphabetic lines go first
        $2 = "  " $2
    else if ($2 ~ /^[0-9]/)         # lines with leading digits come next
        $2 = " " $2
    # otherwise whatever final.sort does
}

{ print $2, $1, $3 }
' "$@"