# Turn the output of mkindex into the form needed by dict.
#
# Reads the single file named on the command line.  For every record,
# field 1 is treated as a number and fields 2..NF as words.  Each word is
# written as "word<TAB>number".  A word containing a parenthesised part,
# e.g. "colo(u)r", is written twice: once with the parenthesised text
# dropped ("color") and once with it kept but the parentheses removed
# ("colour").  The collected records are then sorted, de-duplicated and
# printed on standard output.
#
# Side effect: uses a scratch file named "junk" in the current directory.
BEGIN {
    if (ARGC != 2) {
        # NOTE(review): this script writes and sorts TAB-separated records,
        # so the separator shown inside -F'...' is probably meant to be a
        # TAB rather than a space -- confirm against mkindex's output.
        print "Usage: awk -F' ' -f canonind.awk rawindex > index"
        exit 1
    }

    file = ARGV[1]
    ARGV[1] = ""        # the file is read explicitly with getline below
    tmp = "junk"

    # Create (and truncate) the scratch file up front so that sort and rm
    # still have a file to work on when the input produces no records.
    printf "" > tmp

    while ((status = (getline < file)) > 0) {
        for (i = 2; i <= NF; i++) {
            w = $i
            if (length(w) == 0)
                continue
            b = index(w, "(")
            e = index(w, ")")
            if (b && e && b < e) {
                w1 = substr(w, 1, b - 1)            # text before "("
                w2 = substr(w, b + 1, e - b - 1)    # text between "(" and ")"
                w3 = substr(w, e + 1)               # text after ")"
                printf "%s%s\t%d\n", w1, w3, $1 > tmp
                printf "%s%s%s\t%d\n", w1, w2, w3, $1 > tmp
            } else
                printf "%s\t%d\n", w, $1 > tmp
        }
    }

    # getline returns -1 when the file cannot be read; report that instead
    # of silently producing an empty index with a success status.
    if (status < 0) {
        print "canonind.awk: cannot read " file > "/dev/stderr"
        close(tmp)
        system("rm " tmp)
        exit 1
    }

    # Flush and close the scratch file before another process reads it;
    # otherwise buffered records may be missing from what sort sees.
    close(tmp)

    # Sort on the word ignoring case, then on the exact word, then
    # numerically on the number, dropping duplicate records.  The field
    # separator is the TAB written above, spelled as an escape so it cannot
    # be lost to whitespace mangling.  The -k keys are the POSIX spelling
    # of the obsolete "+0f -1 +0 -1 +1n -2".
    rc = system("sort -u -t'\t' -k1,1f -k1,1 -k2,2n < " tmp)
    system("rm " tmp)
    exit (rc != 0)
}