# turn output of mkindex into form needed by dict
#
# Usage: awk -F'<TAB>' -f canonind.awk rawindex > index
#
# Each input line is read as tab-separated fields (per the -F in the usage
# line): field 1 is printed as an integer, and every following non-empty
# field is a word to index under that number.  A word containing a
# parenthesized part, "pre(opt)post", is emitted twice: once without the
# optional part ("prepost") and once with it ("preoptpost").
# Output lines have the form "word<TAB>number", sorted and de-duplicated.
BEGIN {
	if(ARGC != 2) {
		# stdout is normally redirected into the index file, so the
		# usage message has to go to stderr to be seen at all.
		print "Usage: awk -F'\t' -f canonind.awk rawindex > index" > "/dev/stderr"
		exit 1
	}
	file = ARGV[1]
	ARGV[1] = ""		# the file is read explicitly with getline below
	tmp = "junk"		# scratch file in the current directory

	# Create/truncate the scratch file up front so sort has something to
	# read even when the input contains no words.
	printf "" > tmp

	while ((status = (getline < file)) > 0) {
		for(i = 2; i <= NF; i++) {
			w = $i
			if(length(w) == 0)
				continue
			b = index(w, "(")
			e = index(w, ")")
			if(b && e && b < e) {
				w1 = substr(w, 1, b-1)		# text before "("
				w2 = substr(w, b+1, e-b-1)	# text inside the parentheses
				w3 = substr(w, e+1)		# text after ")"
				printf "%s%s\t%d\n", w1, w3, $1 > tmp
				printf "%s%s%s\t%d\n", w1, w2, w3, $1 > tmp
			} else
				printf "%s\t%d\n", w, $1 > tmp
		}
	}
	if(status < 0) {
		# getline returns -1 when the file cannot be opened or read
		print "canonind.awk: cannot read " file > "/dev/stderr"
		close(tmp)
		system("rm " tmp)
		exit 1
	}
	close(file)

	# Flush and close the scratch file before another process reads it;
	# otherwise buffered records may be missing from sort's input.
	close(tmp)

	# Sort keys (historical +pos -pos syntax): field 1 folding case, then
	# field 1 exactly, then field 2 numerically; -u drops duplicate lines.
	status = system("sort -u -t'\t' +0f -1 +0 -1 +1n -2 < " tmp)
	system("rm " tmp)
	if(status != 0)
		exit 1
	exit 0
}