rsc | cfa37a7 | 2004-04-10 18:53:55 +0000 | [diff] [blame] | 1 | .TH REGEXP 3 |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 2 | .SH NAME |
rsc | cfa37a7 | 2004-04-10 18:53:55 +0000 | [diff] [blame] | 3 | regcomp, regcomplit, regcompnl, regexec, regsub, rregexec, rregsub, regerror \- regular expression |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 4 | .SH SYNOPSIS |
rsc | cfa37a7 | 2004-04-10 18:53:55 +0000 | [diff] [blame] | 5 | .B #include <u.h> |
| 6 | .br |
| 7 | .B #include <libc.h> |
| 8 | .br |
| 9 | .B #include <regexp.h> |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 10 | .PP |
| 11 | .ta \w'\fLRegprog 'u |
| 12 | .B |
| 13 | Reprog *regcomp(char *exp) |
| 14 | .PP |
| 15 | .B |
| 16 | Reprog *regcomplit(char *exp) |
| 17 | .PP |
| 18 | .B |
| 19 | Reprog *regcompnl(char *exp) |
| 20 | .PP |
| 21 | .nf |
| 22 | .B |
| 23 | int regexec(Reprog *prog, char *string, Resub *match, int msize) |
| 24 | .PP |
| 25 | .nf |
| 26 | .B |
| 27 | void regsub(char *source, char *dest, int dlen, Resub *match, int msize) |
| 28 | .PP |
| 29 | .nf |
| 30 | .B |
| 31 | int rregexec(Reprog *prog, Rune *string, Resub *match, int msize) |
| 32 | .PP |
| 33 | .nf |
| 34 | .B |
| 35 | void rregsub(Rune *source, Rune *dest, int dlen, Resub *match, int msize) |
| 36 | .PP |
| 37 | .B |
| 38 | void regerror(char *msg) |
| 39 | .SH DESCRIPTION |
| 40 | .I Regcomp |
| 41 | compiles a |
| 42 | regular expression and returns |
| 43 | a pointer to the generated description. |
| 44 | The space is allocated by |
rsc | bf8a59f | 2004-04-11 03:42:27 +0000 | [diff] [blame] | 45 | .IR malloc (3) |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 46 | and may be released by |
| 47 | .IR free . |
| 48 | Regular expressions are exactly as in |
rsc | 058b011 | 2005-01-03 06:40:20 +0000 | [diff] [blame] | 49 | .IR regexp (7). |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 50 | .PP |
| 51 | .I Regcomplit |
| 52 | is like |
| 53 | .I regcomp |
| 54 | except that all characters are treated literally. |
| 55 | .I Regcompnl |
| 56 | is like |
| 57 | .I regcomp |
| 58 | except that the |
| 59 | .B . |
| 60 | metacharacter matches all characters, including newlines. |
| 61 | .PP |
| 62 | .I Regexec |
| 63 | matches a null-terminated |
| 64 | .I string |
| 65 | against the compiled regular expression in |
| 66 | .IR prog . |
| 67 | If it matches, |
| 68 | .I regexec |
| 69 | returns |
| 70 | .B 1 |
| 71 | and fills in the array |
| 72 | .I match |
| 73 | with character pointers to the substrings of |
| 74 | .I string |
| 75 | that correspond to the |
| 76 | parenthesized subexpressions of |
| 77 | .IR exp : |
| 78 | .BI match[ i ].s.sp |
| 79 | points to the beginning and |
| 80 | .BI match[ i ].e.ep |
| 81 | points just beyond |
| 82 | the end of the |
| 83 | .IR i th |
| 84 | substring. |
| 85 | (Subexpression |
| 86 | .I i |
| 87 | begins at the |
| 88 | .IR i th |
| 89 | left parenthesis, counting from 1.) |
| 90 | Pointers in |
| 91 | .B match[0] |
| 92 | pick out the substring that corresponds to |
| 93 | the whole regular expression. |
| 94 | Unused elements of |
| 95 | .I match |
| 96 | are filled with zeros. |
| 97 | Matches involving |
| 98 | .LR * , |
| 99 | .LR + , |
| 100 | and |
| 101 | .L ? |
| 102 | are extended as far as possible. |
| 103 | The number of array elements in |
| 104 | .I match |
| 105 | is given by |
| 106 | .IR msize . |
| 107 | The structure of elements of |
| 108 | .I match |
| 109 | is: |
| 110 | .IP |
| 111 | .EX |
| 112 | typedef struct { |
| 113 | union { |
| 114 | char *sp; |
| 115 | Rune *rsp; |
rsc | c8b6342 | 2005-01-13 04:49:19 +0000 | [diff] [blame] | 116 | } s; |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 117 | union { |
| 118 | char *ep; |
| 119 | Rune *rep; |
rsc | c8b6342 | 2005-01-13 04:49:19 +0000 | [diff] [blame] | 120 | } e; |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 121 | } Resub; |
| 122 | .EE |
| 123 | .LP |
| 124 | If |
rsc | c8b6342 | 2005-01-13 04:49:19 +0000 | [diff] [blame] | 125 | .B match[0].s.sp |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 126 | is nonzero on entry, |
| 127 | .I regexec |
| 128 | starts matching at that point within |
| 129 | .IR string . |
| 130 | If |
rsc | c8b6342 | 2005-01-13 04:49:19 +0000 | [diff] [blame] | 131 | .B match[0].e.ep |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 132 | is nonzero on entry, |
| 133 | the last character matched is the one |
| 134 | preceding that point. |
| 135 | .PP |
| 136 | .I Regsub |
| 137 | places in |
| 138 | .I dest |
| 139 | a substitution instance of |
| 140 | .I source |
| 141 | in the context of the last |
| 142 | .I regexec |
| 143 | performed using |
| 144 | .IR match . |
| 145 | Each instance of |
| 146 | .BI \e n\f1, |
| 147 | where |
| 148 | .I n |
| 149 | is a digit, is replaced by the |
| 150 | string delimited by |
rsc | cfa37a7 | 2004-04-10 18:53:55 +0000 | [diff] [blame] | 151 | .BI match[ n ].s.sp |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 152 | and |
rsc | cfa37a7 | 2004-04-10 18:53:55 +0000 | [diff] [blame] | 153 | .BI match[ n ].e.ep\f1. |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 154 | Each instance of |
| 155 | .L & |
| 156 | is replaced by the string delimited by |
rsc | cfa37a7 | 2004-04-10 18:53:55 +0000 | [diff] [blame] | 157 | .B match[0].s.sp |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 158 | and |
rsc | cfa37a7 | 2004-04-10 18:53:55 +0000 | [diff] [blame] | 159 | .BR match[0].e.ep . |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 160 | The substitution will always be null terminated and |
| 161 | trimmed to fit into \fIdlen\fP bytes. |
| 162 | .PP |
| 163 | .IR Regerror , |
| 164 | called whenever an error is detected in |
| 165 | .IR regcomp , |
| 166 | writes the string |
| 167 | .I msg |
| 168 | on the standard error file and exits. |
| 169 | .I Regerror |
| 170 | can be replaced to perform |
| 171 | special error processing. |
| 172 | If the user supplied |
| 173 | .I regerror |
| 174 | returns rather than exits, |
| 175 | .I regcomp |
| 176 | will return 0. |
| 177 | .PP |
| 178 | .I Rregexec |
| 179 | and |
| 180 | .I rregsub |
| 181 | are variants of |
| 182 | .I regexec |
| 183 | and |
| 184 | .I regsub |
| 185 | that use strings of |
| 186 | .B Runes |
| 187 | instead of strings of |
| 188 | .BR chars . |
| 189 | With these routines, the |
| 190 | .I s.rsp |
| 191 | and |
| 192 | .I e.rep |
| 193 | fields of the |
| 194 | .I match |
| 195 | array elements should be used. |
rsc | cfa37a7 | 2004-04-10 18:53:55 +0000 | [diff] [blame] | 196 | .SH SOURCE |
rsc | c3674de | 2005-01-11 17:37:33 +0000 | [diff] [blame] | 197 | .B \*9/src/libregexp |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 198 | .SH "SEE ALSO" |
rsc | cfa37a7 | 2004-04-10 18:53:55 +0000 | [diff] [blame] | 199 | .IR grep (1) |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 200 | .SH DIAGNOSTICS |
| 201 | .I Regcomp |
| 202 | returns |
| 203 | .B 0 |
| 204 | for an illegal expression |
| 205 | or other failure. |
| 206 | .I Regexec |
| 207 | returns 0 |
| 208 | if |
| 209 | .I string |
| 210 | is not matched. |
rsc | b2cfc4e | 2003-09-30 17:47:41 +0000 | [diff] [blame] | 211 | .SH BUGS |
| 212 | There is no way to specify or match a NUL character; NULs terminate patterns and strings. |