1 \section[match]{PackedString functions for matching}
3 This module provides regular expression matching and substitution
4 at the PackedString level. It is built on top of the GNU Regex
5 library modified to handle perl regular expression syntax.
6 For a complete description of the perl syntax, do `man perlre`
7 or have a gander in (Programming|Learning) Perl. Here's a short summary:
10 ^ matches the beginning of line
12 \b matches word boundary
13 \B matches non-word boundary
14 \w matches a word (alphanumeric) character
15 \W matches a non-word character
17 \D matches a non-digit
19 \S matches non-whitespace
20 \A matches beginning of buffer
21 \Z matches end-of-buffer
22 . matches any character (bar newline in single-line mode)
23 + matches 1 or more times
24 * matches 0 or more times
26 {n,m} matches >=n and <=m atoms
27 {n,} matches at least n times
29 [..] matches any character member of char class.
30 (..) if the pattern inside the parens matches, then the ith group is bound to the matched substring
32 \digit matches whatever the ith group matched.
74 import Array ((!), bounds)
75 import Char ( isDigit, ord )
76 import PrelBase ( Char(..) )
82 \subsection[ps-matching]{PackedString matching}
84 Posix matching, returning an array of the intervals that
85 the individual groups matched within the string.
89 matchPS :: PackedString -- reg. exp
90 -> PackedString -- string to match
95 insensitive = 'i' `elem` flags
96 mode = 's' `elem` flags
99 pat <- re_compile_pattern reg mode insensitive
100 re_match pat str 0 True)
103 match2PS :: PackedString -- reg. exp
104 -> PackedString -- string1 to match
105 -> PackedString -- string2 to match
108 match2PS reg str1 str2 flags
110 insensitive = 'i' `elem` flags
111 mode = 's' `elem` flags
116 pat <- re_compile_pattern reg mode insensitive
117 re_match2 pat str1 str2 0 (len1+len2) True)
121 PackedString front-end to searching with GNU Regex
125 searchPS :: PackedString -- reg. exp
126 -> PackedString -- string to match
129 searchPS reg str flags
131 insensitive = 'i' `elem` flags
132 mode = 's' `elem` flags
135 pat <- re_compile_pattern reg mode insensitive
143 search2PS :: PackedString -- reg. exp
144 -> PackedString -- string to match
145 -> PackedString -- string to match
148 search2PS reg str1 str2 flags
150 insensitive = 'i' `elem` flags
151 mode = 's' `elem` flags
157 pat <- re_compile_pattern reg mode insensitive
170 @substrPS s st end@ cuts out the chunk in \tr{s} between \tr{st} and \tr{end}, inclusive.
171 The \tr{Regex} registers represent substrings by storing the start and the end point plus
172 one (st==end => empty string), so we use @chunkPS@ instead.
177 chunkPS :: PackedString
184 substrPS str st (max 0 (end-1))
188 Perl-like match and substitute
192 substPS :: PackedString -- reg. exp
193 -> PackedString -- replacement
195 -> PackedString -- string
203 global = 'g' `elem` flags
204 case_insensitive = 'i' `elem` flags
205 mode = 's' `elem` flags -- single-line mode
206 pat = unsafePerformIO (
207 re_compile_pattern rexp mode case_insensitive)
212 = unsafePerformIO (re_search pat str 0 (lengthPS str) True)
216 Just matcher@(REmatch arr before match after lst) ->
219 prefix = chunkPS str before
221 = if global && (st /= en) then
222 search (dropPS en str)
227 replace matcher repl str,
235 replace (REmatch arr before@(_,b_end) match after lst)
238 = concatPS (reverse acc) -- ToDo: write a `reversed' version of concatPS
242 acc = replace' [] replacement False
-- Build a one-character PackedString by consing the character onto nilPS.
244 single :: Char -> PackedString
245 single x = consPS x nilPS
247 replace' :: [PackedString]
251 replace' acc repl escaped
252 = if (nullPS repl) then
257 x# = case x of { C# c -> c }
265 replace' ((single x):acc) xs (not escaped)
267 if (not escaped) then
272 (num,xs_num) = getNumber ((ord x') - ord '0') xs'
274 if (isDigit x') && (num<=b) then
275 replace' ((chunkPS str ith_ival):acc) xs_num escaped
276 else if x' == '&' then
277 replace' ((chunkPS str match):acc) xs' escaped
278 else if x' == '+' then
279 replace' ((chunkPS str lst):acc) xs' escaped
280 else if x' == '`' then
281 replace' ((chunkPS str (0,b_end)):acc) xs' escaped
282 else if x' == '\'' then
283 replace' ((chunkPS str after):acc) xs' escaped
285 replace' acc xs escaped
287 replace' ((single x):acc) xs False
292 replace' ((single '\n'):acc)
294 replace' ((single '\f'):acc)
295 'r'# -> -- carriage return
296 replace' ((single '\r'):acc)
297 't'# -> -- (horiz) tab
298 replace' ((single '\t'):acc)
299 'v'# -> -- vertical tab
300 replace' ((single '\v'):acc)
301 'a'# -> -- alarm bell
302 replace' ((single '\a'):acc)
304 replace' ((single '\033'):acc)
306 replace' ((single x):acc)) xs False
308 replace' ((single x):acc) xs False
311 getNumber :: Int -> PackedString -> (Int,PackedString)
321 getNumber (acc*10+(ord x - ord '0')) xs
327 Just like substPS, but no prefix and suffix.
331 replacePS :: PackedString -- reg. exp
332 -> PackedString -- replacement
334 -> PackedString -- string
342 global = 'g' `elem` flags
343 case_insensitive = 'i' `elem` flags
344 mode = 's' `elem` flags -- single-line mode
345 pat = unsafePerformIO (
346 re_compile_pattern rexp mode case_insensitive)
351 = unsafePerformIO (re_search pat str 0 (lengthPS str) True)
355 Just matcher@(REmatch arr before match after lst) ->
356 replace matcher repl str
360 Picking matched groups out of string
364 getMatchesNo :: REmatch
366 getMatchesNo (REmatch arr _ _ _ _)
369 getMatchedGroup :: REmatch
373 getMatchedGroup (REmatch arr bef mtch after lst) nth str
375 (1,grps) = bounds arr
377 if (nth >= 1) && (nth <= grps) then
378 chunkPS str (arr!nth)
380 error "getMatchedGroup: group out of range"
382 getWholeMatch :: REmatch
385 getWholeMatch (REmatch _ _ mtch _ _) str
388 getLastMatch :: REmatch
391 getLastMatch (REmatch _ _ _ _ lst) str
394 getAfterMatch :: REmatch
397 getAfterMatch (REmatch _ _ _ aft _) str
403 More or less straight translation of a brute-force string matching
404 function written in C. (Sedgewick ch. 18)
406 This is intended to provide much the same facilities as index/rindex in perl.
411 findPS :: PackedString
420 | j>=m || i>=n = if j==m then (Just (i-m)) else Nothing
425 = if j<m && i<n && (indexPS str i /= indexPS substr j) then
432 rfindPS :: PackedString
437 m = lengthPS substr - 1
441 | j<0 || i<0 = if j<0 then (Just (i+1)) else Nothing
446 = if j>=0 && i>=0 && (indexPS str i /= indexPS substr j) then
447 inner_loop (i+(m-j)-1) m
458 chopPS :: PackedString -> PackedString
459 chopPS str = if nullPS str then
462 chunkPS str (0,lengthPS str-1)
466 Tries to match as much as possible of strA starting from the beginning of strB
467 (handy when matching fancy literals in parsers)
470 matchPrefixPS :: PackedString
473 matchPrefixPS pref str
474 = matchPrefixPS' pref str 0
476 matchPrefixPS' pref str n
477 = if (nullPS pref) || (nullPS str) then
479 else if (headPS pref) == (headPS str) then
480 matchPrefixPS' (tailPS pref) (tailPS str) (n+1)