1 \section[match]{PackedString functions for matching}
3 This module provides regular expression matching and substitution
4 at the PackedString level. It is built on top of the GNU Regex
5 library modified to handle perl regular expression syntax.
6 For a complete description of the perl syntax, do `man perlre`
7 or have a gander in (Programming|Learning) Perl. Here's
10 ^ matches the beginning of line
12 \b matches word boundary
13 \B matches non-word boundary
14 \w matches a word (alphanumeric) character
15 \W matches a non-word character
17 \D matches a non-digit
19 \S matches non-whitespace
20 \A matches beginning of buffer
21 \Z matches end-of-buffer
22 . matches any (bar newline in single-line mode)
23 + matches 1 or more times
24 * matches 0 or more times
26 {n,m} matches >=n and <=m atoms
27 {n,} matches at least n times
29 [..] matches any character member of char class.
30 (..) if the pattern inside the parens matches, then the ith group is bound
32 \digit matches whatever the ith group matched.
74 import Array ((!), bounds)
75 import Char ( isDigit, ord )
76 import PrelBase ( Char(..) )
82 \subsection[ps-matching]{PackedString matching}
84 Posix matching, returning an array of the intervals that
85 the individual groups matched within the string.
89 matchPS :: PackedString -- reg. exp
90 -> PackedString -- string to match
95 insensitive = 'i' `elem` flags
96 mode = 's' `elem` flags
99 pat <- re_compile_pattern reg mode insensitive
100 re_match pat str 0 True)
103 match2PS :: PackedString -- reg. exp
104 -> PackedString -- string1 to match
105 -> PackedString -- string2 to match
108 match2PS reg str1 str2 flags
110 insensitive = 'i' `elem` flags
111 mode = 's' `elem` flags
116 pat <- re_compile_pattern reg mode insensitive
117 re_match2 pat str1 str2 0 (len1+len2) True)
121 PackedString front-end to searching with GNU Regex
125 searchPS :: PackedString -- reg. exp
126 -> PackedString -- string to match
129 searchPS reg str flags
131 insensitive = 'i' `elem` flags
132 mode = 's' `elem` flags
135 pat <- re_compile_pattern reg mode insensitive
143 search2PS :: PackedString -- reg. exp
144 -> PackedString -- string to match
145 -> PackedString -- string to match
148 search2PS reg str1 str2 flags
150 insensitive = 'i' `elem` flags
151 mode = 's' `elem` flags
157 pat <- re_compile_pattern reg mode insensitive
170 @substrPS s st end@ cuts out the chunk in \tr{s} between \tr{st} and \tr{end}, inclusive.
171 The \tr{Regex} registers represent substrings by storing the start and the end point plus
172 one (st==end => empty string), so we use @chunkPS@ instead.
177 chunkPS :: PackedString
184 substrPS str st (max 0 (end-1))
188 Perl-like match and substitute
192 substPS :: PackedString -- reg. exp
193 -> PackedString -- replacement
195 -> PackedString -- string
197 substPS rexp repl flags pstr = search pstr
199 global = 'g' `elem` flags
200 case_insensitive = 'i' `elem` flags
201 mode = 's' `elem` flags -- single-line mode
202 pat = unsafePerformIO (
203 re_compile_pattern rexp mode case_insensitive)
208 = unsafePerformIO (re_search pat str 0 (lengthPS str) True)
212 Just matcher@(REmatch _ before match after _) ->
215 prefix = chunkPS str before
217 | global && (st /= en) = search (dropPS en str)
218 | otherwise = chunkPS str after
221 replace matcher repl str,
229 replace (REmatch arr (_,b_end) match after lst)
232 = concatPS (reverse acc) -- ToDo: write a `reversed' version of concatPS
236 acc = replace' [] replacement False
238 single :: Char -> PackedString -- local helper: wrap one character as a PackedString
239 single x = consPS x nilPS -- built by consing x onto nilPS (presumably the empty PackedString)
241 replace' :: [PackedString]
245 replace' acc repl escaped
250 x# = case x of { C# c -> c }
258 replace' ((single x):acc) xs (not escaped)
260 if (not escaped) then
265 (num,xs_num) = getNumber ((ord x') - ord '0') xs'
267 if (isDigit x') && (num<=b) then
268 replace' ((chunkPS str ith_ival):acc) xs_num escaped
269 else if x' == '&' then
270 replace' ((chunkPS str match):acc) xs' escaped
271 else if x' == '+' then
272 replace' ((chunkPS str lst):acc) xs' escaped
273 else if x' == '`' then
274 replace' ((chunkPS str (0,b_end)):acc) xs' escaped
275 else if x' == '\'' then
276 replace' ((chunkPS str after):acc) xs' escaped
278 replace' acc xs escaped
280 replace' ((single x):acc) xs False
285 replace' ((single '\n'):acc)
287 replace' ((single '\f'):acc)
288 'r'# -> -- carriage return
289 replace' ((single '\r'):acc)
290 't'# -> -- (horiz) tab
291 replace' ((single '\t'):acc)
292 'v'# -> -- vertical tab
293 replace' ((single '\v'):acc)
294 'a'# -> -- alarm bell
295 replace' ((single '\a'):acc)
297 replace' ((single '\033'):acc)
299 replace' ((single x):acc)) xs False
301 replace' ((single x):acc) xs False
304 getNumber :: Int -> PackedString -> (Int,PackedString)
314 getNumber (acc*10+(ord x - ord '0')) xs
320 Just like substPS, but no prefix and suffix.
324 replacePS :: PackedString -- reg. exp
325 -> PackedString -- replacement
327 -> PackedString -- string
335 case_insensitive = 'i' `elem` flags
336 mode = 's' `elem` flags -- single-line mode
337 pat = unsafePerformIO (
338 re_compile_pattern rexp mode case_insensitive)
343 = unsafePerformIO (re_search pat str 0 (lengthPS str) True)
347 Just matcher@(REmatch arr _ match _ lst) ->
348 replace matcher repl str
352 Picking matched groups out of string
356 getMatchesNo :: REmatch
358 getMatchesNo (REmatch arr _ _ _ _)
361 getMatchedGroup :: REmatch
365 getMatchedGroup (REmatch arr bef mtch _ lst) nth str
366 | (nth >= 1) && (nth <= grps) = chunkPS str (arr!nth)
367 | otherwise = error "getMatchedGroup: group out of range"
369 (1,grps) = bounds arr
371 getWholeMatch :: REmatch -> PackedString -> PackedString
372 getWholeMatch (REmatch _ _ mtch _ _) str
375 getLastMatch :: REmatch
378 getLastMatch (REmatch _ _ _ _ lst) str
381 getAfterMatch :: REmatch
384 getAfterMatch (REmatch _ _ _ aft _) str
390 More or less straight translation of a brute-force string matching
391 function written in C. (Sedgewick ch. 18)
393 This is intended to provide much the same facilities as index/rindex in perl.
398 findPS :: PackedString
407 | j>=m || i>=n = if j==m then (Just (i-m)) else Nothing
412 = if j<m && i<n && (indexPS str i /= indexPS substr j) then
419 rfindPS :: PackedString
424 m = lengthPS substr - 1
428 | j<0 || i<0 = if j<0 then (Just (i+1)) else Nothing
433 = if j>=0 && i>=0 && (indexPS str i /= indexPS substr j) then
434 inner_loop (i+(m-j)-1) m
445 chopPS :: PackedString -> PackedString
446 chopPS str = if nullPS str then
449 chunkPS str (0,lengthPS str-1)
453 Tries to match as much as possible of strA starting from the beginning of strB
454 (handy when matching fancy literals in parsers)
457 matchPrefixPS :: PackedString
460 matchPrefixPS pref str
461 = matchPrefixPS' pref str 0
463 matchPrefixPS' pref str n
464 = if (nullPS pref) || (nullPS str) then
466 else if (headPS pref) == (headPS str) then
467 matchPrefixPS' (tailPS pref) (tailPS str) (n+1)