1 -----------------------------------------------------------------------------
4 -- Copyright : (c) The University of Glasgow 2001
5 -- License : BSD-style (see the file libraries/base/LICENSE)
7 -- Maintainer : libraries@haskell.org
8 -- Stability : experimental
9 -- Portability : portable
11 -- Regular expression matching. Uses the POSIX regular expression
12 -- interface in "Text.Regex.Posix".
14 -----------------------------------------------------------------------------
16 -- * Regular expressions
27 import qualified Text.Regex.Posix as RE
28 import Text.Regex.Posix ( Regex )
29 import System.IO.Unsafe
-- | Compiles a regular expression using the default options (multi-line,
-- case-sensitive).  Apart from those options, patterns follow the syntax
-- of @egrep@ (i.e. POSIX \"extended\" regular expressions).
mkRegex :: String -> Regex
-- NOTE(review): only 'RE.regExtended' is passed below, with no newline flag;
-- confirm that this alone yields the multi-line default documented above.
mkRegex pat = unsafePerformIO $ RE.regcomp pat RE.regExtended
-- | Makes a regular expression, where the multi-line and
-- case-sensitive options can be changed from the default settings.
mkRegexWithOpts
   :: String  -- ^ The regular expression to compile
   -> Bool    -- ^ 'True' @\<=>@ @\'^\'@ and @\'$\'@ match the beginning and
              -- end of individual lines respectively, and @\'.\'@ does /not/
              -- match the newline character.
   -> Bool    -- ^ 'True' @\<=>@ matching is case-sensitive
   -> Regex   -- ^ Returns: the compiled regular expression
mkRegexWithOpts s single_line case_sensitive
   = unsafePerformIO (RE.regcomp s (RE.regExtended + newline + igcase))
   where
     -- Each option contributes either its flag or 0, so adding both to
     -- 'RE.regExtended' produces the complete set of compile flags.

     -- Despite its name, a 'True' @single_line@ switches 'RE.regNewline' on,
     -- i.e. the per-line behaviour described for the second argument.
     newline | single_line = RE.regNewline
             | otherwise   = 0

     -- Case-sensitive matching needs no flag; only the insensitive mode
     -- adds 'RE.regIgnoreCase'.
     igcase  | case_sensitive = 0
             | otherwise      = RE.regIgnoreCase
-- NOTE(review): this listing does not show the declaration name that should
-- precede the @::@ below, the equation head that binds @p@ and @str@, or the
-- 'Nothing' alternative of the @case@ -- confirm against the complete file.

57 -- | Match a regular expression against a string
59 :: Regex -- ^ The regular expression
60 -> String -- ^ The string to match against
61 -> Maybe [String] -- ^ Returns: @'Just' strs@ if the match succeeded
62 -- (and @strs@ is the list of subexpression matches),
63 -- or 'Nothing' otherwise.
-- Run the matcher via 'unsafePerformIO' and inspect its 'Maybe' result.
65 case (unsafePerformIO (RE.regexec p str)) of
-- On success keep only the subexpression matches; the @before@, @match@ and
-- @after@ components of the result are bound but not used.
67 Just (before, match, after, sub_strs) -> Just sub_strs
-- | Match a regular expression against a string, returning more information
-- about the match.
matchRegexAll
   :: Regex     -- ^ The regular expression
   -> String    -- ^ The string to match against
   -> Maybe ( String, String, String, [String] )
                -- ^ Returns: 'Nothing' if the match failed, or:
                --
                -- >  Just ( everything before match,
                -- >         portion matched,
                -- >         everything after the match,
                -- >         subexpression matches )
-- 'RE.regexec' is an IO action; 'unsafePerformIO' exposes it as a pure
-- function.  NOTE(review): this presumes the action has no observable side
-- effects for a given compiled regex and input -- confirm.
matchRegexAll p str = unsafePerformIO (RE.regexec p str)
{- | Replaces every occurrence of the given regexp with the replacement string.

In the replacement string, @\"\\1\"@ refers to the first substring;
@\"\\2\"@ to the second, etc; and @\"\\0\"@ to the entire match.
@\"\\\\\\\\\"@ will insert a literal backslash.
-}
-- NOTE(review): several lines of this definition are not visible in this
-- listing (the 'Nothing' alternatives of both @case@ expressions, the @then@
-- branch and the use of @index@, and the closing @in@ keywords); the comments
-- below describe only the lines that are shown.
91 subRegex :: Regex -- ^ Search pattern
92 -> String -- ^ Input string
93 -> String -- ^ Replacement text
94 -> String -- ^ Output string
96 subRegex regexp inp repl =
-- Pattern for escapes inside the replacement text: a backslash followed by a
-- group that is either a second backslash or a run of digits.
-- NOTE(review): the group is written with @||@, which looks like an empty
-- alternative; if a lone backslash can match with an empty group, the 'read'
-- further down would fail -- confirm.
97 let bre = mkRegex "\\\\(\\\\||[0-9]+)"
-- Local helper (shadows Prelude's @lookup@ and the outer @repl@): expands the
-- escapes in a piece of replacement text, given the matched text and the
-- subexpression matches of the current match.
100 lookup match repl groups =
101 case matchRegexAll bre repl of
103 Just (lead, _, trail, bgroups) ->
-- The first captured group tells the two escapes apart: a backslash means an
-- escaped backslash, anything else is read as a group number.  'head' and
-- 'read' are both partial.
104 let newval = if (head bgroups) == "\\"
-- Convert the group number written in the replacement to a zero-based index.
106 else let index = (read (head bgroups)) - 1
-- Emit the text before the escape, its expansion, then the expanded rest.
112 lead ++ newval ++ lookup match trail groups
-- Main body: locate the next match of the search pattern in the input.
114 case matchRegexAll regexp inp of
116 Just (lead, match, trail, groups) ->
-- Keep the unmatched prefix, substitute the expanded replacement for the
-- match, and continue with the text that follows the match.
117 lead ++ lookup match repl groups ++ (subRegex regexp trail repl)
{- | Splits a string based on a regular expression.  The regular expression
should identify one delimiter.
-}
-- NOTE(review): the 'Nothing' alternative of the @case@ and the @if@ condition
-- that selects between the two branches below are not visible in this
-- listing -- confirm against the complete file.
123 splitRegex :: Regex -> String -> [String]
125 splitRegex delim str =
-- Look for the next delimiter match in the remaining input.
126 case matchRegexAll delim str of
-- @firstline@ is the text before the delimiter, @remainder@ the text after
-- it; the matched delimiter and its subexpression matches are discarded.
128 Just (firstline, _, remainder, _) ->
-- This branch ends the result with @firstline@ followed by an empty string.
130 then firstline : [] : []
-- Otherwise keep @firstline@ and split whatever follows the delimiter.
131 else firstline : splitRegex delim remainder