-- |
-- Module : Text.Regex
-- Copyright : (c) The University of Glasgow 2001
--- License : BSD-style (see the file libraries/core/LICENSE)
+-- License : BSD-style (see the file libraries/base/LICENSE)
--
-- Maintainer : libraries@haskell.org
-- Stability : experimental
--- Portability : non-portable (only on platforms that provide a regex lib)
+-- Portability : portable
--
--- Regular expression matching.
--- Uses the POSIX regular expression interface in Text.Regex.Posix for now.
+-- Regular expression matching. Uses the POSIX regular expression
+-- interface in "Text.Regex.Posix".
--
-----------------------------------------------------------------------------
-
module Text.Regex (
+ -- * Regular expressions
Regex,
mkRegex,
mkRegexWithOpts,
matchRegex,
- matchRegexAll
+ matchRegexAll,
+ subRegex,
+ splitRegex
) where
import Prelude
import qualified Text.Regex.Posix as RE
+import Text.Regex.Posix ( Regex )
import System.IO.Unsafe
-type Regex = RE.Regex
-
+-- | Makes a regular expression with the default options (multi-line,
+-- case-sensitive). The syntax of regular expressions is
+-- otherwise that of @egrep@ (i.e. POSIX \"extended\" regular
+-- expressions).
mkRegex :: String -> Regex
mkRegex s = unsafePerformIO (RE.regcomp s RE.regExtended)
-mkRegexWithOpts :: String -> Bool -> Bool -> Regex
+-- | Makes a regular expression, where the multi-line and
+-- case-sensitive options can be changed from the default settings.
+mkRegexWithOpts
+ :: String -- ^ The regular expression to compile
+ -> Bool -- ^ 'True' @\<=>@ @\'^\'@ and @\'$\'@ match the beginning and
+ -- end of individual lines respectively, and @\'.\'@ does /not/
+ -- match the newline character.
+ -> Bool -- ^ 'True' @\<=>@ matching is case-sensitive
+ -> Regex -- ^ Returns: the compiled regular expression
+
mkRegexWithOpts s single_line case_sensitive
= unsafePerformIO (RE.regcomp s (RE.regExtended + newline + igcase))
where
- newline | single_line = 0
- | otherwise = RE.regNewline
+ newline | single_line = RE.regNewline
+ | otherwise = 0
igcase | case_sensitive = 0
| otherwise = RE.regIgnoreCase
-matchRegex :: Regex -> String -> Maybe [String]
+-- | Match a regular expression against a string
+matchRegex
+ :: Regex -- ^ The regular expression
+ -> String -- ^ The string to match against
+ -> Maybe [String] -- ^ Returns: @'Just' strs@ if the match succeeded
+ -- (and @strs@ is the list of subexpression matches),
+ -- or 'Nothing' otherwise.
matchRegex p str =
case (unsafePerformIO (RE.regexec p str)) of
Nothing -> Nothing
Just (before, match, after, sub_strs) -> Just sub_strs
-matchRegexAll :: Regex -> String ->
- Maybe ( String, -- \$`
- String, -- \$&
- String, -- \$'
- [String] -- \$1..
- )
+-- | Match a regular expression against a string, returning more information
+-- about the match.
+matchRegexAll
+ :: Regex -- ^ The regular expression
+ -> String -- ^ The string to match against
+ -> Maybe ( String, String, String, [String] )
+ -- ^ Returns: 'Nothing' if the match failed, or:
+ --
+ -- > Just ( everything before match,
+ -- > portion matched,
+ -- > everything after the match,
+ -- > subexpression matches )
+
matchRegexAll p str = unsafePerformIO (RE.regexec p str)
+{- | Replaces every occurance of the given regexp with the replacement string.
+
+In the replacement string, @\"\\1\"@ refers to the first substring;
+@\"\\2\"@ to the second, etc; and @\"\\0\"@ to the entire match.
+@\"\\\\\\\\\"@ will insert a literal backslash.
+
+-}
+subRegex :: Regex -- ^ Search pattern
+ -> String -- ^ Input string
+ -> String -- ^ Replacement text
+ -> String -- ^ Output string
+subRegex _ "" _ = ""
+subRegex regexp inp repl =
+ let bre = mkRegex "\\\\(\\\\||[0-9]+)"
+ lookup _ [] _ = []
+ lookup [] _ _ = []
+ lookup match repl groups =
+ case matchRegexAll bre repl of
+ Nothing -> repl
+ Just (lead, _, trail, bgroups) ->
+ let newval = if (head bgroups) == "\\"
+ then "\\"
+ else let index = (read (head bgroups)) - 1
+ in
+ if index == -1
+ then match
+ else groups !! index
+ in
+ lead ++ newval ++ lookup match trail groups
+ in
+ case matchRegexAll regexp inp of
+ Nothing -> inp
+ Just (lead, match, trail, groups) ->
+ lead ++ lookup match repl groups ++ (subRegex regexp trail repl)
+
+{- | Splits a string based on a regular expression. The regular expression
+should identify one delimiter.
+-}
+
+splitRegex :: Regex -> String -> [String]
+splitRegex _ [] = []
+splitRegex delim str =
+ case matchRegexAll delim str of
+ Nothing -> [str]
+ Just (firstline, _, remainder, _) ->
+ if remainder == ""
+ then firstline : [] : []
+ else firstline : splitRegex delim remainder