X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Text%2FRegex.hs;h=7a2404360b9162856df4f288a55a9fcc9266aa33;hb=66681b296084bc3683cab50986402e5da27f5912;hp=a0d724e3d9c3ce74a79fdc9367f0a86ceace4e31;hpb=e26cd9742d4ac05614191f394fbc5b752809cec4;p=ghc-base.git

diff --git a/Text/Regex.hs b/Text/Regex.hs
index a0d724e..7a24043 100644
--- a/Text/Regex.hs
+++ b/Text/Regex.hs
@@ -6,20 +6,21 @@
 --
 -- Maintainer  :  libraries@haskell.org
 -- Stability   :  experimental
--- Portability :  non-portable (only on platforms that provide a regex lib)
+-- Portability :  portable
 --
 -- Regular expression matching.  Uses the POSIX regular expression
 -- interface in "Text.Regex.Posix".
 --
 -----------------------------------------------------------------------------
-
 module Text.Regex (
     -- * Regular expressions
     Regex,
     mkRegex,
     mkRegexWithOpts,
     matchRegex,
-    matchRegexAll
+    matchRegexAll,
+    subRegex,
+    splitRegex
   ) where
 
 import Prelude
@@ -35,11 +36,11 @@ mkRegex :: String -> Regex
 mkRegex s = unsafePerformIO (RE.regcomp s RE.regExtended)
 
 -- | Makes a regular expression, where the multi-line and
--- case-sensitve options can be changed from the default settings.
+-- case-sensitive options can be changed from the default settings.
 mkRegexWithOpts
    :: String  -- ^ The regular expression to compile
-   -> Bool    -- ^ 'True' @\<=>@ '@^@' and '@$@' match the beginning and
-              -- end of individual lines respectively, and '.' does /not/
+   -> Bool    -- ^ 'True' @\<=>@ @\'^\'@ and @\'$\'@ match the beginning and
+              -- end of individual lines respectively, and @\'.\'@ does /not/
               -- match the newline character.
    -> Bool    -- ^ 'True' @\<=>@ matching is case-sensitive
    -> Regex   -- ^ Returns: the compiled regular expression
@@ -47,8 +48,8 @@ mkRegexWithOpts
 mkRegexWithOpts s single_line case_sensitive
    = unsafePerformIO (RE.regcomp s (RE.regExtended + newline + igcase))
    where
-        newline | single_line = 0
-                | otherwise   = RE.regNewline
+        newline | single_line = RE.regNewline
+                | otherwise   = 0
 
         igcase  | case_sensitive = 0
                 | otherwise      = RE.regIgnoreCase
@@ -80,3 +81,83 @@ matchRegexAll
 
 matchRegexAll p str = unsafePerformIO (RE.regexec p str)
 
+{- | Replaces every occurrence of the given regexp with the replacement string.
+
+In the replacement string, @\"\\1\"@ refers to the first substring;
+@\"\\2\"@ to the second, etc; and @\"\\0\"@ to the entire match.
+@\"\\\\\\\\\"@ will insert a literal backslash.  A reference to a group
+the pattern does not have expands to the empty string.
+
+A match of zero width is replaced like any other, after which one input
+character is copied through unchanged so that the scan always advances.
+An empty input string is returned unchanged.
+
+-}
+subRegex :: Regex   -- ^ Search pattern
+         -> String  -- ^ Input string
+         -> String  -- ^ Replacement text
+         -> String  -- ^ Output string
+subRegex _ "" _ = ""
+subRegex regexp inp repl =
+  let -- One escape in the replacement text: a backslash followed by
+      -- either a second backslash or a decimal group number.
+      bre = mkRegex "\\\\(\\\\|[0-9]+)"
+
+      -- Expand every escape in the replacement text for a single match.
+      expand _ [] _ = []
+      expand match text groups =
+        case matchRegexAll bre text of
+          Nothing -> text
+          Just (lead, _, trail, bgroups) ->
+            lead ++ escape match groups bgroups ++ expand match trail groups
+
+      -- Text denoted by one escape, given what followed its backslash.
+      escape _ _ [] = ""
+      escape _ _ ("\\" : _) = "\\"
+      escape match groups (digits : _) =
+        case (reads digits :: [(Integer, String)]) of
+          [(0, "")] -> match
+          [(n, "")] -> nth (n - 1) groups
+          _ -> ""
+
+      -- Total replacement for list indexing: a missing element yields "".
+      nth _ [] = ""
+      nth 0 (g : _) = g
+      nth k (_ : gs) = nth (k - 1) gs
+  in case matchRegexAll regexp inp of
+       Nothing -> inp
+       Just (lead, match, trail, groups) ->
+         let rest
+               | not (null match) = subRegex regexp trail repl
+               | otherwise =
+                   -- Zero-width match: copy one character through,
+                   -- otherwise the recursion would never terminate.
+                   case trail of
+                     [] -> []
+                     (c : cs) -> c : subRegex regexp cs repl
+         in lead ++ expand match repl groups ++ rest
+
+{- | Splits a string based on a regular expression.  The regular expression
+should identify one delimiter.
+
+A delimiter match of zero width never splits the string: the scan skips
+one character and continues, so the result is always finite.
+-}
+
+splitRegex :: Regex -> String -> [String]
+splitRegex _ [] = []
+splitRegex delim str =
+  case matchRegexAll delim str of
+    Nothing -> [str]
+    Just (firstline, match, remainder, _)
+      | null remainder -> [firstline, ""]
+      | null match ->
+          -- Zero-width delimiter: carry one character into the next piece
+          -- so that the recursion always makes progress.
+          case remainder of
+            [] -> [firstline]
+            (c : cs) ->
+              case splitRegex delim cs of
+                [] -> [firstline ++ [c]]
+                (piece : pieces) -> (firstline ++ c : piece) : pieces
+      | otherwise -> firstline : splitRegex delim remainder