X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Text%2FRegex.hs;h=152616a69a753fbc1f1e6012819c309bdaebc65c;hb=30464c0cb915c2ae900909568fa8677bba341e45;hp=62901a51fe78adb8f25274994ced0b65413f0769;hpb=852e5b04e8a8ed6a2d71bfecda76a31c438a1fdb;p=haskell-directory.git diff --git a/Text/Regex.hs b/Text/Regex.hs index 62901a5..152616a 100644 --- a/Text/Regex.hs +++ b/Text/Regex.hs @@ -6,29 +6,28 @@ -- -- Maintainer : libraries@haskell.org -- Stability : experimental --- Portability : non-portable (only on platforms that provide a regex lib) +-- Portability : portable -- -- Regular expression matching. Uses the POSIX regular expression -- interface in "Text.Regex.Posix". -- ----------------------------------------------------------------------------- - module Text.Regex ( -- * Regular expressions Regex, mkRegex, mkRegexWithOpts, matchRegex, - matchRegexAll + matchRegexAll, + subRegex, + splitRegex ) where import Prelude import qualified Text.Regex.Posix as RE +import Text.Regex.Posix ( Regex ) import System.IO.Unsafe --- | A compiled regular expression -type Regex = RE.Regex - -- | Makes a regular expression with the default options (multi-line, -- case-sensitive). The syntax of regular expressions is -- otherwise that of @egrep@ (i.e. POSIX \"extended\" regular @@ -37,11 +36,11 @@ mkRegex :: String -> Regex mkRegex s = unsafePerformIO (RE.regcomp s RE.regExtended) -- | Makes a regular expression, where the multi-line and --- case-sensitve options can be changed from the default settings. +-- case-sensitive options can be changed from the default settings. mkRegexWithOpts :: String -- ^ The regular expression to compile - -> Bool -- ^ 'True' @\<=>@ '@^@' and '@$@' match the beginning and - -- end of individual lines respectively, and '.' does /not/ + -> Bool -- ^ 'True' @\<=>@ @\'^\'@ and @\'$\'@ match the beginning and + -- end of individual lines respectively, and @\'.\'@ does /not/ -- match the newline character. -> Bool -- ^ 'True' @\<=>@ matching is case-sensitive -> Regex -- ^ Returns: the compiled regular expression @@ -49,8 +48,8 @@ mkRegexWithOpts mkRegexWithOpts s single_line case_sensitive = unsafePerformIO (RE.regcomp s (RE.regExtended + newline + igcase)) where - newline | single_line = 0 - | otherwise = RE.regNewline + newline | single_line = RE.regNewline + | otherwise = 0 igcase | case_sensitive = 0 | otherwise = RE.regIgnoreCase @@ -82,3 +81,51 @@ matchRegexAll matchRegexAll p str = unsafePerformIO (RE.regexec p str) +{- | Replaces every occurance of the given regexp with the replacement string. + +In the replacement string, @\"\\1\"@ refers to the first substring; +@\"\\2\"@ to the second, etc; and @\"\\0\"@ to the entire match. +@\"\\\\\\\\\"@ will insert a literal backslash. + +-} +subRegex :: Regex -- ^ Search pattern + -> String -- ^ Input string + -> String -- ^ Replacement text + -> String -- ^ Output string +subRegex _ "" _ = "" +subRegex regexp inp repl = + let bre = mkRegex "\\\\(\\\\|[0-9]+)" + lookup _ [] _ = [] + lookup [] _ _ = [] + lookup match repl groups = + case matchRegexAll bre repl of + Nothing -> repl + Just (lead, _, trail, bgroups) -> + let newval = if (head bgroups) == "\\" + then "\\" + else let index = (read (head bgroups)) - 1 + in + if index == -1 + then match + else groups !! index + in + lead ++ newval ++ lookup match trail groups + in + case matchRegexAll regexp inp of + Nothing -> inp + Just (lead, match, trail, groups) -> + lead ++ lookup match repl groups ++ (subRegex regexp trail repl) + +{- | Splits a string based on a regular expression. The regular expression +should identify one delimiter. +-} + +splitRegex :: Regex -> String -> [String] +splitRegex _ [] = [] +splitRegex delim str = + case matchRegexAll delim str of + Nothing -> [str] + Just (firstline, _, remainder, _) -> + if remainder == "" + then firstline : [] : [] + else firstline : splitRegex delim remainder