X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Text%2FRegex.hs;h=7a2404360b9162856df4f288a55a9fcc9266aa33;hb=3457025838609e26918bb0cf5c16393d3bb5b69b;hp=7957151add6076e7e87675ebdb53787a57b11e81;hpb=3bc707020c8d0f7a11b652c38d33f1d9c87d3ae7;p=ghc-base.git diff --git a/Text/Regex.hs b/Text/Regex.hs index 7957151..7a24043 100644 --- a/Text/Regex.hs +++ b/Text/Regex.hs @@ -1,4 +1,3 @@ -{-# OPTIONS -cpp #-} ----------------------------------------------------------------------------- -- | -- Module : Text.Regex @@ -7,22 +6,21 @@ -- -- Maintainer : libraries@haskell.org -- Stability : experimental --- Portability : non-portable (only on platforms that provide a regex lib) +-- Portability : portable -- -- Regular expression matching. Uses the POSIX regular expression -- interface in "Text.Regex.Posix". -- ----------------------------------------------------------------------------- -#include "ghcconfig.h" module Text.Regex ( -- * Regular expressions Regex, -#if !defined(__HUGS__) || defined(HAVE_REGEX_H) mkRegex, mkRegexWithOpts, matchRegex, - matchRegexAll -#endif + matchRegexAll, + subRegex, + splitRegex ) where import Prelude @@ -30,7 +28,6 @@ import qualified Text.Regex.Posix as RE import Text.Regex.Posix ( Regex ) import System.IO.Unsafe -#if !defined(__HUGS__) || defined(HAVE_REGEX_H) -- | Makes a regular expression with the default options (multi-line, -- case-sensitive). The syntax of regular expressions is -- otherwise that of @egrep@ (i.e. POSIX \"extended\" regular @@ -39,7 +36,7 @@ mkRegex :: String -> Regex mkRegex s = unsafePerformIO (RE.regcomp s RE.regExtended) -- | Makes a regular expression, where the multi-line and --- case-sensitve options can be changed from the default settings. +-- case-sensitive options can be changed from the default settings. 
mkRegexWithOpts :: String -- ^ The regular expression to compile -> Bool -- ^ 'True' @\<=>@ @\'^\'@ and @\'$\'@ match the beginning and @@ -84,4 +81,51 @@ matchRegexAll matchRegexAll p str = unsafePerformIO (RE.regexec p str) -#endif +{- | Replaces every occurrence of the given regexp with the replacement string. + +In the replacement string, @\"\\1\"@ refers to the first substring; +@\"\\2\"@ to the second, etc; and @\"\\0\"@ to the entire match. +@\"\\\\\\\\\"@ will insert a literal backslash. + +-} +subRegex :: Regex -- ^ Search pattern + -> String -- ^ Input string + -> String -- ^ Replacement text + -> String -- ^ Output string +subRegex _ "" _ = "" +subRegex regexp inp repl = + let bre = mkRegex "\\\\(\\\\||[0-9]+)" + lookup _ [] _ = [] + lookup [] _ _ = [] + lookup match repl groups = + case matchRegexAll bre repl of + Nothing -> repl + Just (lead, _, trail, bgroups) -> + let newval = if (head bgroups) == "\\" + then "\\" + else let index = (read (head bgroups)) - 1 + in + if index == -1 + then match + else groups !! index + in + lead ++ newval ++ lookup match trail groups + in + case matchRegexAll regexp inp of + Nothing -> inp + Just (lead, match, trail, groups) -> + lead ++ lookup match repl groups ++ (subRegex regexp trail repl) + +{- | Splits a string based on a regular expression. The regular expression +should identify one delimiter. +-} + +splitRegex :: Regex -> String -> [String] +splitRegex _ [] = [] +splitRegex delim str = + case matchRegexAll delim str of + Nothing -> [str] + Just (firstline, _, remainder, _) -> + if remainder == "" + then firstline : [] : [] + else firstline : splitRegex delim remainder