-- Post-image of Text/Regex.hs (ghc-base.git, blob 7a24043), reconstructed
-- from a gitweb blobdiff against blob 9551102.  The first five lines of the
-- module header were outside the diff hunk and are not reproduced here.
--
-- Maintainer  :  libraries@haskell.org
-- Stability   :  experimental
-- Portability :  portable
--
-- Regular expression matching.  Uses the POSIX regular expression
-- interface in "Text.Regex.Posix".
--
-----------------------------------------------------------------------------

module Text.Regex (
    -- * Regular expressions
    Regex,
    mkRegex,
    mkRegexWithOpts,
    matchRegex,
    matchRegexAll,
    subRegex,
    splitRegex
  ) where

import Prelude
import qualified Text.Regex.Posix as RE
import Text.Regex.Posix ( Regex )
import System.IO.Unsafe ( unsafePerformIO )

-- NOTE(review): every use of 'unsafePerformIO' in this module treats
-- 'RE.regcomp' / 'RE.regexec' as pure functions of their arguments.  That is
-- presumably safe because they only allocate and read; confirm against the
-- implementation in "Text.Regex.Posix".

-- | Makes a regular expression with the default options: POSIX
-- \"extended\" syntax (that of @egrep@), case-sensitive, and /without/
-- the newline flag.  This is the same compilation that
-- @'mkRegexWithOpts' s False True@ performs.
mkRegex :: String -> Regex
mkRegex s = unsafePerformIO (RE.regcomp s RE.regExtended)

-- | Makes a regular expression, where the multi-line and
-- case-sensitive options can be changed from the default settings.
mkRegexWithOpts
   :: String  -- ^ The regular expression to compile
   -> Bool    -- ^ 'True' @\<=>@ @\'^\'@ and @\'$\'@ match the beginning and
              -- end of individual lines respectively, and @\'.\'@ does /not/
              -- match the newline character.
   -> Bool    -- ^ 'True' @\<=>@ matching is case-sensitive
   -> Regex   -- ^ Returns: the compiled regular expression
mkRegexWithOpts s multi_line case_sensitive
   = unsafePerformIO (RE.regcomp s (RE.regExtended + newline + igcase))
   where
     -- Each option contributes either its flag or 0, so the flags can be
     -- combined by addition.
     newline | multi_line = RE.regNewline
             | otherwise  = 0

     igcase  | case_sensitive = 0
             | otherwise      = RE.regIgnoreCase

-- | Match a regular expression against a string.
matchRegex
   :: Regex           -- ^ The regular expression
   -> String          -- ^ The string to match against
   -> Maybe [String]  -- ^ Returns: @'Just' strs@ if the match succeeded
                      -- (and @strs@ is the list of subexpression matches),
                      -- or 'Nothing' otherwise.
matchRegex p str = fmap subexprs (matchRegexAll p str)
   where
     subexprs (_, _, _, sub_strs) = sub_strs

-- | Match a regular expression against a string, returning more information
-- about the match.
matchRegexAll
   :: Regex   -- ^ The regular expression
   -> String  -- ^ The string to match against
   -> Maybe ( String, String, String, [String] )
              -- ^ Returns: 'Nothing' if the match failed, or:
              --
              -- >  Just ( everything before match,
              -- >         portion matched,
              -- >         everything after the match,
              -- >         subexpression matches )
matchRegexAll p str = unsafePerformIO (RE.regexec p str)

-- | Replaces every occurrence of the given regexp with the replacement
-- string.
--
-- In the replacement string, @\"\\1\"@ refers to the first substring;
-- @\"\\2\"@ to the second, etc; and @\"\\0\"@ to the entire match.
-- @\"\\\\\\\\\"@ will insert a literal backslash.  A reference to a
-- subexpression that the pattern does not have expands to the empty
-- string; a backslash followed by anything else is copied unchanged.
--
-- If the pattern matches the empty string, the replacement is inserted
-- and one character of input is then copied verbatim before matching
-- resumes, so the function always makes progress.
--
-- Each remainder of the input is matched as a fresh string, so an anchor
-- such as @\'^\'@ can match again at the point where the previous match
-- ended.
subRegex :: Regex   -- ^ Search pattern
         -> String  -- ^ Input string
         -> String  -- ^ Replacement text
         -> String  -- ^ Output string
subRegex _ "" _ = ""
subRegex regexp inp repl =
   case matchRegexAll regexp inp of
     Nothing -> inp
     Just (lead, match, trail, groups) ->
       lead ++ expand match groups repl ++ continue match trail
   where
     -- A backslash escape in the replacement text: either an escaped
     -- backslash or a decimal subexpression reference.
     escapeRe = mkRegex "\\\\(\\\\|[0-9]+)"

     -- Carry on after a match.  After an empty match one character is
     -- copied through first; otherwise the same empty match would be
     -- found again forever.
     continue "" ""       = ""
     continue "" (c : cs) = c : subRegex regexp cs repl
     continue _  rest     = subRegex regexp rest repl

     -- Expand every escape in the replacement text for one match.
     expand _ _ [] = []
     expand match groups text =
       case matchRegexAll escapeRe text of
         Nothing -> text
         Just (before, _, after, escGroups) ->
           before ++ substitute match groups escGroups
                  ++ expand match groups after

     -- The text a single escape stands for.
     substitute _ _ [] = ""
     substitute _ _ ("\\" : _) = "\\"
     substitute match groups (digits : _) =
       case reads digits :: [(Int, String)] of
         [(0, "")] -> match
         [(n, "")] | n > 0 ->
           case drop (n - 1) groups of
             (g : _) -> g
             []      -> ""   -- no such subexpression
         _ -> ""

-- | Splits a string based on a regular expression.  The regular expression
-- should identify one delimiter.
--
-- An empty input yields no fields.  A delimiter at the very end of the
-- input yields a trailing empty field.  If the delimiter pattern matches
-- the empty string, splitting stops there and the rest of the input is
-- returned as a single field, since an empty delimiter cannot separate
-- anything and would otherwise never be consumed.
splitRegex
   :: Regex     -- ^ The delimiter pattern
   -> String    -- ^ The string to split
   -> [String]  -- ^ Returns: the fields between delimiters
splitRegex _ [] = []
splitRegex delim str =
   case matchRegexAll delim str of
     Nothing            -> [str]
     Just (_, "", _, _) -> [str]
     Just (field, _, remainder, _)
       | null remainder -> [field, ""]
       | otherwise      -> field : splitRegex delim remainder