X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Text%2FRegex%2FPosix.hsc;h=0f28295dce3814c5362cdd788cb64150307fd1c5;hb=e9e2a5412bb7cda8d13a063ac403d9f18ac97380;hp=8df4331f1ce8e72e969c5f70d832358bd1c0c806;hpb=9a47cd94702d25060bc49fbf1f136855af9a9512;p=ghc-base.git diff --git a/Text/Regex/Posix.hsc b/Text/Regex/Posix.hsc index 8df4331..0f28295 100644 --- a/Text/Regex/Posix.hsc +++ b/Text/Regex/Posix.hsc @@ -1,25 +1,38 @@ ----------------------------------------------------------------------------- --- +-- | -- Module : Text.Regex.Posix --- Copyright : (c) The University of Glasgow 2001 --- License : BSD-style (see the file libraries/core/LICENSE) +-- Copyright : (c) The University of Glasgow 2002 +-- License : BSD-style (see the file libraries/base/LICENSE) -- -- Maintainer : libraries@haskell.org -- Stability : experimental --- Portability : non-portable (only on platforms that provide POSIX regexps) --- --- $Id: Posix.hsc,v 1.5 2002/02/14 14:08:50 simonmar Exp $ +-- Portability : portable -- -- Interface to the POSIX regular expression library. --- ToDo: should have an interface using PackedStrings. -- ----------------------------------------------------------------------------- +-- ToDo: should have an interface using PackedStrings. +#ifndef __NHC__ +#include "HsBaseConfig.h" +#else +#define HAVE_REGEX_H 1 +#define HAVE_REGCOMP 1 +#endif + module Text.Regex.Posix ( + -- * The @Regex@ type Regex, -- abstract + -- * Compiling a regular expression regcomp, -- :: String -> Int -> IO Regex + -- ** Flags for regcomp + regExtended, -- (flag to regcomp) use extended regex syntax + regIgnoreCase, -- (flag to regcomp) ignore case when matching + regNewline, -- (flag to regcomp) '.' doesn't match newline + + -- * Matching a regular expression regexec, -- :: Regex -- pattern -- -> String -- string to match -- -> IO (Maybe (String, -- everything before match @@ -27,48 +40,67 @@ module Text.Regex.Posix ( -- String, -- everything after match -- [String])) -- subexpression matches - regExtended, -- (flag to regcomp) use extended regex syntax - regIgnoreCase, -- (flag to regcomp) ignore case when matching - regNewline -- (flag to regcomp) '.' doesn't match newline ) where +#include + +#if HAVE_REGEX_H && HAVE_REGCOMP #include "regex.h" +#else +#include "regex/regex.h" + +-- CFILES stuff is Hugs only +{-# CFILES cbits/regex/reallocf.c #-} +{-# CFILES cbits/regex/regcomp.c #-} +{-# CFILES cbits/regex/regerror.c #-} +{-# CFILES cbits/regex/regexec.c #-} +{-# CFILES cbits/regex/regfree.c #-} +#endif import Prelude import Foreign import Foreign.C +type CRegex = () + +-- | A compiled regular expression newtype Regex = Regex (ForeignPtr CRegex) -- ----------------------------------------------------------------------------- -- regcomp -regcomp :: String -> Int -> IO Regex +-- | Compiles a regular expression +regcomp + :: String -- ^ The regular expression to compile + -> Int -- ^ Flags (summed together) + -> IO Regex -- ^ Returns: the compiled regular expression regcomp pattern flags = do - regex_ptr <- mallocBytes (#const sizeof(regex_t)) - regex_fptr <- newForeignPtr regex_ptr (regfree regex_ptr) + regex_fptr <- mallocForeignPtrBytes (#const sizeof(regex_t)) r <- withCString pattern $ \cstr -> withForeignPtr regex_fptr $ \p -> c_regcomp p cstr (fromIntegral flags) if (r == 0) - then return (Regex regex_fptr) + then do addForeignPtrFinalizer ptr_regfree regex_fptr + return (Regex regex_fptr) else error "Text.Regex.Posix.regcomp: error in pattern" -- ToDo -regfree :: Ptr CRegex -> IO () -regfree p_regex = do - c_regfree p_regex - free p_regex - -- ----------------------------------------------------------------------------- -- regexec -regexec :: Regex -- pattern - -> String -- string to match - -> IO (Maybe (String, -- everything before match - String, -- matched portion - String, -- everything after match - [String])) -- subexpression matches +-- | Matches a regular expression against a string +regexec :: Regex -- ^ Compiled regular expression + -> String -- ^ String to match against + -> IO (Maybe (String, String, String, [String])) + -- ^ Returns: 'Nothing' if the regex did not match the + -- string, or: + -- + -- @ + -- 'Just' (everything before match, + -- matched portion, + -- everything after match, + -- subexpression matches) + -- @ regexec (Regex regex_fptr) str = do withCString str $ \cstr -> do @@ -81,31 +113,31 @@ regexec (Regex regex_fptr) str = do if (r /= 0) then return Nothing else do - (before,match,after) <- matched_parts str p_match + (before,match,after) <- matched_parts str p_match - sub_strs <- - mapM (unpack str) $ take nsub_int $ tail $ - iterate (`plusPtr` (#const sizeof(regmatch_t))) p_match + sub_strs <- + mapM (unpack str) $ take nsub_int $ tail $ + iterate (`plusPtr` (#const sizeof(regmatch_t))) p_match - return (Just (before, match, after, sub_strs)) + return (Just (before, match, after, sub_strs)) matched_parts :: String -> Ptr CRegMatch -> IO (String, String, String) matched_parts string p_match = do - start <- (#peek regmatch_t, rm_so) p_match :: IO CInt - end <- (#peek regmatch_t, rm_eo) p_match :: IO CInt + start <- (#peek regmatch_t, rm_so) p_match :: IO (#type regoff_t) + end <- (#peek regmatch_t, rm_eo) p_match :: IO (#type regoff_t) let s = fromIntegral start; e = fromIntegral end - return ( take (s-1) string, + return ( take s string, take (e-s) (drop s string), drop e string ) unpack :: String -> Ptr CRegMatch -> IO (String) unpack string p_match = do - start <- (#peek regmatch_t, rm_so) p_match :: IO CInt - end <- (#peek regmatch_t, rm_eo) p_match :: IO CInt + start <- (#peek regmatch_t, rm_so) p_match :: IO (#type regoff_t) + end <- (#peek regmatch_t, rm_eo) p_match :: IO (#type regoff_t) -- the subexpression may not have matched at all, perhaps because it -- was optional. In this case, the offsets are set to -1. if (start == -1) then return "" else do - return (take (fromIntegral (end-start)) (drop (fromIntegral start) string)) + return (take (fromIntegral (end-start)) (drop (fromIntegral start) string)) -- ----------------------------------------------------------------------------- -- The POSIX regex C interface @@ -113,7 +145,7 @@ unpack string p_match = do -- Flags for regexec #enum Int,, \ REG_NOTBOL, \ - REG_NOTEOL \ + REG_NOTEOL -- Return values from regexec #enum Int,, \ @@ -142,15 +174,43 @@ unpack string p_match = do REG_ERANGE, \ REG_ESPACE -type CRegex = () type CRegMatch = () +-- GHC and Hugs get the appropriate include file from the OPTIONS +-- pragma generated by hsc2hs from the above #include. +-- Implementations following the FFI spec have to specify it in the +-- foreign import, which is awkward because some systems provide +-- regex.h and the rest of the regex library, but otherwise we +-- need to use our own copy, regex/regex.h. + +#if __GLASGOW_HASKELL__ || __HUGS__ foreign import ccall unsafe "regcomp" c_regcomp :: Ptr CRegex -> CString -> CInt -> IO CInt -foreign import ccall unsafe "regfree" - c_regfree :: Ptr CRegex -> IO () +foreign import ccall unsafe "®free" + ptr_regfree :: FunPtr (Ptr CRegex -> IO ()) foreign import ccall unsafe "regexec" c_regexec :: Ptr CRegex -> CString -> CSize -> Ptr CRegMatch -> CInt -> IO CInt +#elif HAVE_REGEX_H && HAVE_REGCOMP +foreign import ccall unsafe "regex.h regcomp" + c_regcomp :: Ptr CRegex -> CString -> CInt -> IO CInt + +foreign import ccall unsafe "regex.h ®free" + ptr_regfree :: FunPtr (Ptr CRegex -> IO ()) + +foreign import ccall unsafe "regex.h regexec" + c_regexec :: Ptr CRegex -> CString -> CSize + -> Ptr CRegMatch -> CInt -> IO CInt +#else +foreign import ccall unsafe "regex/regex.h regcomp" + c_regcomp :: Ptr CRegex -> CString -> CInt -> IO CInt + +foreign import ccall unsafe "regex/regex.h ®free" + ptr_regfree :: FunPtr (Ptr CRegex -> IO ()) + +foreign import ccall unsafe "regex/regex.h regexec" + c_regexec :: Ptr CRegex -> CString -> CSize + -> Ptr CRegMatch -> CInt -> IO CInt +#endif