-----------------------------------------------------------------------------
---
+-- |
-- Module : Text.Regex.Posix
--- Copyright : (c) The University of Glasgow 2001
--- License : BSD-style (see the file libraries/core/LICENSE)
+-- Copyright : (c) The University of Glasgow 2002
+-- License : BSD-style (see the file libraries/base/LICENSE)
--
-- Maintainer : libraries@haskell.org
-- Stability : experimental
--- Portability : non-portable (only on platforms that provide POSIX regexps)
---
--- $Id: Posix.hsc,v 1.1 2001/08/02 11:20:50 simonmar Exp $
+-- Portability : non-portable (needs POSIX regexps)
--
-- Interface to the POSIX regular expression library.
--- ToDo: detect regex library with configure.
--- ToDo: should have an interface using PackedStrings.
--
-----------------------------------------------------------------------------
+-- ToDo: should have an interface using PackedStrings.
+#include "config.h"
+
module Text.Regex.Posix (
+ -- * The @Regex@ type
Regex, -- abstract
+ -- * Compiling a regular expression
regcomp, -- :: String -> Int -> IO Regex
+ -- ** Flags for regcomp
+ regExtended, -- (flag to regcomp) use extended regex syntax
+ regIgnoreCase, -- (flag to regcomp) ignore case when matching
+ regNewline, -- (flag to regcomp) '.' doesn't match newline
+
+ -- * Matching a regular expression
regexec, -- :: Regex -- pattern
-- -> String -- string to match
-- -> IO (Maybe (String, -- everything before match
+ -- String, -- matched portion
-- String, -- everything after match
-- [String])) -- subexpression matches
- regExtended, -- (flag to regcomp) use extended regex syntax
- regIgnoreCase, -- (flag to regcomp) ignore case when matching
- regNewline -- (flag to regcomp) '.' doesn't match newline
) where
+#include <sys/types.h>
+
+#if HAVE_REGEX_H && HAVE_REGCOMP
#include "regex.h"
+#else
+#include "regex/regex.h"
+{-# CBITS regex/reallocf.c #-}
+{-# CBITS regex/regcomp.c #-}
+{-# CBITS regex/regerror.c #-}
+{-# CBITS regex/regexec.c #-}
+{-# CBITS regex/regfree.c #-}
+#endif
import Prelude
import Foreign
import Foreign.C
+-- Placeholder for the C regex_t structure; used in this module only as the
+-- target type of Ptr / ForeignPtr.
+type CRegex = ()
+
+-- | A compiled regular expression
newtype Regex = Regex (ForeignPtr CRegex)
-- -----------------------------------------------------------------------------
-- regcomp
-regcomp :: String -> Int -> IO Regex
+-- | Compiles a regular expression
+--
+-- Calls 'error' if the underlying C @regcomp@ call returns a non-zero result.
+regcomp
+ :: String -- ^ The regular expression to compile
+ -> Int -- ^ Flags (summed together)
+ -> IO Regex -- ^ Returns: the compiled regular expression
regcomp pattern flags = do
- regex_ptr <- mallocBytes (#const sizeof(regex_t))
- regex_fptr <- newForeignPtr regex_ptr (regfree regex_ptr)
- withCString pattern $ \cstr -> do
- r <- c_regcomp regex_fptr cstr (fromIntegral flags)
- if (r == 0)
- then return (Regex regex_fptr)
- else error "Text.Regex.Posix.regcomp: error in pattern" -- ToDo
-
-regfree :: Ptr CRegex -> IO ()
-regfree p_regex = do
- c_regfree p_regex
- free p_regex
+ -- Allocate sizeof(regex_t) bytes behind a ForeignPtr.
+ regex_fptr <- mallocForeignPtrBytes (#const sizeof(regex_t))
+ r <- withCString pattern $ \cstr ->
+ withForeignPtr regex_fptr $ \p ->
+ c_regcomp p cstr (fromIntegral flags)
+ -- The regfree finalizer is attached only when c_regcomp returned 0; any
+ -- other result goes straight to the error branch without registering it.
+ if (r == 0)
+ then do addForeignPtrFinalizer ptr_regfree regex_fptr
+ return (Regex regex_fptr)
+ else error "Text.Regex.Posix.regcomp: error in pattern" -- ToDo
-- -----------------------------------------------------------------------------
-- regexec
-regexec :: Regex -- pattern
- -> String -- string to match
- -> IO (Maybe (String, -- everything before match
- String, -- matched portion
- String, -- everything after match
- [String])) -- subexpression matches
+-- | Matches a regular expression against a string
+regexec :: Regex -- ^ Compiled regular expression
+ -> String -- ^ String to match against
+ -> IO (Maybe (String, String, String, [String]))
+ -- ^ Returns: 'Nothing' if the regex did not match the
+ -- string, or:
+ --
+ -- @
+ -- 'Just' (everything before match,
+ -- matched portion,
+ -- everything after match,
+ -- subexpression matches)
+ -- @
regexec (Regex regex_fptr) str = do
- withUnsafeCString str $ \cstr -> do
- nsub <- withForeignPtr regex_fptr $ \p -> (#peek regex_t, re_nsub) p
- let nsub_int = fromIntegral (nsub :: CSize)
- allocaBytes ((1 + nsub_int) * (#const sizeof(regmatch_t))) $ \p_match -> do
+ withCString str $ \cstr -> do
+ -- The regex pointer stays inside withForeignPtr for the whole match,
+ -- covering both the re_nsub read and the c_regexec call.
+ withForeignPtr regex_fptr $ \regex_ptr -> do
+ -- re_nsub is read from the compiled regex_t and sizes the match array.
+ nsub <- (#peek regex_t, re_nsub) regex_ptr
+ let nsub_int = fromIntegral (nsub :: CSize)
+ allocaBytes ((1 + nsub_int) * (#const sizeof(regmatch_t))) $ \p_match -> do
-- add one because index zero covers the whole match
- r <- c_regexec regex_fptr cstr (1 + nsub) p_match 0{-no flags for now-}
+ r <- c_regexec regex_ptr cstr (1 + nsub) p_match 0{-no flags for now-}
- if (r /= 0) then return Nothing else do
+ -- Any non-zero result from c_regexec is reported as "no match".
+ if (r /= 0) then return Nothing else do
- (before,match,after) <- matched_parts str p_match
+ (before,match,after) <- matched_parts str p_match
- sub_strs <-
+ sub_strs <-
+ -- Walk the regmatch_t array one element at a time; 'tail' skips entry 0
+ -- (the whole match) and 'take' keeps the nsub_int subexpression entries.
mapM (unpack str) $ take nsub_int $ tail $
iterate (`plusPtr` (#const sizeof(regmatch_t))) p_match
- return (Just (before, match, after, sub_strs))
+ return (Just (before, match, after, sub_strs))
+-- Splits @string@ into (text before the match, matched text, text after the
+-- match) using the rm_so / rm_eo offsets read from the regmatch_t at p_match.
+-- NOTE(review): the offsets are applied as character counts on the Haskell
+-- String; presumably this relies on each Char marshalling to one C char --
+-- confirm for non-ASCII input.
matched_parts :: String -> Ptr CRegMatch -> IO (String, String, String)
matched_parts string p_match = do
- start <- (#peek regmatch_t, rm_so) p_match :: IO CInt
- end <- (#peek regmatch_t, rm_eo) p_match :: IO CInt
+ start <- (#peek regmatch_t, rm_so) p_match :: IO (#type regoff_t)
+ end <- (#peek regmatch_t, rm_eo) p_match :: IO (#type regoff_t)
let s = fromIntegral start; e = fromIntegral end
- return ( take (s-1) string,
+ -- The prefix is the first s characters (the old code took s-1).
+ return ( take s string,
take (e-s) (drop s string),
drop e string )
unpack :: String -> Ptr CRegMatch -> IO (String)
unpack string p_match = do
- start <- (#peek regmatch_t, rm_so) p_match :: IO CInt
- end <- (#peek regmatch_t, rm_eo) p_match :: IO CInt
+ start <- (#peek regmatch_t, rm_so) p_match :: IO (#type regoff_t)
+ end <- (#peek regmatch_t, rm_eo) p_match :: IO (#type regoff_t)
-- the subexpression may not have matched at all, perhaps because it
-- was optional. In this case, the offsets are set to -1.
if (start == -1) then return "" else do
-- Flags for regexec
+-- (hsc2hs #enum directive; the final entry carries no trailing backslash,
+-- so the continuation ends with REG_NOTEOL.)
#enum Int,, \
REG_NOTBOL, \
- REG_NOTEOL \
+ REG_NOTEOL
-- Return values from regexec
#enum Int,, \
REG_ERANGE, \
REG_ESPACE
-type CRegex = ()
+-- Placeholder for the C regmatch_t structure; its rm_so / rm_eo fields are
+-- read through a Ptr CRegMatch with #peek.
type CRegMatch = ()
-foreign import "regcomp" unsafe
- c_regcomp :: ForeignPtr CRegex -> CString -> CInt -> IO CInt
+-- Binding to C regcomp: destination regex_t, pattern string, flags.
+-- regcomp (the Haskell wrapper) treats a result of 0 as success.
+foreign import ccall unsafe "regcomp"
+ c_regcomp :: Ptr CRegex -> CString -> CInt -> IO CInt
-foreign import "regfree" unsafe
- c_regfree :: Ptr CRegex -> IO ()
+-- Address of the C regfree function (note the leading '&' in the entity
+-- string); regcomp registers it as the ForeignPtr finalizer.
+foreign import ccall unsafe "&regfree"
+ ptr_regfree :: FunPtr (Ptr CRegex -> IO ())
-foreign import "regexec" unsafe
- c_regexec :: ForeignPtr CRegex -> UnsafeCString -> CSize
+-- Binding to C regexec. As called from regexec (the Haskell wrapper) the
+-- arguments are: compiled regex, subject string, number of regmatch_t slots,
+-- the match array, and flags.
+foreign import ccall unsafe "regexec"
+ c_regexec :: Ptr CRegex -> CString -> CSize
-> Ptr CRegMatch -> CInt -> IO CInt