1 -----------------------------------------------------------------------------
3 -- Module : Text.Regex.Posix
4 -- Copyright : (c) The University of Glasgow 2002
5 -- License : BSD-style (see the file libraries/base/LICENSE)
7 -- Maintainer : libraries@haskell.org
8 -- Stability : experimental
9 -- Portability : non-portable (needs POSIX regexps)
11 -- Interface to the POSIX regular expression library.
13 -----------------------------------------------------------------------------
15 -- ToDo: should have an interface using PackedStrings.
16 #include "ghcconfig.h"
18 module Text.Regex.Posix (
22 -- * Compiling a regular expression
23 regcomp, -- :: String -> Int -> IO Regex
25 -- ** Flags for regcomp
26 regExtended, -- (flag to regcomp) use extended regex syntax
27 regIgnoreCase, -- (flag to regcomp) ignore case when matching
28 regNewline, -- (flag to regcomp) '.' doesn't match newline
30 -- * Matching a regular expression
31 regexec, -- :: Regex -- pattern
32 -- -> String -- string to match
33 -- -> IO (Maybe (String, -- everything before match
34 -- String, -- matched portion
35 -- String, -- everything after match
36 -- [String])) -- subexpression matches
40 #include <sys/types.h>
42 #if HAVE_REGEX_H && HAVE_REGCOMP
45 #include "regex/regex.h"
46 {-# CBITS regex/reallocf.c #-}
47 {-# CBITS regex/regcomp.c #-}
48 {-# CBITS regex/regerror.c #-}
49 {-# CBITS regex/regexec.c #-}
50 {-# CBITS regex/regfree.c #-}
60 -- | A compiled regular expression
61 newtype Regex = Regex (ForeignPtr CRegex)
63 -- -----------------------------------------------------------------------------
66 -- | Compiles a regular expression
68 :: String -- ^ The regular expression to compile
69 -> Int -- ^ Flags (summed together)
70 -> IO Regex -- ^ Returns: the compiled regular expression
71 regcomp pattern flags = do
72 regex_fptr <- mallocForeignPtrBytes (#const sizeof(regex_t))
73 r <- withCString pattern $ \cstr ->
74 withForeignPtr regex_fptr $ \p ->
75 c_regcomp p cstr (fromIntegral flags)
77 then do addForeignPtrFinalizer ptr_regfree regex_fptr
78 return (Regex regex_fptr)
79 else error "Text.Regex.Posix.regcomp: error in pattern" -- ToDo
81 -- -----------------------------------------------------------------------------
84 -- | Matches a regular expression against a string
85 regexec :: Regex -- ^ Compiled regular expression
86 -> String -- ^ String to match against
87 -> IO (Maybe (String, String, String, [String]))
88 -- ^ Returns: 'Nothing' if the regex did not match the
92 -- 'Just' (everything before match,
94 -- everything after match,
95 -- subexpression matches)
98 regexec (Regex regex_fptr) str = do
99 withCString str $ \cstr -> do
100 withForeignPtr regex_fptr $ \regex_ptr -> do
101 nsub <- (#peek regex_t, re_nsub) regex_ptr
102 let nsub_int = fromIntegral (nsub :: CSize)
103 allocaBytes ((1 + nsub_int) * (#const sizeof(regmatch_t))) $ \p_match -> do
104 -- add one because index zero covers the whole match
105 r <- c_regexec regex_ptr cstr (1 + nsub) p_match 0{-no flags for now-}
107 if (r /= 0) then return Nothing else do
109 (before,match,after) <- matched_parts str p_match
112 mapM (unpack str) $ take nsub_int $ tail $
113 iterate (`plusPtr` (#const sizeof(regmatch_t))) p_match
115 return (Just (before, match, after, sub_strs))
117 matched_parts :: String -> Ptr CRegMatch -> IO (String, String, String)
118 matched_parts string p_match = do
119 start <- (#peek regmatch_t, rm_so) p_match :: IO (#type regoff_t)
120 end <- (#peek regmatch_t, rm_eo) p_match :: IO (#type regoff_t)
121 let s = fromIntegral start; e = fromIntegral end
122 return ( take s string,
123 take (e-s) (drop s string),
126 unpack :: String -> Ptr CRegMatch -> IO (String)
127 unpack string p_match = do
128 start <- (#peek regmatch_t, rm_so) p_match :: IO (#type regoff_t)
129 end <- (#peek regmatch_t, rm_eo) p_match :: IO (#type regoff_t)
130 -- the subexpression may not have matched at all, perhaps because it
131 -- was optional. In this case, the offsets are set to -1.
132 if (start == -1) then return "" else do
133 return (take (fromIntegral (end-start)) (drop (fromIntegral start) string))
135 -- -----------------------------------------------------------------------------
136 -- The POSIX regex C interface
143 -- Return values from regexec
151 regIgnoreCase = REG_ICASE, \
155 -- Error codes from regcomp
172 foreign import ccall unsafe "regcomp"
173 c_regcomp :: Ptr CRegex -> CString -> CInt -> IO CInt
175 foreign import ccall unsafe "®free"
176 ptr_regfree :: FunPtr (Ptr CRegex -> IO ())
178 foreign import ccall unsafe "regexec"
179 c_regexec :: Ptr CRegex -> CString -> CSize
180 -> Ptr CRegMatch -> CInt -> IO CInt