1 -----------------------------------------------------------------------------
3 -- Module : Text.Regex.Posix
4 -- Copyright : (c) The University of Glasgow 2002
5 -- License : BSD-style (see the file libraries/base/LICENSE)
7 -- Maintainer : libraries@haskell.org
8 -- Stability : experimental
9 -- Portability : portable
11 -- Interface to the POSIX regular expression library.
13 -----------------------------------------------------------------------------
15 -- ToDo: should have an interface using PackedStrings.
17 #include "HsBaseConfig.h"
19 #define HAVE_REGEX_H 1
20 #define HAVE_REGCOMP 1
23 module Text.Regex.Posix (
27 -- * Compiling a regular expression
28 regcomp, -- :: String -> Int -> IO Regex
30 -- ** Flags for regcomp
31 regExtended, -- (flag to regcomp) use extended regex syntax
32 regIgnoreCase, -- (flag to regcomp) ignore case when matching
33 regNewline, -- (flag to regcomp) '.' doesn't match newline
35 -- * Matching a regular expression
36 regexec, -- :: Regex -- pattern
37 -- -> String -- string to match
38 -- -> IO (Maybe (String, -- everything before match
39 -- String, -- matched portion
40 -- String, -- everything after match
41 -- [String])) -- subexpression matches
52 -- | A compiled regular expression
53 newtype Regex = Regex (ForeignPtr CRegex)
56 -- The C-library backend
57 #include <sys/types.h>
59 #if HAVE_REGEX_H && HAVE_REGCOMP
62 #include "regex/regex.h"
64 -- CFILES stuff is Hugs only
65 {-# CFILES cbits/regex/reallocf.c #-}
66 {-# CFILES cbits/regex/regcomp.c #-}
67 {-# CFILES cbits/regex/regerror.c #-}
68 {-# CFILES cbits/regex/regexec.c #-}
69 {-# CFILES cbits/regex/regfree.c #-}
72 -- -----------------------------------------------------------------------------
75 -- | Compiles a regular expression
77 :: String -- ^ The regular expression to compile
78 -> Int -- ^ Flags (summed together)
79 -> IO Regex -- ^ Returns: the compiled regular expression
80 regcomp pattern flags = do
81 regex_fptr <- mallocForeignPtrBytes (#const sizeof(regex_t))
82 r <- withCString pattern $ \cstr ->
83 withForeignPtr regex_fptr $ \p ->
84 c_regcomp p cstr (fromIntegral flags)
86 then do addForeignPtrFinalizer ptr_regfree regex_fptr
87 return (Regex regex_fptr)
88 else error "Text.Regex.Posix.regcomp: error in pattern" -- ToDo
90 -- -----------------------------------------------------------------------------
93 -- | Matches a regular expression against a string
94 regexec :: Regex -- ^ Compiled regular expression
95 -> String -- ^ String to match against
96 -> IO (Maybe (String, String, String, [String]))
97 -- ^ Returns: 'Nothing' if the regex did not match the
101 -- 'Just' (everything before match,
103 -- everything after match,
104 -- subexpression matches)
107 regexec (Regex regex_fptr) str = do
108 withCString str $ \cstr -> do
109 withForeignPtr regex_fptr $ \regex_ptr -> do
110 nsub <- (#peek regex_t, re_nsub) regex_ptr
111 let nsub_int = fromIntegral (nsub :: CSize)
112 allocaBytes ((1 + nsub_int) * (#const sizeof(regmatch_t))) $ \p_match -> do
113 -- add one because index zero covers the whole match
114 r <- c_regexec regex_ptr cstr (1 + nsub) p_match 0{-no flags for now-}
116 if (r /= 0) then return Nothing else do
118 (before,match,after) <- matched_parts str p_match
121 mapM (unpack str) $ take nsub_int $ tail $
122 iterate (`plusPtr` (#const sizeof(regmatch_t))) p_match
124 return (Just (before, match, after, sub_strs))
126 matched_parts :: String -> Ptr CRegMatch -> IO (String, String, String)
127 matched_parts string p_match = do
128 start <- (#peek regmatch_t, rm_so) p_match :: IO (#type regoff_t)
129 end <- (#peek regmatch_t, rm_eo) p_match :: IO (#type regoff_t)
130 let s = fromIntegral start; e = fromIntegral end
131 return ( take s string,
132 take (e-s) (drop s string),
135 unpack :: String -> Ptr CRegMatch -> IO (String)
136 unpack string p_match = do
137 start <- (#peek regmatch_t, rm_so) p_match :: IO (#type regoff_t)
138 end <- (#peek regmatch_t, rm_eo) p_match :: IO (#type regoff_t)
139 -- the subexpression may not have matched at all, perhaps because it
140 -- was optional. In this case, the offsets are set to -1.
141 if (start == -1) then return "" else do
142 return (take (fromIntegral (end-start)) (drop (fromIntegral start) string))
144 -- -----------------------------------------------------------------------------
145 -- The POSIX regex C interface
152 -- Return values from regexec
160 regIgnoreCase = REG_ICASE, \
164 -- Error codes from regcomp
181 -- GHC and Hugs get the appropriate include file from the OPTIONS
182 -- pragma generated by hsc2hs from the above #include.
183 -- Implementations following the FFI spec have to specify it in the
184 -- foreign import, which is awkward because some systems provide
185 -- regex.h and the rest of the regex library, but otherwise we
186 -- need to use our own copy, regex/regex.h.
188 #if __GLASGOW_HASKELL__ || __HUGS__
189 foreign import ccall unsafe "regcomp"
190 c_regcomp :: Ptr CRegex -> CString -> CInt -> IO CInt
192 foreign import ccall unsafe "®free"
193 ptr_regfree :: FunPtr (Ptr CRegex -> IO ())
195 foreign import ccall unsafe "regexec"
196 c_regexec :: Ptr CRegex -> CString -> CSize
197 -> Ptr CRegMatch -> CInt -> IO CInt
198 #elif HAVE_REGEX_H && HAVE_REGCOMP
199 foreign import ccall unsafe "regex.h regcomp"
200 c_regcomp :: Ptr CRegex -> CString -> CInt -> IO CInt
202 foreign import ccall unsafe "regex.h ®free"
203 ptr_regfree :: FunPtr (Ptr CRegex -> IO ())
205 foreign import ccall unsafe "regex.h regexec"
206 c_regexec :: Ptr CRegex -> CString -> CSize
207 -> Ptr CRegMatch -> CInt -> IO CInt
209 foreign import ccall unsafe "regex/regex.h regcomp"
210 c_regcomp :: Ptr CRegex -> CString -> CInt -> IO CInt
212 foreign import ccall unsafe "regex/regex.h ®free"
213 ptr_regfree :: FunPtr (Ptr CRegex -> IO ())
215 foreign import ccall unsafe "regex/regex.h regexec"
216 c_regexec :: Ptr CRegex -> CString -> CSize
217 -> Ptr CRegMatch -> CInt -> IO CInt