1 -----------------------------------------------------------------------------
3 -- Module : Text.Regex.Posix
4 -- Copyright : (c) The University of Glasgow 2002
5 -- License : BSD-style (see the file libraries/base/LICENSE)
7 -- Maintainer : libraries@haskell.org
8 -- Stability : experimental
9 -- Portability : non-portable (needs POSIX regexps)
11 -- Interface to the POSIX regular expression library.
13 -----------------------------------------------------------------------------
15 -- ToDo: should have an interface using PackedStrings.
17 module Text.Regex.Posix (
21 -- * Compiling a regular expression
22 regcomp, -- :: String -> Int -> IO Regex
24 -- ** Flags for regcomp
25 regExtended, -- (flag to regcomp) use extended regex syntax
26 regIgnoreCase, -- (flag to regcomp) ignore case when matching
27 regNewline, -- (flag to regcomp) '.' doesn't match newline
29 -- * Matching a regular expression
30 regexec, -- :: Regex -- pattern
31 -- -> String -- string to match
32 -- -> IO (Maybe (String, -- everything before match
33 -- String, -- matched portion
34 -- String, -- everything after match
35 -- [String])) -- subexpression matches
39 #include <sys/types.h>
47 -- | A compiled regular expression
48 newtype Regex = Regex (ForeignPtr CRegex)
50 -- -----------------------------------------------------------------------------
53 -- | Compiles a regular expression
55 :: String -- ^ The regular expression to compile
56 -> Int -- ^ Flags (summed together)
57 -> IO Regex -- ^ Returns: the compiled regular expression
58 regcomp pattern flags = do
59 regex_ptr <- mallocBytes (#const sizeof(regex_t))
60 regex_fptr <- newForeignPtr regex_ptr (regfree regex_ptr)
61 r <- withCString pattern $ \cstr ->
62 withForeignPtr regex_fptr $ \p ->
63 c_regcomp p cstr (fromIntegral flags)
65 then return (Regex regex_fptr)
66 else error "Text.Regex.Posix.regcomp: error in pattern" -- ToDo
68 regfree :: Ptr CRegex -> IO ()
73 -- -----------------------------------------------------------------------------
76 -- | Matches a regular expression against a string
77 regexec :: Regex -- ^ Compiled regular expression
78 -> String -- ^ String to match against
79 -> IO (Maybe (String, String, String, [String]))
80 -- ^ Returns: 'Nothing' if the regex did not match the
83 -- > 'Just' (everything before match,
85 -- > everything after match,
86 -- > subexpression matches)
88 regexec (Regex regex_fptr) str = do
89 withCString str $ \cstr -> do
90 withForeignPtr regex_fptr $ \regex_ptr -> do
91 nsub <- (#peek regex_t, re_nsub) regex_ptr
92 let nsub_int = fromIntegral (nsub :: CSize)
93 allocaBytes ((1 + nsub_int) * (#const sizeof(regmatch_t))) $ \p_match -> do
94 -- add one because index zero covers the whole match
95 r <- c_regexec regex_ptr cstr (1 + nsub) p_match 0{-no flags for now-}
97 if (r /= 0) then return Nothing else do
99 (before,match,after) <- matched_parts str p_match
102 mapM (unpack str) $ take nsub_int $ tail $
103 iterate (`plusPtr` (#const sizeof(regmatch_t))) p_match
105 return (Just (before, match, after, sub_strs))
-- | Split the subject string around the overall match.
--
-- The pointer is expected to address the first @regmatch_t@ entry filled in
-- by @regexec@ (index zero covers the whole match).  Its @rm_so@ field is the
-- offset of the first matched character and @rm_eo@ the offset just past the
-- last one, so the result is
--
-- > (everything before match, matched portion, everything after match)
--
-- NOTE(review): the offsets are read as 'CInt'; confirm that this matches the
-- width of @regoff_t@ on every supported platform.
matched_parts :: String -> Ptr CRegMatch -> IO (String, String, String)
matched_parts string p_match = do
  start <- (#peek regmatch_t, rm_so) p_match :: IO CInt
  end   <- (#peek regmatch_t, rm_eo) p_match :: IO CInt
  let s = fromIntegral start
      e = fromIntegral end
      -- The prefix is exactly the first s characters: rm_so is already the
      -- index of the first matched character, so no further adjustment
      -- (such as s-1) is wanted here.
      (before, rest)  = splitAt s string
      -- The match is the next (e - s) characters; whatever remains is the
      -- text following the match.
      (match, after)  = splitAt (e - s) rest
  return (before, match, after)
-- | Extract the text matched by a single subexpression.
--
-- Reads the @rm_so@ and @rm_eo@ offsets from the given @regmatch_t@ entry and
-- slices the subject string accordingly.  A subexpression that did not take
-- part in the match (for instance because it was optional) has its start
-- offset set to -1; the empty string is returned for it.
--
-- NOTE(review): the offsets are read as 'CInt'; confirm that this matches the
-- width of @regoff_t@ on every supported platform.
unpack :: String -> Ptr CRegMatch -> IO String
unpack string p_match = do
  so <- (#peek regmatch_t, rm_so) p_match :: IO CInt
  eo <- (#peek regmatch_t, rm_eo) p_match :: IO CInt
  return (slice so eo)
  where
    -- Pure slicing step, kept separate from the two field reads above.
    slice so eo
      | so == -1  = ""
      | otherwise = take (fromIntegral (eo - so)) (drop (fromIntegral so) string)
125 -- -----------------------------------------------------------------------------
126 -- The POSIX regex C interface
133 -- Return values from regexec
141 regIgnoreCase = REG_ICASE, \
145 -- Error codes from regcomp
-- | Raw binding to the C function @regcomp@.  Arguments, in order: the
-- pattern buffer to fill in, the pattern text, and the compilation flags.
-- The result is the C return code.
foreign import ccall unsafe "regcomp"
  c_regcomp
    :: Ptr CRegex
    -> CString
    -> CInt
    -> IO CInt

-- | Raw binding to the C function @regfree@.
foreign import ccall unsafe "regfree"
  c_regfree
    :: Ptr CRegex
    -> IO ()

-- | Raw binding to the C function @regexec@.  Arguments, in order: the
-- compiled pattern, the subject string, the number of @regmatch_t@ slots
-- available, the array of slots, and the execution flags.  The result is the
-- C return code; callers in this module treat zero as a successful match.
foreign import ccall unsafe "regexec"
  c_regexec
    :: Ptr CRegex
    -> CString
    -> CSize
    -> Ptr CRegMatch
    -> CInt
    -> IO CInt