1 -----------------------------------------------------------------------------
3 -- Module : Text.Regex.Posix
4 -- Copyright : (c) The University of Glasgow 2001
5 -- License : BSD-style (see the file libraries/core/LICENSE)
7 -- Maintainer : libraries@haskell.org
8 -- Stability : experimental
9 -- Portability : non-portable (only on platforms that provide POSIX regexps)
11 -- $Id: Posix.hsc,v 1.3 2001/09/13 11:39:58 simonmar Exp $
13 -- Interface to the POSIX regular expression library.
14 -- ToDo: detect regex library with configure.
15 -- ToDo: should have an interface using PackedStrings.
17 -----------------------------------------------------------------------------
19 module Text.Regex.Posix (
22 regcomp, -- :: String -> Int -> IO Regex
24 regexec, -- :: Regex -- pattern
25 -- -> String -- string to match
26 -- -> IO (Maybe (String, -- everything before match
27 -- String, -- matched portion
28 -- String, -- everything after match
29 -- [String])) -- subexpression matches
31 regExtended, -- (flag to regcomp) use extended regex syntax
32 regIgnoreCase, -- (flag to regcomp) ignore case when matching
33 regNewline -- (flag to regcomp) '.' doesn't match newline
-- | Handle to a compiled regular expression.  Values are produced by
-- 'regcomp' and consumed by 'regexec'.  The wrapped foreign pointer refers
-- to the C @regex_t@ storage that 'regcomp' allocates.
newtype Regex
  = Regex (ForeignPtr CRegex)
45 -- -----------------------------------------------------------------------------
-- | Compile a regular expression.
--
-- The first argument is the pattern text.  The second argument carries the
-- compilation flags ('regExtended', 'regIgnoreCase', 'regNewline'); it is
-- converted with 'fromIntegral' and handed unchanged to the C @regcomp@.
--
-- Returns the compiled 'Regex'.  If the C call reports a non-zero status
-- the action fails by calling 'error'.
regcomp :: String -> Int -> IO Regex
regcomp pattern flags = do
  -- Storage for the C regex_t; ownership passes to the foreign pointer,
  -- whose finalizer is 'regfree'.
  regex_ptr <- mallocBytes (#const sizeof(regex_t))
  regex_fptr <- newForeignPtr regex_ptr (regfree regex_ptr)
  -- NOTE(review): the finalizer is registered before compilation, so it
  -- presumably also runs for a regex_t whose compilation failed -- confirm
  -- that regfree tolerates that.
  r <- withCString pattern $ \cstr ->
         withForeignPtr regex_fptr $ \p ->
           c_regcomp p cstr (fromIntegral flags)
  -- A zero status is the only success code; everything else is an error.
  if r == 0
     then return (Regex regex_fptr)
     else error "Text.Regex.Posix.regcomp: error in pattern" -- ToDo
59 regfree :: Ptr CRegex -> IO ()
64 -- -----------------------------------------------------------------------------
-- | Match a compiled regular expression against a string.
--
-- Returns 'Nothing' when the C @regexec@ call reports a non-zero status.
-- Otherwise returns the three pieces computed by 'matched_parts' together
-- with one string per parenthesised subexpression (see 'unpack').
regexec :: Regex                        -- pattern
        -> String                       -- string to match
        -> IO (Maybe (String,           -- everything before match
                      String,           -- matched portion
                      String,           -- everything after match
                      [String]))        -- subexpression matches
regexec (Regex regex_fptr) str =
  withCString str $ \cstr ->
    withForeignPtr regex_fptr $ \regex_ptr -> do
      -- Number of subexpressions recorded in the compiled regex_t.
      nsub <- (#peek regex_t, re_nsub) regex_ptr
      let nsub_int = fromIntegral (nsub :: CSize)
      allocaBytes ((1 + nsub_int) * (#const sizeof(regmatch_t))) $ \p_match -> do
        -- add one because index zero covers the whole match
        r <- c_regexec regex_ptr cstr (1 + nsub) p_match 0{-no flags for now-}
        if r /= 0
          then return Nothing
          else do
            -- Slot 0 of the match array describes the overall match.
            (before, match, after) <- matched_parts str p_match
            -- Slots 1..nsub describe the subexpressions.  'iterate' yields
            -- an infinite list, so 'tail' cannot fail here.
            sub_strs <-
              mapM (unpack str) $ take nsub_int $ tail $
                iterate (`plusPtr` (#const sizeof(regmatch_t))) p_match
            return (Just (before, match, after, sub_strs))
-- | Split the subject string around the match described by one
-- @regmatch_t@ entry.
--
-- The first argument is the string that was matched; the second points at
-- the @regmatch_t@ whose @rm_so@ / @rm_eo@ fields give the start and end
-- offsets of the match.
--
-- Returns @(before, matched, after)@: the text preceding offset @rm_so@,
-- the text between @rm_so@ and @rm_eo@, and the remainder.
matched_parts :: String -> Ptr CRegMatch -> IO (String, String, String)
matched_parts string p_match = do
  -- NOTE(review): the offsets are read as CInt; this assumes regoff_t has
  -- that size on the target platform -- TODO confirm.
  start <- (#peek regmatch_t, rm_so) p_match :: IO CInt
  end   <- (#peek regmatch_t, rm_eo) p_match :: IO CInt
  -- NOTE(review): the offsets index the marshalled C string but are applied
  -- to the Haskell String; presumably this assumes one byte per Char.
  let s = fromIntegral start
      e = fromIntegral end
      -- The prefix is exactly the first s characters (not s-1): the match
      -- begins at offset s, so everything before it has length s.
      (before, rest)   = splitAt s string
      (matched, after) = splitAt (e - s) rest
  return (before, matched, after)
-- | Extract the text of a single subexpression match.
--
-- Reads the @rm_so@ / @rm_eo@ offsets from the given @regmatch_t@ and
-- returns the corresponding slice of the subject string, or the empty
-- string when the start offset is -1.
unpack :: String -> Ptr CRegMatch -> IO (String)
unpack string p_match = do
  so <- (#peek regmatch_t, rm_so) p_match :: IO CInt
  eo <- (#peek regmatch_t, rm_eo) p_match :: IO CInt
  return (slice so eo)
  where
    slice so eo
      -- the subexpression may not have matched at all, perhaps because it
      -- was optional.  In this case, the offsets are set to -1.
      | so == -1  = ""
      | otherwise = take (fromIntegral (eo - so)) (drop (fromIntegral so) string)
111 -- -----------------------------------------------------------------------------
112 -- The POSIX regex C interface
119 -- Return values from regexec
127 regIgnoreCase = REG_ICASE, \
131 -- Error codes from regcomp
-- | Raw binding to the C function @regcomp@.  'regcomp' calls it with the
-- @regex_t@ buffer, the pattern as a C string and the flags, and treats a
-- zero result as success.
foreign import "regcomp" unsafe c_regcomp
  :: Ptr CRegex
  -> CString
  -> CInt
  -> IO CInt
-- | Raw binding to the C function @regfree@, taking a pointer to the
-- @regex_t@ storage.
foreign import "regfree" unsafe c_regfree
  :: Ptr CRegex
  -> IO ()
-- | Raw binding to the C function @regexec@.  'regexec' passes the compiled
-- regex, the subject as a C string, the number of match slots, the match
-- array and the execution flags; a non-zero result is treated as "no match".
foreign import "regexec" unsafe c_regexec
  :: Ptr CRegex
  -> CString
  -> CSize
  -> Ptr CRegMatch
  -> CInt
  -> IO CInt