1 -----------------------------------------------------------------------------
3 -- Module : Text.Regex.Posix
4 -- Copyright : (c) The University of Glasgow 2002
5 -- License : BSD-style (see the file libraries/base/LICENSE)
7 -- Maintainer : libraries@haskell.org
8 -- Stability : experimental
9 -- Portability : non-portable (needs POSIX regexps)
11 -- Interface to the POSIX regular expression library.
13 -----------------------------------------------------------------------------
15 -- ToDo: should have an interface using PackedStrings.
18 module Text.Regex.Posix (
22 #if !defined(__HUGS__) || defined(HAVE_REGEX_H)
23 -- * Compiling a regular expression
24 regcomp, -- :: String -> Int -> IO Regex
26 -- ** Flags for regcomp
27 regExtended, -- (flag to regcomp) use extended regex syntax
28 regIgnoreCase, -- (flag to regcomp) ignore case when matching
29 regNewline, -- (flag to regcomp) '.' doesn't match newline
31 -- * Matching a regular expression
32 regexec, -- :: Regex -- pattern
33 -- -> String -- string to match
34 -- -> IO (Maybe (String, -- everything before match
35 -- String, -- matched portion
36 -- String, -- everything after match
37 -- [String])) -- subexpression matches
42 #if !defined(__HUGS__) || defined(HAVE_REGEX_H)
43 #include <sys/types.h>
54 -- | A compiled regular expression
55 newtype Regex = Regex (ForeignPtr CRegex)
57 #if !defined(__HUGS__) || defined(HAVE_REGEX_H)
59 -- -----------------------------------------------------------------------------
62 -- | Compiles a regular expression
64 :: String -- ^ The regular expression to compile
65 -> Int -- ^ Flags (summed together)
66 -> IO Regex -- ^ Returns: the compiled regular expression
67 regcomp pattern flags = do
68 regex_fptr <- mallocForeignPtrBytes (#const sizeof(regex_t))
69 r <- withCString pattern $ \cstr ->
70 withForeignPtr regex_fptr $ \p ->
71 c_regcomp p cstr (fromIntegral flags)
73 then do addForeignPtrFinalizer ptr_regfree regex_fptr
74 return (Regex regex_fptr)
75 else error "Text.Regex.Posix.regcomp: error in pattern" -- ToDo
77 -- -----------------------------------------------------------------------------
80 -- | Matches a regular expression against a string
81 regexec :: Regex -- ^ Compiled regular expression
82 -> String -- ^ String to match against
83 -> IO (Maybe (String, String, String, [String]))
84 -- ^ Returns: 'Nothing' if the regex did not match the
88 -- 'Just' (everything before match,
90 -- everything after match,
91 -- subexpression matches)
94 regexec (Regex regex_fptr) str = do
95 withCString str $ \cstr -> do
96 withForeignPtr regex_fptr $ \regex_ptr -> do
97 nsub <- (#peek regex_t, re_nsub) regex_ptr
98 let nsub_int = fromIntegral (nsub :: CSize)
99 allocaBytes ((1 + nsub_int) * (#const sizeof(regmatch_t))) $ \p_match -> do
100 -- add one because index zero covers the whole match
101 r <- c_regexec regex_ptr cstr (1 + nsub) p_match 0{-no flags for now-}
103 if (r /= 0) then return Nothing else do
105 (before,match,after) <- matched_parts str p_match
108 mapM (unpack str) $ take nsub_int $ tail $
109 iterate (`plusPtr` (#const sizeof(regmatch_t))) p_match
111 return (Just (before, match, after, sub_strs))
113 matched_parts :: String -> Ptr CRegMatch -> IO (String, String, String)
114 matched_parts string p_match = do
115 start <- (#peek regmatch_t, rm_so) p_match :: IO (#type regoff_t)
116 end <- (#peek regmatch_t, rm_eo) p_match :: IO (#type regoff_t)
117 let s = fromIntegral start; e = fromIntegral end
118 return ( take s string,
119 take (e-s) (drop s string),
122 unpack :: String -> Ptr CRegMatch -> IO (String)
123 unpack string p_match = do
124 start <- (#peek regmatch_t, rm_so) p_match :: IO (#type regoff_t)
125 end <- (#peek regmatch_t, rm_eo) p_match :: IO (#type regoff_t)
126 -- the subexpression may not have matched at all, perhaps because it
127 -- was optional. In this case, the offsets are set to -1.
128 if (start == -1) then return "" else do
129 return (take (fromIntegral (end-start)) (drop (fromIntegral start) string))
131 -- -----------------------------------------------------------------------------
132 -- The POSIX regex C interface
139 -- Return values from regexec
147 regIgnoreCase = REG_ICASE, \
151 -- Error codes from regcomp
168 foreign import ccall unsafe "regcomp"
169 c_regcomp :: Ptr CRegex -> CString -> CInt -> IO CInt
171 foreign import ccall unsafe "®free"
172 ptr_regfree :: FunPtr (Ptr CRegex -> IO ())
174 foreign import ccall unsafe "regexec"
175 c_regexec :: Ptr CRegex -> CString -> CSize
176 -> Ptr CRegMatch -> CInt -> IO CInt
178 #endif /* HAVE_REGEX_H */