X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Flib%2Fghc%2FMatchPS.lhs;fp=ghc%2Flib%2Fghc%2FMatchPS.lhs;h=0000000000000000000000000000000000000000;hb=30f15b4e7d579dc142537342161c460c6b80290b;hp=25b48427249222e1665d9803a8d77f77b5eb5462;hpb=4860241b9fe5daa328fbfabfa87c4af84ac49b65;p=ghc-hetmet.git diff --git a/ghc/lib/ghc/MatchPS.lhs b/ghc/lib/ghc/MatchPS.lhs deleted file mode 100644 index 25b4842..0000000 --- a/ghc/lib/ghc/MatchPS.lhs +++ /dev/null @@ -1,497 +0,0 @@ -\section[match]{PackedString functions for matching} - -This module provides regular expression matching and substitution -at the PackedString level. It is built on top of the GNU Regex -library modified to handle perl regular expression syntax. -For a complete description of the perl syntax, do `man perlre` -or have a gander in (Programming|Learning) Perl. Here's -a short summary: - -^ matches the beginning of line -$ matches end of line -\b matches word boundary -\B matches non-word boundary -\w matches a word(alpha-numeric) character -\W matches a non-word character -\d matches a digit -\D matches a non-digit -\s matches whitespace -\S matches non-whitespace -\A matches beginning of buffer -\Z matches end-of-buffer -. matches any (bar newline in single-line mode) -+ matches 1 or more times -* matches 0 or more times -? matches 0 or 1 -{n,m} matches >=n and <=m atoms -{n,} matches at least n times -{n} matches n times -[..] matches any character member of char class. -(..) if pattern inside parens match, then the ith group is bound - to the matched string -\digit matches whatever the ith group matched. - -Backslashed letters -\n newline -\r carriage return -\t tab -\f formfeed -\v vertical tab -\a alarm bell -\e escape - - -\begin{code} -module MatchPS - - ( - matchPS, - searchPS, - substPS, - replacePS, - - match2PS, - search2PS, - - getMatchesNo, - getMatchedGroup, - getWholeMatch, - getLastMatch, - getAfterMatch, - - findPS, - rfindPS, - chopPS, - - matchPrefixPS, - - REmatch(..) - ) where - -import PreludeGlaST - -import Regex - -import Core -- alas ... - -\end{code} - -_tailPS and _dropPS in PS.lhs are not to my liking, use -these instead. - -\begin{code} - -_dropPS' x str = _substrPS str x (_lengthPS str) - -_tailPS' x - = if _nullPS x then - error "_tailPS []" - else - _substrPS x 1 (_lengthPS x) - - -\end{code} - -\subsection[ps-matching]{PackedString matching} - -Posix matching, returning an array of the the intervals that -the individual groups matched within the string. - -\begin{code} - -matchPS :: _PackedString -- reg. exp - -> _PackedString -- string to match - -> [Char] -- flags - -> Maybe REmatch -matchPS reg str flags - = let - insensitive = 'i' `elem` flags - mode = 's' `elem` flags - in - unsafePerformPrimIO ( - re_compile_pattern reg mode insensitive `thenPrimIO` \ pat -> - re_match pat str 0 True) - - -match2PS :: _PackedString -- reg. exp - -> _PackedString -- string1 to match - -> _PackedString -- string2 to match - -> [Char] -- flags - -> Maybe REmatch -match2PS reg str1 str2 flags - = let - insensitive = 'i' `elem` flags - mode = 's' `elem` flags - len1 = _lengthPS str1 - len2 = _lengthPS str2 - in - unsafePerformPrimIO ( - re_compile_pattern reg mode insensitive `thenPrimIO` \ pat -> - re_match2 pat str1 str2 0 (len1+len2) True) - -\end{code} - -PackedString front-end to searching with GNU Regex - -\begin{code} - -searchPS :: _PackedString -- reg. exp - -> _PackedString -- string to match - -> [Char] -- flags - -> Maybe REmatch -searchPS reg str flags - = let - insensitive = 'i' `elem` flags - mode = 's' `elem` flags - in - unsafePerformPrimIO ( - re_compile_pattern reg mode insensitive `thenPrimIO` \ pat -> - re_search pat str - 0 - (_lengthPS str) - True) - - - -search2PS :: _PackedString -- reg. exp - -> _PackedString -- string to match - -> _PackedString -- string to match - -> [Char] -- flags - -> Maybe REmatch -search2PS reg str1 str2 flags - = let - insensitive = 'i' `elem` flags - mode = 's' `elem` flags - len1 = _lengthPS str1 - len2 = _lengthPS str2 - len = len1+len2 - in - unsafePerformPrimIO ( - re_compile_pattern reg mode insensitive `thenPrimIO` \ pat -> - re_search2 pat - str1 - str2 - 0 - len - len - True) - - - -\end{code} - -@_substrPS s st end@ cuts out the chunk in \tr{s} between \tr{st} and \tr{end}, inclusive. -The \tr{Regex} registers represent substrings by storing the start and the end point plus -one( st==end => empty string) , so we use @chunkPS@ instead. - - -\begin{code} - -_chunkPS :: _PackedString - -> (Int,Int) - -> _PackedString -_chunkPS str (st,end) - = if st==end then - _nilPS - else - _substrPS str st (max 0 (end-1)) - -\end{code} - -Perl-like match and substitute - -\begin{code} - -substPS :: _PackedString -- reg. exp - -> _PackedString -- replacement - -> [Char] -- flags - -> _PackedString -- string - -> _PackedString -substPS rexp - repl - flags - str - = search str - where - global = 'g' `elem` flags - case_insensitive = 'i' `elem` flags - mode = 's' `elem` flags -- single-line mode - pat = unsafePerformPrimIO ( - re_compile_pattern rexp mode case_insensitive) - - search str - = let - search_res - = unsafePerformPrimIO (re_search pat str 0 (_lengthPS str) True) - in - case search_res of - Nothing -> str - Just matcher@(REmatch arr before match after lst) -> - let - (st,en) = match - prefix = _chunkPS str before - suffix - = if global && (st /= en) then - search (_dropPS' en str) - else - _chunkPS str after - in - _concatPS [prefix, - replace matcher repl str, - suffix] - - -replace :: REmatch - -> _PackedString - -> _PackedString - -> _PackedString -replace (REmatch arr before@(_,b_end) match after lst) - replacement - str - = _concatPS (reverse acc) -- ToDo: write a `reversed' version of concatPS - where - (_,b) = bounds arr - - acc = replace' [] replacement False - - single :: Char -> _PackedString - single x = _consPS x _nilPS - - replace' :: [_PackedString] - -> _PackedString - -> Bool - -> [_PackedString] - replace' acc repl escaped - = if (_nullPS repl) then - acc - else - let - x = _headPS repl - x# = case x of { C# c -> c } - xs = _tailPS' repl - in - case x# of - '\\'# -> - if escaped then - replace' acc xs True - else - replace' ((single x):acc) xs (not escaped) - '$'# -> - if (not escaped) then - let - x' = _headPS xs - xs' = _tailPS' xs - ith_ival = arr!num - (num,xs_num) = getNumber ((ord x') - ord '0') xs' - in - if (isDigit x') && (num<=b) then - replace' ((_chunkPS str ith_ival):acc) xs_num escaped - else if x' == '&' then - replace' ((_chunkPS str match):acc) xs' escaped - else if x' == '+' then - replace' ((_chunkPS str lst):acc) xs' escaped - else if x' == '`' then - replace' ((_chunkPS str (0,b_end)):acc) xs' escaped - else if x' == '\'' then - replace' ((_chunkPS str after):acc) xs' escaped - else -- ignore - replace' acc xs escaped - else - replace' ((single x):acc) xs False - - _ -> if escaped then - (case x# of - 'n'# -> -- newline - replace' ((single '\n'):acc) - 'f'# -> -- formfeed - replace' ((single '\f'):acc) - 'r'# -> -- carriage return - replace' ((single '\r'):acc) - 't'# -> -- (horiz) tab - replace' ((single '\t'):acc) - 'v'# -> -- vertical tab - replace' ((single '\v'):acc) - 'a'# -> -- alarm bell - replace' ((single '\a'):acc) - 'e'# -> -- escape - replace' ((single '\033'):acc) - _ -> - replace' ((single x):acc)) xs False - else - replace' ((single x):acc) xs False - - -getNumber :: Int -> _PackedString -> (Int,_PackedString) -getNumber acc ps - = if _nullPS ps then - (acc,ps) - else - let - x = _headPS ps - xs = _tailPS ps - in - if (isDigit x) then - getNumber (acc*10+(ord x - ord '0')) xs - else - (acc,ps) - -\end{code} - -Just like substPS, but no prefix and suffix. - -\begin{code} - -replacePS :: _PackedString -- reg. exp - -> _PackedString -- replacement - -> [Char] -- flags - -> _PackedString -- string - -> _PackedString -replacePS rexp - repl - flags - str - = search str - where - global = 'g' `elem` flags - case_insensitive = 'i' `elem` flags - mode = 's' `elem` flags -- single-line mode - pat = unsafePerformPrimIO ( - re_compile_pattern rexp mode case_insensitive) - - search str - = let - search_res - = unsafePerformPrimIO (re_search pat str 0 (_lengthPS str) True) - in - case search_res of - Nothing -> str - Just matcher@(REmatch arr before match after lst) -> - replace matcher repl str - -\end{code} - -Picking matched groups out of string - -\begin{code} - -getMatchesNo :: REmatch - -> Int -getMatchesNo (REmatch arr _ _ _ _) - = snd (bounds arr) - -getMatchedGroup :: REmatch - -> Int - -> _PackedString - -> _PackedString -getMatchedGroup (REmatch arr bef mtch after lst) nth str - = let - (1,grps) = bounds arr - in - if (nth >= 1) && (nth <= grps) then - _chunkPS str (arr!nth) - else - error "getMatchedGroup: group out of range" - -getWholeMatch :: REmatch - -> _PackedString - -> _PackedString -getWholeMatch (REmatch _ _ mtch _ _) str - = _chunkPS str mtch - -getLastMatch :: REmatch - -> _PackedString - -> _PackedString -getLastMatch (REmatch _ _ _ _ lst) str - = _chunkPS str lst - -getAfterMatch :: REmatch - -> _PackedString - -> _PackedString -getAfterMatch (REmatch _ _ _ aft _) str - = _chunkPS str aft - -\end{code} - - -More or less straight translation of a brute-force string matching -function written in C. (Sedgewick ch. 18) - -This is intended to provide much the same facilities as index/rindex in perl. - -\begin{code} - - -findPS :: _PackedString - -> _PackedString - -> Maybe Int -findPS str substr - = let - m = _lengthPS substr - n = _lengthPS str - - loop i j - | j>=m || i>=n = if j==m then (Just (i-m)) else Nothing - | otherwise - = inner_loop i j - - inner_loop i j - = if j _PackedString - -> Maybe Int -rfindPS str substr - = let - m = _lengthPS substr - 1 - n = _lengthPS str - 1 - - loop i j - | j<0 || i<0 = if j<0 then (Just (i+1)) else Nothing - | otherwise - = inner_loop i j - - inner_loop i j - = if j>=0 && i>=0 && (_indexPS str i /= _indexPS substr j) then - inner_loop (i+(m-j)-1) m - else - loop (i-1) (j-1) - in - loop n m - - -\end{code} - -\begin{code} - -chopPS :: _PackedString -> _PackedString -chopPS str = if _nullPS str then - _nilPS - else - _chunkPS str (0,_lengthPS str-1) - -\end{code} - -Tries to match as much as possible of strA starting from the beginning of strB -(handy when matching fancy literals in parsers) - -\begin{code} -matchPrefixPS :: _PackedString - -> _PackedString - -> Int -matchPrefixPS pref str - = matchPrefixPS' pref str 0 - where - matchPrefixPS' pref str n - = if (_nullPS pref) || (_nullPS str) then - n - else if (_headPS pref) == (_headPS str) then - matchPrefixPS' (_tailPS pref) (_tailPS str) (n+1) - else - n - -\end{code}