1 -----------------------------------------------------------------------------
3 -- Module : Data.PackedString
4 -- Copyright : (c) The University of Glasgow 2001
5 -- License : BSD-style (see the file libraries/base/LICENSE)
7 -- Maintainer : libraries@haskell.org
8 -- Stability : experimental
9 -- Portability : portable
11 -- An efficient implementation of strings.
13 -----------------------------------------------------------------------------
15 -- Original GHC implementation by Bryan O\'Sullivan,
16 -- rewritten to use UArray by Simon Marlow.
18 module Data.PackedString (
19 -- * The @PackedString@ type
20 PackedString, -- abstract, instances: Eq, Ord, Show, Typeable
22 -- * Converting to and from @PackedString@s
23 packString, -- :: String -> PackedString
24 unpackPS, -- :: PackedString -> String
27 -- * I\/O with @PackedString@s
28 hPutPS, -- :: Handle -> PackedString -> IO ()
29 hGetPS, -- :: Handle -> Int -> IO PackedString
32 -- * List-like manipulation functions
33 nilPS, -- :: PackedString
34 consPS, -- :: Char -> PackedString -> PackedString
35 headPS, -- :: PackedString -> Char
36 tailPS, -- :: PackedString -> PackedString
37 nullPS, -- :: PackedString -> Bool
38 appendPS, -- :: PackedString -> PackedString -> PackedString
39 lengthPS, -- :: PackedString -> Int
40 indexPS, -- :: PackedString -> Int -> Char
41 mapPS, -- :: (Char -> Char) -> PackedString -> PackedString
42 filterPS, -- :: (Char -> Bool) -> PackedString -> PackedString
43 reversePS, -- :: PackedString -> PackedString
44 concatPS, -- :: [PackedString] -> PackedString
45 elemPS, -- :: Char -> PackedString -> Bool
46 substrPS, -- :: PackedString -> Int -> Int -> PackedString
47 takePS, -- :: Int -> PackedString -> PackedString
48 dropPS, -- :: Int -> PackedString -> PackedString
49 splitAtPS, -- :: Int -> PackedString -> (PackedString, PackedString)
51 foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a
52 foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a
53 takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
54 dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
55 spanPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
56 breakPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
57 linesPS, -- :: PackedString -> [PackedString]
58 unlinesPS, -- :: [PackedString] -> PackedString
59 wordsPS, -- :: PackedString -> [PackedString]
60 unwordsPS, -- :: [PackedString] -> PackedString
61 splitPS, -- :: Char -> PackedString -> [PackedString]
62 splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString]
64 joinPS, -- :: PackedString -> [PackedString] -> PackedString
72 import Data.Array.Unboxed
79 -- -----------------------------------------------------------------------------
80 -- PackedString type declaration
82 -- | A space-efficient representation of a 'String', which supports various
83 -- efficient operations. A 'PackedString' contains full Unicode 'Char's.
84 newtype PackedString = PS (UArray Int Char)
86 instance Eq PackedString where
87 (PS x) == (PS y) = x == y
89 instance Ord PackedString where
90 compare (PS x) (PS y) = compare x y
92 --instance Read PackedString: ToDo
94 instance Show PackedString where
95 showsPrec p ps r = showsPrec p (unpackPS ps) r
98 INSTANCE_TYPEABLE0(PackedString,packedStringTc,"PackedString")
100 -- -----------------------------------------------------------------------------
101 -- Constructor functions
103 nilPS :: PackedString
104 nilPS = PS (array (0,-1) [])
106 consPS :: Char -> PackedString -> PackedString
107 consPS c cs = packString (c : (unpackPS cs)) -- ToDo:better
109 -- | Convert a 'String' into a 'PackedString'
110 packString :: String -> PackedString
111 packString str = packNChars (length str) str
113 packNChars :: Int -> [Char] -> PackedString
114 packNChars len str = PS (array (0,len-1) (zip [0..] str))
116 -- -----------------------------------------------------------------------------
117 -- Destructor functions (taking PackedStrings apart)
119 -- | Convert a 'PackedString' into a 'String'
120 unpackPS :: PackedString -> String
121 unpackPS (PS ps) = elems ps
123 -- -----------------------------------------------------------------------------
124 -- List-mimicking functions for PackedStrings
126 lengthPS :: PackedString -> Int
127 lengthPS (PS ps) = rangeSize (bounds ps)
129 indexPS :: PackedString -> Int -> Char
130 indexPS (PS ps) i = ps ! i
132 headPS :: PackedString -> Char
134 | nullPS ps = error "Data.PackedString.headPS: head []"
135 | otherwise = indexPS ps 0
137 tailPS :: PackedString -> PackedString
139 | len <= 0 = error "Data.PackedString.tailPS: tail []"
141 | otherwise = substrPS ps 1 (len - 1)
145 nullPS :: PackedString -> Bool
146 nullPS (PS ps) = rangeSize (bounds ps) == 0
148 appendPS :: PackedString -> PackedString -> PackedString
152 | otherwise = concatPS [xs,ys]
154 mapPS :: (Char -> Char) -> PackedString -> PackedString
155 mapPS f (PS ps) = PS (amap f ps)
157 filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-}
158 filterPS pred ps = packString (filter pred (unpackPS ps))
160 foldlPS :: (a -> Char -> a) -> a -> PackedString -> a
161 foldlPS f b ps = foldl f b (unpackPS ps)
163 foldrPS :: (Char -> a -> a) -> a -> PackedString -> a
164 foldrPS f v ps = foldr f v (unpackPS ps)
166 takePS :: Int -> PackedString -> PackedString
167 takePS n ps = substrPS ps 0 (n-1)
169 dropPS :: Int -> PackedString -> PackedString
170 dropPS n ps = substrPS ps n (lengthPS ps - 1)
172 splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
173 splitAtPS n ps = (takePS n ps, dropPS n ps)
175 takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
176 takeWhilePS pred ps = packString (takeWhile pred (unpackPS ps))
178 dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
179 dropWhilePS pred ps = packString (dropWhile pred (unpackPS ps))
181 elemPS :: Char -> PackedString -> Bool
182 elemPS c ps = c `elem` unpackPS ps
184 spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
185 spanPS p ps = (takeWhilePS p ps, dropWhilePS p ps)
187 breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
188 breakPS p ps = spanPS (not . p) ps
190 linesPS :: PackedString -> [PackedString]
191 linesPS ps = splitPS '\n' ps
193 unlinesPS :: [PackedString] -> PackedString
194 unlinesPS = joinPS (packString "\n")
196 wordsPS :: PackedString -> [PackedString]
197 wordsPS ps = filter (not.nullPS) (splitWithPS isSpace ps)
199 unwordsPS :: [PackedString] -> PackedString
200 unwordsPS = joinPS (packString " ")
202 reversePS :: PackedString -> PackedString
203 reversePS ps = packString (reverse (unpackPS ps))
205 concatPS :: [PackedString] -> PackedString
206 concatPS pss = packString (concat (map unpackPS pss))
208 ------------------------------------------------------------
210 joinPS :: PackedString -> [PackedString] -> PackedString
211 joinPS filler pss = concatPS (splice pss)
215 splice (x:y:xs) = x:filler:splice (y:xs)
217 -- ToDo: the obvious generalisation
219 Some properties that hold:
222 where False = any (map (x `elemPS`) ls')
224 * joinPS (packString [x]) (splitPS x ls) = ls
227 splitPS :: Char -> PackedString -> [PackedString]
228 splitPS c = splitWithPS (== c)
230 splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString]
231 splitWithPS pred (PS ps) =
234 len = lengthPS (PS ps)
240 break_pt = first_pos_that_satisfies pred ps len n
242 if break_pt == n then -- immediate match, empty substring
244 : splitify (break_pt + 1)
246 substrPS (PS ps) n (break_pt - 1) -- leave out the matching character
247 : splitify (break_pt + 1)
249 first_pos_that_satisfies pred ps len n =
250 case [ m | m <- [n..len-1], pred (ps ! m) ] of
254 -- -----------------------------------------------------------------------------
255 -- Local utility functions
257 -- The definition of @_substrPS@ is essentially:
258 -- @take (end - begin + 1) (drop begin str)@.
260 substrPS :: PackedString -> Int -> Int -> PackedString
261 substrPS (PS ps) begin end = packString [ ps ! i | i <- [begin..end] ]
263 -- -----------------------------------------------------------------------------
266 -- | Outputs a 'PackedString' to the specified 'Handle'.
268 -- NOTE: the representation of the 'PackedString' in the file is assumed to
269 -- be in the ISO-8859-1 encoding. In other words, only the least signficant
270 -- byte is taken from each character in the 'PackedString'.
271 hPutPS :: Handle -> PackedString -> IO ()
272 hPutPS h (PS ps) = do
273 let l = lengthPS (PS ps)
274 arr <- newArray_ (0, l-1)
275 sequence_ [ writeArray arr i (fromIntegral (ord (ps ! i))) | i <- [0..l-1] ]
278 -- -----------------------------------------------------------------------------
281 -- | Read a 'PackedString' directly from the specified 'Handle'. This
282 -- is far more efficient than reading the characters into a 'String'
283 -- and then using 'packString'.
285 -- NOTE: as with 'hPutPS', the string representation in the file is
286 -- assumed to be ISO-8859-1.
287 hGetPS :: Handle -> Int -> IO PackedString
289 arr <- newArray_ (0, i-1)
290 l <- hGetArray h arr i
291 chars <- mapM (\i -> readArray arr i >>= return.chr.fromIntegral) [0..l-1]
292 return (packString chars)
296 --import Prelude hiding (append, break, concat, cons, drop, dropWhile,
297 -- filter, foldl, foldr, head, length, lines, map,
298 -- nil, null, reverse, span, splitAt, subst, tail,
299 -- take, takeWhile, unlines, unwords, words)
300 -- also hiding: Ix(..), Functor(..)
301 import qualified NHC.PackedString
302 import NHC.PackedString (PackedString,packString,unpackPS)
303 import List (intersperse)
306 nilPS :: PackedString
307 consPS :: Char -> PackedString -> PackedString
308 headPS :: PackedString -> Char
309 tailPS :: PackedString -> PackedString
310 nullPS :: PackedString -> Bool
311 appendPS :: PackedString -> PackedString -> PackedString
312 lengthPS :: PackedString -> Int
313 indexPS :: PackedString -> Int -> Char
314 mapPS :: (Char -> Char) -> PackedString -> PackedString
315 filterPS :: (Char -> Bool) -> PackedString -> PackedString
316 reversePS :: PackedString -> PackedString
317 concatPS :: [PackedString] -> PackedString
318 elemPS :: Char -> PackedString -> Bool
319 substrPS :: PackedString -> Int -> Int -> PackedString
320 takePS :: Int -> PackedString -> PackedString
321 dropPS :: Int -> PackedString -> PackedString
322 splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
324 foldlPS :: (a -> Char -> a) -> a -> PackedString -> a
325 foldrPS :: (Char -> a -> a) -> a -> PackedString -> a
326 takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
327 dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
328 spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
329 breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
330 linesPS :: PackedString -> [PackedString]
331 unlinesPS :: [PackedString] -> PackedString
333 wordsPS :: PackedString -> [PackedString]
334 unwordsPS :: [PackedString] -> PackedString
335 splitPS :: Char -> PackedString -> [PackedString]
336 splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString]
337 joinPS :: PackedString -> [PackedString] -> PackedString
339 nilPS = NHC.PackedString.nil
340 consPS = NHC.PackedString.cons
341 headPS = NHC.PackedString.head
342 tailPS = NHC.PackedString.tail
343 nullPS = NHC.PackedString.null
344 appendPS = NHC.PackedString.append
345 lengthPS = NHC.PackedString.length
346 indexPS p i = (unpackPS p) !! i
347 mapPS = NHC.PackedString.map
348 filterPS = NHC.PackedString.filter
349 reversePS = NHC.PackedString.reverse
350 concatPS = NHC.PackedString.concat
351 elemPS c p = c `elem` unpackPS p
352 substrPS = NHC.PackedString.substr
353 takePS = NHC.PackedString.take
354 dropPS = NHC.PackedString.drop
355 splitAtPS = NHC.PackedString.splitAt
357 foldlPS = NHC.PackedString.foldl
358 foldrPS = NHC.PackedString.foldr
359 takeWhilePS = NHC.PackedString.takeWhile
360 dropWhilePS = NHC.PackedString.dropWhile
361 spanPS = NHC.PackedString.span
362 breakPS = NHC.PackedString.break
363 linesPS = NHC.PackedString.lines
364 unlinesPS = NHC.PackedString.unlines
366 wordsPS = NHC.PackedString.words
367 unwordsPS = NHC.PackedString.unwords
368 splitPS c = splitWithPS (==c)
370 map packString . split' p [] . unpackPS
372 split' :: (Char->Bool) -> String -> String -> [String]
373 split' pred [] [] = []
374 split' pred acc [] = [reverse acc]
375 split' pred acc (x:xs) | pred x = reverse acc: split' pred [] xs
376 | otherwise = split' pred (x:acc) xs
378 joinPS sep = concatPS . intersperse sep