1 -----------------------------------------------------------------------------
3 -- Module : Data.PackedString
4 -- Copyright : (c) The University of Glasgow 2001
5 -- License : BSD-style (see the file libraries/base/LICENSE)
7 -- Maintainer : libraries@haskell.org
8 -- Stability : experimental
9 -- Portability : portable
11 -- This API is deprecated. You might be able to use "Data.ByteString"
12 -- or "Data.ByteString.Char8", provided you don't need full Unicode support.
13 -- The long term aim is to provide a Unicode layer on "Data.ByteString",
-- and then to provide a replacement for this "Data.PackedString" API based on that layer.
17 -----------------------------------------------------------------------------
19 -- Original GHC implementation by Bryan O\'Sullivan,
20 -- rewritten to use UArray by Simon Marlow.
22 module Data.PackedString
23 {-# DEPRECATED "use Data.ByteString, Data.ByteString.Char8, or plain String." #-}
25 -- * The @PackedString@ type
26 PackedString, -- abstract, instances: Eq, Ord, Show, Typeable
28 -- * Converting to and from @PackedString@s
29 packString, -- :: String -> PackedString
30 unpackPS, -- :: PackedString -> String
33 -- * I\/O with @PackedString@s
34 hPutPS, -- :: Handle -> PackedString -> IO ()
35 hGetPS, -- :: Handle -> Int -> IO PackedString
38 -- * List-like manipulation functions
39 nilPS, -- :: PackedString
40 consPS, -- :: Char -> PackedString -> PackedString
41 headPS, -- :: PackedString -> Char
42 tailPS, -- :: PackedString -> PackedString
43 nullPS, -- :: PackedString -> Bool
44 appendPS, -- :: PackedString -> PackedString -> PackedString
45 lengthPS, -- :: PackedString -> Int
46 indexPS, -- :: PackedString -> Int -> Char
47 mapPS, -- :: (Char -> Char) -> PackedString -> PackedString
48 filterPS, -- :: (Char -> Bool) -> PackedString -> PackedString
49 reversePS, -- :: PackedString -> PackedString
50 concatPS, -- :: [PackedString] -> PackedString
51 elemPS, -- :: Char -> PackedString -> Bool
52 substrPS, -- :: PackedString -> Int -> Int -> PackedString
53 takePS, -- :: Int -> PackedString -> PackedString
54 dropPS, -- :: Int -> PackedString -> PackedString
55 splitAtPS, -- :: Int -> PackedString -> (PackedString, PackedString)
57 foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a
58 foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a
59 takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
60 dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
61 spanPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
62 breakPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
63 linesPS, -- :: PackedString -> [PackedString]
64 unlinesPS, -- :: [PackedString] -> PackedString
65 wordsPS, -- :: PackedString -> [PackedString]
66 unwordsPS, -- :: [PackedString] -> PackedString
67 splitPS, -- :: Char -> PackedString -> [PackedString]
68 splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString]
70 joinPS, -- :: PackedString -> [PackedString] -> PackedString
78 import Data.Array.Unboxed
86 -- -----------------------------------------------------------------------------
87 -- PackedString type declaration
89 -- | A space-efficient representation of a 'String', which supports various
90 -- efficient operations. A 'PackedString' contains full Unicode 'Char's.
91 newtype PackedString = PS (UArray Int Char)
93 -- ToDo: we could support "slices", i.e. include offset and length fields into
94 -- the string, so that operations like take/drop could be O(1). Perhaps making
95 -- a slice should be conditional on the ratio of the slice/string size to
96 -- limit memory leaks.
-- Two packed strings are equal exactly when their underlying character
-- arrays compare equal.
instance Eq PackedString where
  PS lhs == PS rhs = lhs == rhs
-- Ordering is delegated to the ordering of the underlying character arrays.
instance Ord PackedString where
  PS lhs `compare` PS rhs = lhs `compare` rhs
104 --instance Read PackedString: ToDo
-- A packed string is shown exactly like the 'String' it unpacks to.
instance Show PackedString where
  showsPrec prec = showsPrec prec . unpackPS
109 #include "Typeable.h"
110 INSTANCE_TYPEABLE0(PackedString,packedStringTc,"PackedString")
112 -- -----------------------------------------------------------------------------
113 -- Constructor functions
-- | The 'nilPS' value is the empty string: an array with the empty
-- bounds @(0,-1)@ and no elements.
nilPS :: PackedString
nilPS = PS (listArray (0, -1) "")
-- | The 'consPS' function prepends the given character to the
-- given string.
--
-- The string is unpacked, extended and packed again (ToDo: better).
consPS :: Char -> PackedString -> PackedString
consPS c = packString . (c :) . unpackPS
-- | Convert a 'String' into a 'PackedString'. The whole list is measured
-- with 'length' and then handed to 'packNChars'.
packString :: String -> PackedString
packString chars = packNChars count chars
  where
    count = length chars
-- | The 'packNChars' function creates a 'PackedString' out of the
-- first @len@ elements of the given 'String', stored in an array with
-- bounds @(0, len-1)@.
packNChars :: Int -> [Char] -> PackedString
packNChars len = PS . listArray (0, len - 1)
133 -- -----------------------------------------------------------------------------
134 -- Destructor functions (taking PackedStrings apart)
-- | Convert a 'PackedString' into a 'String' by listing the array
-- elements in index order.
unpackPS :: PackedString -> String
unpackPS (PS arr) = map (arr !) (indices arr)
140 -- -----------------------------------------------------------------------------
141 -- List-mimicking functions for PackedStrings
-- | The 'lengthPS' function returns the number of characters in the
-- string, computed from the array bounds. Analogous to 'length'.
lengthPS :: PackedString -> Int
lengthPS (PS arr) = rangeSize extent
  where
    extent = bounds arr
-- | The 'indexPS' function returns the character in the string at the
-- given position, using the array indexing operator directly.
indexPS :: PackedString -> Int -> Char
indexPS (PS arr) pos = (!) arr pos
151 -- | The 'headPS' function returns the first element of a 'PackedString' or throws an
152 -- error if the string is empty.
153 headPS :: PackedString -> Char
155 | nullPS ps = error "Data.PackedString.headPS: head []"
156 | otherwise = indexPS ps 0
158 -- | The 'tailPS' function returns the tail of a 'PackedString' or throws an error
159 -- if the string is empty.
160 tailPS :: PackedString -> PackedString
162 | len <= 0 = error "Data.PackedString.tailPS: tail []"
164 | otherwise = substrPS ps 1 (len - 1)
-- | The 'nullPS' function returns True iff the argument is null, i.e.
-- iff the bounds of the underlying array cover no indices.
nullPS :: PackedString -> Bool
nullPS (PS arr) =
  case rangeSize (bounds arr) of
    0 -> True
    _ -> False
172 -- | The 'appendPS' function appends the second string onto the first.
173 appendPS :: PackedString -> PackedString -> PackedString
177 | otherwise = concatPS [xs,ys]
-- | The 'mapPS' function applies a function to each character in the
-- string, keeping the bounds of the underlying array.
mapPS :: (Char -> Char) -> PackedString -> PackedString
mapPS f (PS arr) = PS . amap f $ arr
-- | The 'filterPS' function keeps only the characters that satisfy the
-- predicate. The string is unpacked, filtered with 'filter' and repacked.
filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-}
filterPS keep = packString . filter keep . unpackPS
-- | The 'foldlPS' function behaves like 'foldl' on 'PackedString's:
-- it is 'foldl' applied to the unpacked string.
foldlPS :: (a -> Char -> a) -> a -> PackedString -> a
foldlPS step start = foldl step start . unpackPS
-- | The 'foldrPS' function behaves like 'foldr' on 'PackedString's:
-- it is 'foldr' applied to the unpacked string.
foldrPS :: (Char -> a -> a) -> a -> PackedString -> a
foldrPS step end = foldr step end . unpackPS
-- | The 'takePS' function takes the first @n@ characters of a 'PackedString'.
--
-- Like 'take' on lists, a count larger than the length of the string yields
-- the whole string, and a non-positive count yields the empty string.
--
-- The end index is clamped to the last valid position because 'substrPS'
-- indexes the underlying array directly; an unclamped @n-1@ beyond the end
-- of the string would raise an out-of-bounds error.
takePS :: Int -> PackedString -> PackedString
takePS n ps = substrPS ps 0 (min n (lengthPS ps) - 1)
-- | The 'dropPS' function drops the first @n@ characters of a 'PackedString'.
--
-- Like 'drop' on lists, a non-positive count yields the whole string, and a
-- count of at least the length of the string yields the empty string.
--
-- The start index is clamped to 0 because 'substrPS' indexes the underlying
-- array directly; a negative start would raise an out-of-bounds error.
dropPS :: Int -> PackedString -> PackedString
dropPS n ps = substrPS ps (max 0 n) (lengthPS ps - 1)
-- | The 'splitAtPS' function splits a 'PackedString' at a given index:
-- the result pairs 'takePS' @n@ with 'dropPS' @n@ of the same string.
splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
splitAtPS n ps = (front, back)
  where
    front = takePS n ps
    back = dropPS n ps
-- | The 'takeWhilePS' function is analogous to the 'takeWhile' function:
-- it applies 'takeWhile' to the unpacked string and repacks the result.
takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
takeWhilePS keep = packString . takeWhile keep . unpackPS
-- | The 'dropWhilePS' function is analogous to the 'dropWhile' function:
-- it applies 'dropWhile' to the unpacked string and repacks the result.
dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
dropWhilePS skip = packString . dropWhile skip . unpackPS
-- | The 'elemPS' function returns True iff the given character occurs
-- in the string.
elemPS :: Char -> PackedString -> Bool
elemPS c = elem c . unpackPS
-- | The 'spanPS' function returns a pair containing the result of
-- running both 'takeWhilePS' and 'dropWhilePS' with the same predicate
-- on the same string.
spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
spanPS p ps = (matching, remainder)
  where
    matching = takeWhilePS p ps
    remainder = dropWhilePS p ps
-- | The 'breakPS' function breaks a string at the first position which
-- satisfies the predicate; it is 'spanPS' with the predicate negated.
breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
breakPS p = spanPS (not . p)
-- | The 'linesPS' function splits the input on line-breaks, i.e. on
-- every @\'\\n\'@ character, via 'splitPS'.
linesPS :: PackedString -> [PackedString]
linesPS = splitPS '\n'
-- | The 'unlinesPS' function concatenates the input list after
-- interspersing newlines between its elements (via 'joinPS').
unlinesPS :: [PackedString] -> PackedString
unlinesPS pss = joinPS newline pss
  where
    newline = packString "\n"
-- | The 'wordsPS' function is analogous to the 'words' function: the
-- string is split at every character satisfying 'isSpace' and the empty
-- pieces are discarded.
wordsPS :: PackedString -> [PackedString]
wordsPS = filter (not . nullPS) . splitWithPS isSpace
-- | The 'unwordsPS' function is analogous to the 'unwords' function: it
-- joins the input list with a single space between elements (via 'joinPS').
unwordsPS :: [PackedString] -> PackedString
unwordsPS pss = joinPS blank pss
  where
    blank = packString " "
-- | The 'reversePS' function reverses the string by unpacking it,
-- reversing the list and packing the result.
reversePS :: PackedString -> PackedString
reversePS = packString . reverse . unpackPS
-- | The 'concatPS' function concatenates a list of 'PackedString's: every
-- element is unpacked, the characters are concatenated in order and the
-- result is packed once.
concatPS :: [PackedString] -> PackedString
concatPS = packString . concatMap unpackPS
254 ------------------------------------------------------------
256 -- | The 'joinPS' function takes a 'PackedString' and a list of 'PackedString's
257 -- and concatenates the list after interspersing the first argument between
258 -- each element of the list.
259 joinPS :: PackedString -> [PackedString] -> PackedString
260 joinPS filler pss = concatPS (splice pss)
264 splice (x:y:xs) = x:filler:splice (y:xs)
266 -- ToDo: the obvious generalisation
268 Some properties that hold:
271 where False = any (map (x `elemPS`) ls')
273 * joinPS (packString [x]) (splitPS x ls) = ls
-- | The 'splitPS' function splits the input string on each occurrence of
-- the given 'Char'; it is 'splitWithPS' with an equality predicate.
splitPS :: Char -> PackedString -> [PackedString]
splitPS sep = splitWithPS (sep ==)
280 -- | The 'splitWithPS' function takes a character predicate and splits the input string
281 -- at each character which satisfies the predicate.
282 splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString]
283 splitWithPS pred (PS ps) =
286 len = lengthPS (PS ps)
292 break_pt = first_pos_that_satisfies pred ps len n
294 if break_pt == n then -- immediate match, empty substring
296 : splitify (break_pt + 1)
298 substrPS (PS ps) n (break_pt - 1) -- leave out the matching character
299 : splitify (break_pt + 1)
301 first_pos_that_satisfies pred ps len n =
302 case [ m | m <- [n..len-1], pred (ps ! m) ] of
306 -- -----------------------------------------------------------------------------
307 -- Local utility functions
309 -- The definition of @_substrPS@ is essentially:
310 -- @take (end - begin + 1) (drop begin str)@.
-- | The 'substrPS' function takes a 'PackedString' and two indices
-- and returns the substring of the input string between (and including)
-- the two indices. Each position in @[begin..end]@ is read from the
-- underlying array with the indexing operator.
substrPS :: PackedString -> Int -> Int -> PackedString
substrPS (PS arr) begin end = packString (map (arr !) [begin .. end])
318 -- -----------------------------------------------------------------------------
321 -- | Outputs a 'PackedString' to the specified 'Handle'.
323 -- NOTE: the representation of the 'PackedString' in the file is assumed to
324 -- be in the ISO-8859-1 encoding. In other words, only the least significant
325 -- byte is taken from each character in the 'PackedString'.
326 hPutPS :: Handle -> PackedString -> IO ()
327 hPutPS h (PS ps) = do
328 let l = lengthPS (PS ps)
329 arr <- newArray_ (0, l-1)
330 sequence_ [ writeArray arr i (fromIntegral (ord (ps ! i))) | i <- [0..l-1] ]
333 -- -----------------------------------------------------------------------------
336 -- | Read a 'PackedString' directly from the specified 'Handle'.
337 -- This is far more efficient than reading the characters into a 'String'
338 -- and then using 'packString'.
340 -- NOTE: as with 'hPutPS', the string representation in the file is
341 -- assumed to be ISO-8859-1.
342 hGetPS :: Handle -> Int -> IO PackedString
344 arr <- newArray_ (0, i-1)
345 l <- hGetArray h arr i
346 chars <- mapM (\i -> readArray arr i >>= return.chr.fromIntegral) [0..l-1]
347 return (packNChars l chars)
351 --import Prelude hiding (append, break, concat, cons, drop, dropWhile,
352 -- filter, foldl, foldr, head, length, lines, map,
353 -- nil, null, reverse, span, splitAt, subst, tail,
354 -- take, takeWhile, unlines, unwords, words)
355 -- also hiding: Ix(..), Functor(..)
356 import qualified NHC.PackedString
357 import NHC.PackedString (PackedString,packString,unpackPS)
358 import List (intersperse)
-- Bindings for the NHC.PackedString-based variant of this module. Most are
-- direct aliases of the corresponding NHC.PackedString operation; 'indexPS'
-- and 'elemPS' go through 'unpackPS', and 'splitPS' through 'splitWithPS'.

nilPS :: PackedString
nilPS = NHC.PackedString.nil

consPS :: Char -> PackedString -> PackedString
consPS = NHC.PackedString.cons

headPS :: PackedString -> Char
headPS = NHC.PackedString.head

tailPS :: PackedString -> PackedString
tailPS = NHC.PackedString.tail

nullPS :: PackedString -> Bool
nullPS = NHC.PackedString.null

appendPS :: PackedString -> PackedString -> PackedString
appendPS = NHC.PackedString.append

lengthPS :: PackedString -> Int
lengthPS = NHC.PackedString.length

-- Indexes into the unpacked list with the list indexing operator.
indexPS :: PackedString -> Int -> Char
indexPS p i = unpackPS p !! i

mapPS :: (Char -> Char) -> PackedString -> PackedString
mapPS = NHC.PackedString.map

filterPS :: (Char -> Bool) -> PackedString -> PackedString
filterPS = NHC.PackedString.filter

reversePS :: PackedString -> PackedString
reversePS = NHC.PackedString.reverse

concatPS :: [PackedString] -> PackedString
concatPS = NHC.PackedString.concat

-- Membership test on the unpacked list.
elemPS :: Char -> PackedString -> Bool
elemPS c = elem c . unpackPS

substrPS :: PackedString -> Int -> Int -> PackedString
substrPS = NHC.PackedString.substr

takePS :: Int -> PackedString -> PackedString
takePS = NHC.PackedString.take

dropPS :: Int -> PackedString -> PackedString
dropPS = NHC.PackedString.drop

splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
splitAtPS = NHC.PackedString.splitAt

foldlPS :: (a -> Char -> a) -> a -> PackedString -> a
foldlPS = NHC.PackedString.foldl

foldrPS :: (Char -> a -> a) -> a -> PackedString -> a
foldrPS = NHC.PackedString.foldr

takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
takeWhilePS = NHC.PackedString.takeWhile

dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
dropWhilePS = NHC.PackedString.dropWhile

spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
spanPS = NHC.PackedString.span

breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
breakPS = NHC.PackedString.break

linesPS :: PackedString -> [PackedString]
linesPS = NHC.PackedString.lines

unlinesPS :: [PackedString] -> PackedString
unlinesPS = NHC.PackedString.unlines

wordsPS :: PackedString -> [PackedString]
wordsPS = NHC.PackedString.words

unwordsPS :: [PackedString] -> PackedString
unwordsPS = NHC.PackedString.unwords

-- Splits on every occurrence of the given character.
splitPS :: Char -> PackedString -> [PackedString]
splitPS c = splitWithPS (c ==)

-- Signatures for the two functions whose equations follow this group.
splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString]
joinPS :: PackedString -> [PackedString] -> PackedString
425 map packString . split' p [] . unpackPS
-- Split a list of characters at every character satisfying the predicate.
-- The second argument is an accumulator holding, in reverse order, the
-- characters of the current piece seen so far. Separators are dropped; an
-- empty piece at the very end of the input produces no element.
split' :: (Char->Bool) -> String -> String -> [String]
split' isSep acc input =
  case break isSep input of
    (chunk, [])
      | null acc && null chunk -> []
      | otherwise -> [reverse acc ++ chunk]
    (chunk, _ : rest) -> (reverse acc ++ chunk) : split' isSep [] rest
-- Concatenate the list after placing the separator between each pair of
-- neighbouring elements.
joinPS sep pss = concatPS (intersperse sep pss)
-- 'Data' instance for 'PackedString': the type is presented with the single
-- constructor representation 'con_PS'. 'gunfold' is not supported and
-- raises an error when called.
instance Data PackedString where
  gunfold _ _ _ = error "gunfold"
  toConstr (PS _) = con_PS
  dataTypeOf = const ty_PackedString

-- Representation of the @PS@ constructor (prefix, no field names).
con_PS = mkConstr ty_PackedString "PS" [] Prefix

-- Representation of the 'PackedString' data type, listing its one constructor.
ty_PackedString = mkDataType "Data.PackedString.PackedString" [con_PS]