compiler/utils/Util.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The University of Glasgow 1992-2002
   4 %
   5
   6 \begin{code}
   7 -- | Highly random utility functions
   8 module Util (
   9         -- * Flags dependent on the compiler build
  10         ghciSupported, debugIsOn, ghciTablesNextToCode, isDynamicGhcLib,
  11         isWindowsHost, isWindowsTarget, isDarwinTarget,
  12
  13         -- * General list processing
  14         zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal,
  15         zipLazy, stretchZipWith,
  16
  17         unzipWith,
  18
  19         mapFst, mapSnd,
  20         mapAndUnzip, mapAndUnzip3,
  21         nOfThem, filterOut, partitionWith, splitEithers,
  22
  23         foldl1', foldl2, count, all2,
  24
  25         lengthExceeds, lengthIs, lengthAtLeast,
  26         listLengthCmp, atLength, equalLength, compareLength,
  27
  28         isSingleton, only, singleton,
  29         notNull, snocView,
  30
  31         isIn, isn'tIn,
  32
  33         -- * Tuples
  34         fstOf3, sndOf3, thirdOf3,
  35
  36         -- * List operations controlled by another list
  37         takeList, dropList, splitAtList, split,
  38         dropTail,
  39
  40         -- * For loop
  41         nTimes,
  42
  43         -- * Sorting
  44         sortLe, sortWith, on,
  45
  46         -- * Comparisons
  47         isEqual, eqListBy,
  48         thenCmp, cmpList,
  49         removeSpaces,
  50
  51         -- * Edit distance
  52         fuzzyMatch, fuzzyLookup,
  53
  54         -- * Transitive closures
  55         transitiveClosure,
  56
  57         -- * Strictness
  58         seqList,
  59
  60         -- * Module names
  61         looksLikeModuleName,
  62
  63         -- * Argument processing
  64         getCmd, toCmdArgs, toArgs,
  65
  66         -- * Floating point
  67         readRational,
  68
  69         -- * IO-ish utilities
  70         createDirectoryHierarchy,
  71         doesDirNameExist,
  72         modificationTimeIfExists,
  73
  74         global, consIORef, globalMVar, globalEmptyMVar,
  75
  76         -- * Filenames and paths
  77         Suffix,
  78         splitLongestPrefix,
  79         escapeSpaces,
  80         parseSearchPath,
  81         Direction(..), reslash,
  82
  83         -- * Utils for defining Data instances
  84         abstractConstr, abstractDataType, mkNoRepType,
  85
  86         -- * Utils for printing C code
  87         charToC
  88     ) where
  89
  90 #include "HsVersions.h"
  91
  92 import Exception
  93 import Panic
  94
  95 import Data.Data
  96 import Data.IORef       ( IORef, newIORef, atomicModifyIORef )
  97 import System.IO.Unsafe ( unsafePerformIO )
  98 import Data.List        hiding (group)
  99 import Control.Concurrent.MVar ( MVar, newMVar, newEmptyMVar )
 100
 101 #ifdef DEBUG
 102 import FastTypes
 103 #endif
 104
 105 import Control.Monad    ( unless )
 106 import System.IO.Error as IO ( isDoesNotExistError )
 107 import System.Directory ( doesDirectoryExist, createDirectory,
 108                           getModificationTime )
 109 import System.FilePath
 110 import System.Time      ( ClockTime )
 111
 112 import Data.Char        ( isUpper, isAlphaNum, isSpace, chr, ord, isDigit )
 113 import Data.Ratio       ( (%) )
 114 import Data.Ord         ( comparing )
 115 import Data.Bits
 116 import Data.Word
 117 import qualified Data.IntMap as IM
 118
 119 infixr 9 `thenCmp`
 120 \end{code}
 121
 122 %************************************************************************
 123 %*                                                                      *
 124 \subsection{Is DEBUG on, are we on Windows, etc?}
 125 %*                                                                      *
 126 %************************************************************************
 127
 128 These booleans are global constants, set by CPP flags.  They allow us to
 129 recompile a single module (this one) to change whether or not debug output
 130 appears. They sometimes let us avoid even running CPP elsewhere.
 131
 132 It's important that the flags are literal constants (True/False). Then,
 133 with -0, tests of the flags in other modules will simplify to the correct
 134 branch of the conditional, thereby dropping debug code altogether when
 135 the flags are off.
 136
 137 \begin{code}
 138 ghciSupported :: Bool
 139 #ifdef GHCI
 140 ghciSupported = True
 141 #else
 142 ghciSupported = False
 143 #endif
 144
 145 debugIsOn :: Bool
 146 #ifdef DEBUG
 147 debugIsOn = True
 148 #else
 149 debugIsOn = False
 150 #endif
 151
 152 ghciTablesNextToCode :: Bool
 153 #ifdef GHCI_TABLES_NEXT_TO_CODE
 154 ghciTablesNextToCode = True
 155 #else
 156 ghciTablesNextToCode = False
 157 #endif
 158
 159 isDynamicGhcLib :: Bool
 160 #ifdef DYNAMIC
 161 isDynamicGhcLib = True
 162 #else
 163 isDynamicGhcLib = False
 164 #endif
 165
 166 isWindowsHost :: Bool
 167 #ifdef mingw32_HOST_OS
 168 isWindowsHost = True
 169 #else
 170 isWindowsHost = False
 171 #endif
 172
 173 isWindowsTarget :: Bool
 174 #ifdef mingw32_TARGET_OS
 175 isWindowsTarget = True
 176 #else
 177 isWindowsTarget = False
 178 #endif
 179
 180 isDarwinTarget :: Bool
 181 #ifdef darwin_TARGET_OS
 182 isDarwinTarget = True
 183 #else
 184 isDarwinTarget = False
 185 #endif
 186 \end{code}
 187
 188 %************************************************************************
 189 %*                                                                      *
 190 \subsection{A for loop}
 191 %*                                                                      *
 192 %************************************************************************
 193
 194 \begin{code}
 195 -- | Compose a function with itself n times.  (nth rather than twice)
 196 nTimes :: Int -> (a -> a) -> (a -> a)
 197 nTimes 0 _ = id
 198 nTimes 1 f = f
 199 nTimes n f = f . nTimes (n-1) f
 200 \end{code}
 201
 202 \begin{code}
 203 fstOf3   :: (a,b,c) -> a
 204 sndOf3   :: (a,b,c) -> b
 205 thirdOf3 :: (a,b,c) -> c
 206 fstOf3      (a,_,_) =  a
 207 sndOf3      (_,b,_) =  b
 208 thirdOf3    (_,_,c) =  c
 209 \end{code}
 210
 211 %************************************************************************
 212 %*                                                                      *
 213 \subsection[Utils-lists]{General list processing}
 214 %*                                                                      *
 215 %************************************************************************
 216
 217 \begin{code}
 218 filterOut :: (a->Bool) -> [a] -> [a]
 219 -- ^ Like filter, only it reverses the sense of the test
 220 filterOut _ [] = []
 221 filterOut p (x:xs) | p x       = filterOut p xs
 222                    | otherwise = x : filterOut p xs
 223
 224 partitionWith :: (a -> Either b c) -> [a] -> ([b], [c])
 225 -- ^ Uses a function to determine which of two output lists an input element should join
 226 partitionWith _ [] = ([],[])
 227 partitionWith f (x:xs) = case f x of
 228                          Left  b -> (b:bs, cs)
 229                          Right c -> (bs, c:cs)
 230     where (bs,cs) = partitionWith f xs
 231
 232 splitEithers :: [Either a b] -> ([a], [b])
 233 -- ^ Teases a list of 'Either's apart into two lists
 234 splitEithers [] = ([],[])
 235 splitEithers (e : es) = case e of
 236                         Left x -> (x:xs, ys)
 237                         Right y -> (xs, y:ys)
 238     where (xs,ys) = splitEithers es
 239 \end{code}
 240
 241 A paranoid @zip@ (and some @zipWith@ friends) that checks the lists
 242 are of equal length.  Alastair Reid thinks this should only happen if
 243 DEBUGging on; hey, why not?
 244
 245 \begin{code}
 246 zipEqual        :: String -> [a] -> [b] -> [(a,b)]
 247 zipWithEqual    :: String -> (a->b->c) -> [a]->[b]->[c]
 248 zipWith3Equal   :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d]
 249 zipWith4Equal   :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e]
 250
 251 #ifndef DEBUG
 252 zipEqual      _ = zip
 253 zipWithEqual  _ = zipWith
 254 zipWith3Equal _ = zipWith3
 255 zipWith4Equal _ = zipWith4
 256 #else
 257 zipEqual _   []     []     = []
 258 zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs
 259 zipEqual msg _      _      = panic ("zipEqual: unequal lists:"++msg)
 260
 261 zipWithEqual msg z (a:as) (b:bs)=  z a b : zipWithEqual msg z as bs
 262 zipWithEqual _   _ [] []        =  []
 263 zipWithEqual msg _ _ _          =  panic ("zipWithEqual: unequal lists:"++msg)
 264
 265 zipWith3Equal msg z (a:as) (b:bs) (c:cs)
 266                                 =  z a b c : zipWith3Equal msg z as bs cs
 267 zipWith3Equal _   _ [] []  []   =  []
 268 zipWith3Equal msg _ _  _   _    =  panic ("zipWith3Equal: unequal lists:"++msg)
 269
 270 zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds)
 271                                 =  z a b c d : zipWith4Equal msg z as bs cs ds
 272 zipWith4Equal _   _ [] [] [] [] =  []
 273 zipWith4Equal msg _ _  _  _  _  =  panic ("zipWith4Equal: unequal lists:"++msg)
 274 #endif
 275 \end{code}
 276
 277 \begin{code}
 278 -- | 'zipLazy' is a kind of 'zip' that is lazy in the second list (observe the ~)
 279 zipLazy :: [a] -> [b] -> [(a,b)]
 280 zipLazy []     _       = []
 281 -- We want to write this, but with GHC 6.4 we get a warning, so it
 282 -- doesn't validate:
 283 -- zipLazy (x:xs) ~(y:ys) = (x,y) : zipLazy xs ys
 284 -- so we write this instead:
 285 zipLazy (x:xs) zs = let y : ys = zs
 286                     in (x,y) : zipLazy xs ys
 287 \end{code}
 288
 289
 290 \begin{code}
 291 stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c]
 292 -- ^ @stretchZipWith p z f xs ys@ stretches @ys@ by inserting @z@ in
 293 -- the places where @p@ returns @True@
 294
 295 stretchZipWith _ _ _ []     _ = []
 296 stretchZipWith p z f (x:xs) ys
 297   | p x       = f x z : stretchZipWith p z f xs ys
 298   | otherwise = case ys of
 299                 []     -> []
 300                 (y:ys) -> f x y : stretchZipWith p z f xs ys
 301 \end{code}
 302
 303
 304 \begin{code}
 305 mapFst :: (a->c) -> [(a,b)] -> [(c,b)]
 306 mapSnd :: (b->c) -> [(a,b)] -> [(a,c)]
 307
 308 mapFst f xys = [(f x, y) | (x,y) <- xys]
 309 mapSnd f xys = [(x, f y) | (x,y) <- xys]
 310
 311 mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c])
 312
 313 mapAndUnzip _ [] = ([], [])
 314 mapAndUnzip f (x:xs)
 315   = let (r1,  r2)  = f x
 316         (rs1, rs2) = mapAndUnzip f xs
 317     in
 318     (r1:rs1, r2:rs2)
 319
 320 mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d])
 321
 322 mapAndUnzip3 _ [] = ([], [], [])
 323 mapAndUnzip3 f (x:xs)
 324   = let (r1,  r2,  r3)  = f x
 325         (rs1, rs2, rs3) = mapAndUnzip3 f xs
 326     in
 327     (r1:rs1, r2:rs2, r3:rs3)
 328 \end{code}
 329
 330 \begin{code}
 331 nOfThem :: Int -> a -> [a]
 332 nOfThem n thing = replicate n thing
 333
 334 -- | @atLength atLen atEnd ls n@ unravels list @ls@ to position @n@. Precisely:
 335 --
 336 -- @
 337 --  atLength atLenPred atEndPred ls n
 338 --   | n < 0         = atLenPred n
 339 --   | length ls < n = atEndPred (n - length ls)
 340 --   | otherwise     = atLenPred (drop n ls)
 341 -- @
 342 atLength :: ([a] -> b)
 343          -> (Int -> b)
 344          -> [a]
 345          -> Int
 346          -> b
 347 atLength atLenPred atEndPred ls n
 348   | n < 0     = atEndPred n
 349   | otherwise = go n ls
 350   where
 351     go n [] = atEndPred n
 352     go 0 ls = atLenPred ls
 353     go n (_:xs) = go (n-1) xs
 354
 355 -- Some special cases of atLength:
 356
 357 lengthExceeds :: [a] -> Int -> Bool
 358 -- ^ > (lengthExceeds xs n) = (length xs > n)
 359 lengthExceeds = atLength notNull (const False)
 360
 361 lengthAtLeast :: [a] -> Int -> Bool
 362 lengthAtLeast = atLength notNull (== 0)
 363
 364 lengthIs :: [a] -> Int -> Bool
 365 lengthIs = atLength null (==0)
 366
 367 listLengthCmp :: [a] -> Int -> Ordering
 368 listLengthCmp = atLength atLen atEnd
 369  where
 370   atEnd 0      = EQ
 371   atEnd x
 372    | x > 0     = LT -- not yet seen 'n' elts, so list length is < n.
 373    | otherwise = GT
 374
 375   atLen []     = EQ
 376   atLen _      = GT
 377
 378 equalLength :: [a] -> [b] -> Bool
 379 equalLength []     []     = True
 380 equalLength (_:xs) (_:ys) = equalLength xs ys
 381 equalLength _      _      = False
 382
 383 compareLength :: [a] -> [b] -> Ordering
 384 compareLength []     []     = EQ
 385 compareLength (_:xs) (_:ys) = compareLength xs ys
 386 compareLength []     _      = LT
 387 compareLength _      []     = GT
 388
 389 ----------------------------
 390 singleton :: a -> [a]
 391 singleton x = [x]
 392
 393 isSingleton :: [a] -> Bool
 394 isSingleton [_] = True
 395 isSingleton _   = False
 396
 397 notNull :: [a] -> Bool
 398 notNull [] = False
 399 notNull _  = True
 400
 401 only :: [a] -> a
 402 #ifdef DEBUG
 403 only [a] = a
 404 #else
 405 only (a:_) = a
 406 #endif
 407 only _ = panic "Util: only"
 408 \end{code}
 409
 410 Debugging/specialising versions of \tr{elem} and \tr{notElem}
 411
 412 \begin{code}
 413 isIn, isn'tIn :: Eq a => String -> a -> [a] -> Bool
 414
 415 # ifndef DEBUG
 416 isIn    _msg x ys = x `elem` ys
 417 isn'tIn _msg x ys = x `notElem` ys
 418
 419 # else /* DEBUG */
 420 isIn msg x ys
 421   = elem100 (_ILIT(0)) x ys
 422   where
 423     elem100 _ _ []        = False
 424     elem100 i x (y:ys)
 425       | i ># _ILIT(100) = trace ("Over-long elem in " ++ msg)
 426                                 (x `elem` (y:ys))
 427       | otherwise       = x == y || elem100 (i +# _ILIT(1)) x ys
 428
 429 isn'tIn msg x ys
 430   = notElem100 (_ILIT(0)) x ys
 431   where
 432     notElem100 _ _ [] =  True
 433     notElem100 i x (y:ys)
 434       | i ># _ILIT(100) = trace ("Over-long notElem in " ++ msg)
 435                                 (x `notElem` (y:ys))
 436       | otherwise      =  x /= y && notElem100 (i +# _ILIT(1)) x ys
 437 # endif /* DEBUG */
 438 \end{code}
 439
 440 %************************************************************************
 441 %*                                                                      *
 442 \subsubsection[Utils-Carsten-mergesort]{A mergesort from Carsten}
 443 %*                                                                      *
 444 %************************************************************************
 445
 446 \begin{display}
 447 Date: Mon, 3 May 93 20:45:23 +0200
 448 From: Carsten Kehler Holst <kehler@cs.chalmers.se>
 449 To: partain@dcs.gla.ac.uk
 450 Subject: natural merge sort beats quick sort [ and it is prettier ]
 451
 452 Here is a piece of Haskell code that I'm rather fond of. See it as an
 453 attempt to get rid of the ridiculous quick-sort routine. group is
 454 quite useful by itself I think it was John's idea originally though I
 455 believe the lazy version is due to me [surprisingly complicated].
 456 gamma [used to be called] is called gamma because I got inspired by
 457 the Gamma calculus. It is not very close to the calculus but does
 458 behave less sequentially than both foldr and foldl. One could imagine
 459 a version of gamma that took a unit element as well thereby avoiding
 460 the problem with empty lists.
 461
 462 I've tried this code against
 463
 464    1) insertion sort - as provided by haskell
 465    2) the normal implementation of quick sort
 466    3) a deforested version of quick sort due to Jan Sparud
 467    4) a super-optimized-quick-sort of Lennart's
 468
 469 If the list is partially sorted both merge sort and in particular
 470 natural merge sort wins. If the list is random [ average length of
 471 rising subsequences = approx 2 ] mergesort still wins and natural
 472 merge sort is marginally beaten by Lennart's soqs. The space
 473 consumption of merge sort is a bit worse than Lennart's quick sort
 474 approx a factor of 2. And a lot worse if Sparud's bug-fix [see his
 475 fpca article ] isn't used because of group.
 476
 477 have fun
 478 Carsten
 479 \end{display}
 480
 481 \begin{code}
 482 group :: (a -> a -> Bool) -> [a] -> [[a]]
 483 -- Given a <= function, group finds maximal contiguous up-runs
 484 -- or down-runs in the input list.
 485 -- It's stable, in the sense that it never re-orders equal elements
 486 --
 487 -- Date: Mon, 12 Feb 1996 15:09:41 +0000
 488 -- From: Andy Gill <andy@dcs.gla.ac.uk>
 489 -- Here is a `better' definition of group.
 490
 491 group _ []     = []
 492 group p (x:xs) = group' xs x x (x :)
 493   where
 494     group' []     _     _     s  = [s []]
 495     group' (x:xs) x_min x_max s
 496         |      x_max `p` x  = group' xs x_min x     (s . (x :))
 497         | not (x_min `p` x) = group' xs x     x_max ((x :) . s)
 498         | otherwise         = s [] : group' xs x x (x :)
 499         -- NB: the 'not' is essential for stablity
 500         --     x `p` x_min would reverse equal elements
 501
 502 generalMerge :: (a -> a -> Bool) -> [a] -> [a] -> [a]
 503 generalMerge _ xs [] = xs
 504 generalMerge _ [] ys = ys
 505 generalMerge p (x:xs) (y:ys) | x `p` y   = x : generalMerge p xs     (y:ys)
 506                              | otherwise = y : generalMerge p (x:xs) ys
 507
 508 -- gamma is now called balancedFold
 509
 510 balancedFold :: (a -> a -> a) -> [a] -> a
 511 balancedFold _ [] = error "can't reduce an empty list using balancedFold"
 512 balancedFold _ [x] = x
 513 balancedFold f l  = balancedFold f (balancedFold' f l)
 514
 515 balancedFold' :: (a -> a -> a) -> [a] -> [a]
 516 balancedFold' f (x:y:xs) = f x y : balancedFold' f xs
 517 balancedFold' _ xs = xs
 518
 519 generalNaturalMergeSort :: (a -> a -> Bool) -> [a] -> [a]
 520 generalNaturalMergeSort _ [] = []
 521 generalNaturalMergeSort p xs = (balancedFold (generalMerge p) . group p) xs
 522
 523 #if NOT_USED
 524 generalMergeSort p [] = []
 525 generalMergeSort p xs = (balancedFold (generalMerge p) . map (: [])) xs
 526
 527 mergeSort, naturalMergeSort :: Ord a => [a] -> [a]
 528
 529 mergeSort = generalMergeSort (<=)
 530 naturalMergeSort = generalNaturalMergeSort (<=)
 531
 532 mergeSortLe le = generalMergeSort le
 533 #endif
 534
 535 sortLe :: (a->a->Bool) -> [a] -> [a]
 536 sortLe le = generalNaturalMergeSort le
 537
 538 sortWith :: Ord b => (a->b) -> [a] -> [a]
 539 sortWith get_key xs = sortLe le xs
 540   where
 541     x `le` y = get_key x < get_key y
 542
 543 on :: (a -> a -> c) -> (b -> a) -> b -> b -> c
 544 on cmp sel = \x y -> sel x `cmp` sel y
 545
 546 \end{code}
 547
 548 %************************************************************************
 549 %*                                                                      *
 550 \subsection[Utils-transitive-closure]{Transitive closure}
 551 %*                                                                      *
 552 %************************************************************************
 553
 554 This algorithm for transitive closure is straightforward, albeit quadratic.
 555
 556 \begin{code}
 557 transitiveClosure :: (a -> [a])         -- Successor function
 558                   -> (a -> a -> Bool)   -- Equality predicate
 559                   -> [a]
 560                   -> [a]                -- The transitive closure
 561
 562 transitiveClosure succ eq xs
 563  = go [] xs
 564  where
 565    go done []                      = done
 566    go done (x:xs) | x `is_in` done = go done xs
 567                   | otherwise      = go (x:done) (succ x ++ xs)
 568
 569    _ `is_in` []                 = False
 570    x `is_in` (y:ys) | eq x y    = True
 571                     | otherwise = x `is_in` ys
 572 \end{code}
 573
 574 %************************************************************************
 575 %*                                                                      *
 576 \subsection[Utils-accum]{Accumulating}
 577 %*                                                                      *
 578 %************************************************************************
 579
 580 A combination of foldl with zip.  It works with equal length lists.
 581
 582 \begin{code}
 583 foldl2 :: (acc -> a -> b -> acc) -> acc -> [a] -> [b] -> acc
 584 foldl2 _ z [] [] = z
 585 foldl2 k z (a:as) (b:bs) = foldl2 k (k z a b) as bs
 586 foldl2 _ _ _      _      = panic "Util: foldl2"
 587
 588 all2 :: (a -> b -> Bool) -> [a] -> [b] -> Bool
 589 -- True if the lists are the same length, and
 590 -- all corresponding elements satisfy the predicate
 591 all2 _ []     []     = True
 592 all2 p (x:xs) (y:ys) = p x y && all2 p xs ys
 593 all2 _ _      _      = False
 594 \end{code}
 595
 596 Count the number of times a predicate is true
 597
 598 \begin{code}
 599 count :: (a -> Bool) -> [a] -> Int
 600 count _ [] = 0
 601 count p (x:xs) | p x       = 1 + count p xs
 602                | otherwise = count p xs
 603 \end{code}
 604
 605 @splitAt@, @take@, and @drop@ but with length of another
 606 list giving the break-off point:
 607
 608 \begin{code}
 609 takeList :: [b] -> [a] -> [a]
 610 takeList [] _ = []
 611 takeList (_:xs) ls =
 612    case ls of
 613      [] -> []
 614      (y:ys) -> y : takeList xs ys
 615
 616 dropList :: [b] -> [a] -> [a]
 617 dropList [] xs    = xs
 618 dropList _  xs@[] = xs
 619 dropList (_:xs) (_:ys) = dropList xs ys
 620
 621
 622 splitAtList :: [b] -> [a] -> ([a], [a])
 623 splitAtList [] xs     = ([], xs)
 624 splitAtList _ xs@[]   = (xs, xs)
 625 splitAtList (_:xs) (y:ys) = (y:ys', ys'')
 626     where
 627       (ys', ys'') = splitAtList xs ys
 628
 629 -- drop from the end of a list
 630 dropTail :: Int -> [a] -> [a]
 631 dropTail n = reverse . drop n . reverse
 632
 633 snocView :: [a] -> Maybe ([a],a)
 634         -- Split off the last element
 635 snocView [] = Nothing
 636 snocView xs = go [] xs
 637             where
 638                 -- Invariant: second arg is non-empty
 639               go acc [x]    = Just (reverse acc, x)
 640               go acc (x:xs) = go (x:acc) xs
 641               go _   []     = panic "Util: snocView"
 642
 643 split :: Char -> String -> [String]
 644 split c s = case rest of
 645                 []     -> [chunk]
 646                 _:rest -> chunk : split c rest
 647   where (chunk, rest) = break (==c) s
 648 \end{code}
 649
 650
 651 %************************************************************************
 652 %*                                                                      *
 653 \subsection[Utils-comparison]{Comparisons}
 654 %*                                                                      *
 655 %************************************************************************
 656
 657 \begin{code}
 658 isEqual :: Ordering -> Bool
 659 -- Often used in (isEqual (a `compare` b))
 660 isEqual GT = False
 661 isEqual EQ = True
 662 isEqual LT = False
 663
 664 thenCmp :: Ordering -> Ordering -> Ordering
 665 {-# INLINE thenCmp #-}
 666 thenCmp EQ       ordering = ordering
 667 thenCmp ordering _        = ordering
 668
 669 eqListBy :: (a->a->Bool) -> [a] -> [a] -> Bool
 670 eqListBy _  []     []     = True
 671 eqListBy eq (x:xs) (y:ys) = eq x y && eqListBy eq xs ys
 672 eqListBy _  _      _      = False
 673
 674 cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering
 675     -- `cmpList' uses a user-specified comparer
 676
 677 cmpList _   []     [] = EQ
 678 cmpList _   []     _  = LT
 679 cmpList _   _      [] = GT
 680 cmpList cmp (a:as) (b:bs)
 681   = case cmp a b of { EQ -> cmpList cmp as bs; xxx -> xxx }
 682 \end{code}
 683
 684 \begin{code}
 685 removeSpaces :: String -> String
 686 removeSpaces = reverse . dropWhile isSpace . reverse . dropWhile isSpace
 687 \end{code}
 688
 689 %************************************************************************
 690 %*                                                                      *
 691 \subsection{Edit distance}
 692 %*                                                                      *
 693 %************************************************************************
 694
 695 \begin{code}
 696 -- | Find the "restricted" Damerau-Levenshtein edit distance between two strings.
 697 -- See: <http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance>.
 698 -- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing
 699 -- Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro).
 700 -- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and
 701 --     http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation
 702 restrictedDamerauLevenshteinDistance :: String -> String -> Int
 703 restrictedDamerauLevenshteinDistance str1 str2
 704   = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
 705   where
 706     m = length str1
 707     n = length str2
 708
 709 restrictedDamerauLevenshteinDistanceWithLengths
 710   :: Int -> Int -> String -> String -> Int
 711 restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
 712   | m <= n
 713   = if n <= 32 -- n must be larger so this check is sufficient
 714     then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2
 715     else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2
 716
 717   | otherwise
 718   = if m <= 32 -- m must be larger so this check is sufficient
 719     then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1
 720     else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1
 721
 722 restrictedDamerauLevenshteinDistance'
 723   :: (Bits bv) => bv -> Int -> Int -> String -> String -> Int
 724 restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2
 725   | [] <- str1 = n
 726   | otherwise  = extractAnswer $
 727                  foldl' (restrictedDamerauLevenshteinDistanceWorker
 728                              (matchVectors str1) top_bit_mask vector_mask)
 729                         (0, 0, m_ones, 0, m) str2
 730   where
 731     m_ones@vector_mask = (2 ^ m) - 1
 732     top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy
 733     extractAnswer (_, _, _, _, distance) = distance
 734
 735 restrictedDamerauLevenshteinDistanceWorker
 736       :: (Bits bv) => IM.IntMap bv -> bv -> bv
 737       -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int)
 738 restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask
 739                                            (pm, d0, vp, vn, distance) char2
 740   = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $
 741     seq pm' $ seq d0' $ seq vp' $ seq vn' $
 742     seq distance'' $ seq char2 $
 743     (pm', d0', vp', vn', distance'')
 744   where
 745     pm' = IM.findWithDefault 0 (ord char2) str1_mvs
 746
 747     d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm)
 748       .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn
 749           -- No need to mask the shiftL because of the restricted range of pm
 750
 751     hp' = vn .|. sizedComplement vector_mask (d0' .|. vp)
 752     hn' = d0' .&. vp
 753
 754     hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask
 755     hn'_shift = (hn' `shiftL` 1) .&. vector_mask
 756     vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift)
 757     vn' = d0' .&. hp'_shift
 758
 759     distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance
 760     distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance'
 761
 762 sizedComplement :: Bits bv => bv -> bv -> bv
 763 sizedComplement vector_mask vect = vector_mask `xor` vect
 764
 765 matchVectors :: Bits bv => String -> IM.IntMap bv
 766 matchVectors = snd . foldl' go (0 :: Int, IM.empty)
 767   where
 768     go (ix, im) char = let ix' = ix + 1
 769                            im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im
 770                        in seq ix' $ seq im' $ (ix', im')
 771
 772 #ifdef __GLASGOW_HASKELL__
 773 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
 774                       :: Word32 -> Int -> Int -> String -> String -> Int #-}
 775 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
 776                       :: Integer -> Int -> Int -> String -> String -> Int #-}
 777
 778 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
 779                :: IM.IntMap Word32 -> Word32 -> Word32
 780                -> (Word32, Word32, Word32, Word32, Int)
 781                -> Char -> (Word32, Word32, Word32, Word32, Int) #-}
 782 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
 783                :: IM.IntMap Integer -> Integer -> Integer
 784                -> (Integer, Integer, Integer, Integer, Int)
 785                -> Char -> (Integer, Integer, Integer, Integer, Int) #-}
 786
 787 {-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-}
 788 {-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-}
 789
 790 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-}
 791 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-}
 792 #endif
 793
 794 fuzzyMatch :: String -> [String] -> [String]
 795 fuzzyMatch key vals = fuzzyLookup key [(v,v) | v <- vals]
 796
 797 -- | Search for possible matches to the users input in the given list,
 798 -- returning a small number of ranked results
 799 fuzzyLookup :: String -> [(String,a)] -> [a]
 800 fuzzyLookup user_entered possibilites
 801   = map fst $ take mAX_RESULTS $ sortBy (comparing snd)
 802     [ (poss_val, distance) | (poss_str, poss_val) <- possibilites
 803                        , let distance = restrictedDamerauLevenshteinDistance
 804                                             poss_str user_entered
 805                        , distance <= fuzzy_threshold ]
 806   where
 807     -- Work out an approriate match threshold:
 808     -- We report a candidate if its edit distance is <= the threshold,
 809     -- The threshhold is set to about a quarter of the # of characters the user entered
 810     --   Length    Threshold
 811     --     1         0          -- Don't suggest *any* candidates
 812     --     2         1          -- for single-char identifiers
 813     --     3         1
 814     --     4         1
 815     --     5         1
 816     --     6         2
 817     --
 818     fuzzy_threshold = truncate $ fromIntegral (length user_entered + 2) / (4 :: Rational)
 819     mAX_RESULTS = 3
 820 \end{code}
 821
 822 %************************************************************************
 823 %*                                                                      *
 824 \subsection[Utils-pairs]{Pairs}
 825 %*                                                                      *
 826 %************************************************************************
 827
 828 \begin{code}
 829 unzipWith :: (a -> b -> c) -> [(a, b)] -> [c]
 830 unzipWith f pairs = map ( \ (a, b) -> f a b ) pairs
 831 \end{code}
 832
 833 \begin{code}
 834 seqList :: [a] -> b -> b
 835 seqList [] b = b
 836 seqList (x:xs) b = x `seq` seqList xs b
 837 \end{code}
 838
 839 Global variables:
 840
 841 \begin{code}
 842 global :: a -> IORef a
 843 global a = unsafePerformIO (newIORef a)
 844 \end{code}
 845
 846 \begin{code}
 847 consIORef :: IORef [a] -> a -> IO ()
 848 consIORef var x = do
 849   atomicModifyIORef var (\xs -> (x:xs,()))
 850 \end{code}
 851
 852 \begin{code}
 853 globalMVar :: a -> MVar a
 854 globalMVar a = unsafePerformIO (newMVar a)
 855
 856 globalEmptyMVar :: MVar a
 857 globalEmptyMVar = unsafePerformIO newEmptyMVar
 858 \end{code}
 859
 860 Module names:
 861
 862 \begin{code}
 863 looksLikeModuleName :: String -> Bool
 864 looksLikeModuleName [] = False
 865 looksLikeModuleName (c:cs) = isUpper c && go cs
 866   where go [] = True
 867         go ('.':cs) = looksLikeModuleName cs
 868         go (c:cs)   = (isAlphaNum c || c == '_' || c == '\'') && go cs
 869 \end{code}
 870
 871 Akin to @Prelude.words@, but acts like the Bourne shell, treating
 872 quoted strings as Haskell Strings, and also parses Haskell [String]
 873 syntax.
 874
 875 \begin{code}
 876 getCmd :: String -> Either String             -- Error
 877                            (String, String) -- (Cmd, Rest)
 878 getCmd s = case break isSpace $ dropWhile isSpace s of
 879            ([], _) -> Left ("Couldn't find command in " ++ show s)
 880            res -> Right res
 881
 882 toCmdArgs :: String -> Either String             -- Error
 883                               (String, [String]) -- (Cmd, Args)
 884 toCmdArgs s = case getCmd s of
 885               Left err -> Left err
 886               Right (cmd, s') -> case toArgs s' of
 887                                  Left err -> Left err
 888                                  Right args -> Right (cmd, args)
 889
 890 toArgs :: String -> Either String   -- Error
 891                            [String] -- Args
 892 toArgs str
 893     = case dropWhile isSpace str of
 894       s@('[':_) -> case reads s of
 895                    [(args, spaces)]
 896                     | all isSpace spaces ->
 897                        Right args
 898                    _ ->
 899                        Left ("Couldn't read " ++ show str ++ "as [String]")
 900       s -> toArgs' s
 901  where
 902   toArgs' s = case dropWhile isSpace s of
 903               [] -> Right []
 904               ('"' : _) -> case reads s of
 905                            [(arg, rest)]
 906                               -- rest must either be [] or start with a space
 907                             | all isSpace (take 1 rest) ->
 908                                case toArgs' rest of
 909                                Left err -> Left err
 910                                Right args -> Right (arg : args)
 911                            _ ->
 912                                Left ("Couldn't read " ++ show s ++ "as String")
 913               s' -> case break isSpace s' of
 914                     (arg, s'') -> case toArgs' s'' of
 915                                   Left err -> Left err
 916                                   Right args -> Right (arg : args)
 917 \end{code}
 918
 919 -- -----------------------------------------------------------------------------
 920 -- Floats
 921
 922 \begin{code}
 923 readRational__ :: ReadS Rational -- NB: doesn't handle leading "-"
 924 readRational__ r = do
 925      (n,d,s) <- readFix r
 926      (k,t)   <- readExp s
 927      return ((n%1)*10^^(k-d), t)
 928  where
 929      readFix r = do
 930         (ds,s)  <- lexDecDigits r
 931         (ds',t) <- lexDotDigits s
 932         return (read (ds++ds'), length ds', t)
 933
 934      readExp (e:s) | e `elem` "eE" = readExp' s
 935      readExp s                     = return (0,s)
 936
 937      readExp' ('+':s) = readDec s
 938      readExp' ('-':s) = do (k,t) <- readDec s
 939                            return (-k,t)
 940      readExp' s       = readDec s
 941
 942      readDec s = do
 943         (ds,r) <- nonnull isDigit s
 944         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
 945                 r)
 946
 947      lexDecDigits = nonnull isDigit
 948
 949      lexDotDigits ('.':s) = return (span isDigit s)
 950      lexDotDigits s       = return ("",s)
 951
 952      nonnull p s = do (cs@(_:_),t) <- return (span p s)
 953                       return (cs,t)
 954
 955 readRational :: String -> Rational -- NB: *does* handle a leading "-"
 956 readRational top_s
 957   = case top_s of
 958       '-' : xs -> - (read_me xs)
 959       xs       -> read_me xs
 960   where
 961     read_me s
 962       = case (do { (x,"") <- readRational__ s ; return x }) of
 963           [x] -> x
 964           []  -> error ("readRational: no parse:"        ++ top_s)
 965           _   -> error ("readRational: ambiguous parse:" ++ top_s)
 966
 967
 968 -----------------------------------------------------------------------------
 969 -- Create a hierarchy of directories
 970
 971 createDirectoryHierarchy :: FilePath -> IO ()
 972 createDirectoryHierarchy dir | isDrive dir = return () -- XXX Hack
 973 createDirectoryHierarchy dir = do
 974   b <- doesDirectoryExist dir
 975   unless b $ do createDirectoryHierarchy (takeDirectory dir)
 976                 createDirectory dir
 977
 978 -----------------------------------------------------------------------------
 979 -- Verify that the 'dirname' portion of a FilePath exists.
 980 --
 981 doesDirNameExist :: FilePath -> IO Bool
 982 doesDirNameExist fpath = case takeDirectory fpath of
 983                          "" -> return True -- XXX Hack
 984                          _  -> doesDirectoryExist (takeDirectory fpath)
 985
 986 -- --------------------------------------------------------------
 987 -- check existence & modification time at the same time
 988
 989 modificationTimeIfExists :: FilePath -> IO (Maybe ClockTime)
 990 modificationTimeIfExists f = do
 991   (do t <- getModificationTime f; return (Just t))
 992         `catchIO` \e -> if isDoesNotExistError e
 993                         then return Nothing
 994                         else ioError e
 995
 996 -- split a string at the last character where 'pred' is True,
 997 -- returning a pair of strings. The first component holds the string
 998 -- up (but not including) the last character for which 'pred' returned
 999 -- True, the second whatever comes after (but also not including the
1000 -- last character).
1001 --
1002 -- If 'pred' returns False for all characters in the string, the original
1003 -- string is returned in the first component (and the second one is just
1004 -- empty).
1005 splitLongestPrefix :: String -> (Char -> Bool) -> (String,String)
1006 splitLongestPrefix str pred
1007   | null r_pre = (str,           [])
1008   | otherwise  = (reverse (tail r_pre), reverse r_suf)
1009                            -- 'tail' drops the char satisfying 'pred'
1010   where (r_suf, r_pre) = break pred (reverse str)
1011
1012 escapeSpaces :: String -> String
1013 escapeSpaces = foldr (\c s -> if isSpace c then '\\':c:s else c:s) ""
1014
1015 type Suffix = String
1016
1017 --------------------------------------------------------------
1018 -- * Search path
1019 --------------------------------------------------------------
1020
1021 -- | The function splits the given string to substrings
1022 -- using the 'searchPathSeparator'.
1023 parseSearchPath :: String -> [FilePath]
1024 parseSearchPath path = split path
1025   where
1026     split :: String -> [String]
1027     split s =
1028       case rest' of
1029         []     -> [chunk]
1030         _:rest -> chunk : split rest
1031       where
1032         chunk =
1033           case chunk' of
1034 #ifdef mingw32_HOST_OS
1035             ('\"':xs@(_:_)) | last xs == '\"' -> init xs
1036 #endif
1037             _                                 -> chunk'
1038
1039         (chunk', rest') = break isSearchPathSeparator s
1040
1041 data Direction = Forwards | Backwards
1042
1043 reslash :: Direction -> FilePath -> FilePath
1044 reslash d = f
1045     where f ('/'  : xs) = slash : f xs
1046           f ('\\' : xs) = slash : f xs
1047           f (x    : xs) = x     : f xs
1048           f ""          = ""
1049           slash = case d of
1050                   Forwards -> '/'
1051                   Backwards -> '\\'
1052 \end{code}
1053
1054 %************************************************************************
1055 %*                                                                      *
1056 \subsection[Utils-Data]{Utils for defining Data instances}
1057 %*                                                                      *
1058 %************************************************************************
1059
1060 These functions helps us to define Data instances for abstract types.
1061
1062 \begin{code}
1063 abstractConstr :: String -> Constr
1064 abstractConstr n = mkConstr (abstractDataType n) ("{abstract:"++n++"}") [] Prefix
1065 \end{code}
1066
1067 \begin{code}
1068 abstractDataType :: String -> DataType
1069 abstractDataType n = mkDataType n [abstractConstr n]
1070 \end{code}
1071
1072 %************************************************************************
1073 %*                                                                      *
1074 \subsection[Utils-C]{Utils for printing C code}
1075 %*                                                                      *
1076 %************************************************************************
1077
1078 \begin{code}
1079 charToC :: Word8 -> String
1080 charToC w =
1081   case chr (fromIntegral w) of
1082         '\"' -> "\\\""
1083         '\'' -> "\\\'"
1084         '\\' -> "\\\\"
1085         c | c >= ' ' && c <= '~' -> [c]
1086           | otherwise -> ['\\',
1087                          chr (ord '0' + ord c `div` 64),
1088                          chr (ord '0' + ord c `div` 8 `mod` 8),
1089                          chr (ord '0' + ord c         `mod` 8)]
1090 \end{code}