compiler/utils/Util.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The University of Glasgow 1992-2002
   4 %
   5
   6 \begin{code}
   7 -- | Highly random utility functions
   8 module Util (
   9         -- * Flags dependent on the compiler build
  10         ghciSupported, debugIsOn, ghciTablesNextToCode, isDynamicGhcLib,
  11         isWindowsHost, isWindowsTarget, isDarwinTarget,
  12
  13         -- * General list processing
  14         zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal,
  15         zipLazy, stretchZipWith,
  16
  17         unzipWith,
  18
  19         mapFst, mapSnd,
  20         mapAndUnzip, mapAndUnzip3,
  21         nOfThem, filterOut, partitionWith, splitEithers,
  22
  23         foldl1', foldl2, count, all2,
  24
  25         lengthExceeds, lengthIs, lengthAtLeast,
  26         listLengthCmp, atLength, equalLength, compareLength,
  27
  28         isSingleton, only, singleton,
  29         notNull, snocView,
  30
  31         isIn, isn'tIn,
  32
  33         -- * Tuples
  34         fstOf3, sndOf3, thirdOf3,
  35
  36         -- * List operations controlled by another list
  37         takeList, dropList, splitAtList, split,
  38         dropTail,
  39
  40         -- * For loop
  41         nTimes,
  42
  43         -- * Sorting
  44         sortLe, sortWith, on,
  45
  46         -- * Comparisons
  47         isEqual, eqListBy,
  48         thenCmp, cmpList,
  49         removeSpaces,
  50
  51         -- * Edit distance
  52         fuzzyMatch,
  53
  54         -- * Transitive closures
  55         transitiveClosure,
  56
  57         -- * Strictness
  58         seqList,
  59
  60         -- * Module names
  61         looksLikeModuleName,
  62
  63         -- * Argument processing
  64         getCmd, toCmdArgs, toArgs,
  65
  66         -- * Floating point
  67         readRational,
  68
  69         -- * IO-ish utilities
  70         createDirectoryHierarchy,
  71         doesDirNameExist,
  72         modificationTimeIfExists,
  73
  74         global, consIORef, globalMVar, globalEmptyMVar,
  75
  76         -- * Filenames and paths
  77         Suffix,
  78         splitLongestPrefix,
  79         escapeSpaces,
  80         parseSearchPath,
  81         Direction(..), reslash,
  82
  83         -- * Utils for defining Data instances
  84         abstractConstr, abstractDataType, mkNoRepType
  85     ) where
  86
  87 #include "HsVersions.h"
  88
  89 import Panic
  90
  91 import Data.Data
  92 import Data.IORef       ( IORef, newIORef, atomicModifyIORef )
  93 import System.IO.Unsafe ( unsafePerformIO )
  94 import Data.List        hiding (group)
  95 import Control.Concurrent.MVar ( MVar, newMVar, newEmptyMVar )
  96
  97 #ifdef DEBUG
  98 import FastTypes
  99 #endif
 100
 101 import Control.Monad    ( unless )
 102 import System.IO.Error as IO ( catch, isDoesNotExistError )
 103 import System.Directory ( doesDirectoryExist, createDirectory,
 104                           getModificationTime )
 105 import System.FilePath
 106 import System.Time      ( ClockTime )
 107
 108 import Data.Char        ( isUpper, isAlphaNum, isSpace, ord, isDigit )
 109 import Data.Ratio       ( (%) )
 110 import Data.Ord         ( comparing )
 111 import Data.Bits
 112 import Data.Word
 113 import qualified Data.IntMap as IM
 114
 115 infixr 9 `thenCmp`
 116 \end{code}
 117
 118 %************************************************************************
 119 %*                                                                      *
 120 \subsection{Is DEBUG on, are we on Windows, etc?}
 121 %*                                                                      *
 122 %************************************************************************
 123
 124 These booleans are global constants, set by CPP flags.  They allow us to
 125 recompile a single module (this one) to change whether or not debug output
 126 appears. They sometimes let us avoid even running CPP elsewhere.
 127
 128 It's important that the flags are literal constants (True/False). Then,
 129 with -0, tests of the flags in other modules will simplify to the correct
 130 branch of the conditional, thereby dropping debug code altogether when
 131 the flags are off.
 132
 133 \begin{code}
 134 ghciSupported :: Bool
 135 #ifdef GHCI
 136 ghciSupported = True
 137 #else
 138 ghciSupported = False
 139 #endif
 140
 141 debugIsOn :: Bool
 142 #ifdef DEBUG
 143 debugIsOn = True
 144 #else
 145 debugIsOn = False
 146 #endif
 147
 148 ghciTablesNextToCode :: Bool
 149 #ifdef GHCI_TABLES_NEXT_TO_CODE
 150 ghciTablesNextToCode = True
 151 #else
 152 ghciTablesNextToCode = False
 153 #endif
 154
 155 isDynamicGhcLib :: Bool
 156 #ifdef DYNAMIC
 157 isDynamicGhcLib = True
 158 #else
 159 isDynamicGhcLib = False
 160 #endif
 161
 162 isWindowsHost :: Bool
 163 #ifdef mingw32_HOST_OS
 164 isWindowsHost = True
 165 #else
 166 isWindowsHost = False
 167 #endif
 168
 169 isWindowsTarget :: Bool
 170 #ifdef mingw32_TARGET_OS
 171 isWindowsTarget = True
 172 #else
 173 isWindowsTarget = False
 174 #endif
 175
 176 isDarwinTarget :: Bool
 177 #ifdef darwin_TARGET_OS
 178 isDarwinTarget = True
 179 #else
 180 isDarwinTarget = False
 181 #endif
 182 \end{code}
 183
 184 %************************************************************************
 185 %*                                                                      *
 186 \subsection{A for loop}
 187 %*                                                                      *
 188 %************************************************************************
 189
 190 \begin{code}
 191 -- | Compose a function with itself n times.  (nth rather than twice)
 192 nTimes :: Int -> (a -> a) -> (a -> a)
 193 nTimes 0 _ = id
 194 nTimes 1 f = f
 195 nTimes n f = f . nTimes (n-1) f
 196 \end{code}
 197
 198 \begin{code}
 199 fstOf3   :: (a,b,c) -> a
 200 sndOf3   :: (a,b,c) -> b
 201 thirdOf3 :: (a,b,c) -> c
 202 fstOf3      (a,_,_) =  a
 203 sndOf3      (_,b,_) =  b
 204 thirdOf3    (_,_,c) =  c
 205 \end{code}
 206
 207 %************************************************************************
 208 %*                                                                      *
 209 \subsection[Utils-lists]{General list processing}
 210 %*                                                                      *
 211 %************************************************************************
 212
 213 \begin{code}
 214 filterOut :: (a->Bool) -> [a] -> [a]
 215 -- ^ Like filter, only it reverses the sense of the test
 216 filterOut _ [] = []
 217 filterOut p (x:xs) | p x       = filterOut p xs
 218                    | otherwise = x : filterOut p xs
 219
 220 partitionWith :: (a -> Either b c) -> [a] -> ([b], [c])
 221 -- ^ Uses a function to determine which of two output lists an input element should join
 222 partitionWith _ [] = ([],[])
 223 partitionWith f (x:xs) = case f x of
 224                          Left  b -> (b:bs, cs)
 225                          Right c -> (bs, c:cs)
 226     where (bs,cs) = partitionWith f xs
 227
 228 splitEithers :: [Either a b] -> ([a], [b])
 229 -- ^ Teases a list of 'Either's apart into two lists
 230 splitEithers [] = ([],[])
 231 splitEithers (e : es) = case e of
 232                         Left x -> (x:xs, ys)
 233                         Right y -> (xs, y:ys)
 234     where (xs,ys) = splitEithers es
 235 \end{code}
 236
 237 A paranoid @zip@ (and some @zipWith@ friends) that checks the lists
 238 are of equal length.  Alastair Reid thinks this should only happen if
 239 DEBUGging on; hey, why not?
 240
 241 \begin{code}
 242 zipEqual        :: String -> [a] -> [b] -> [(a,b)]
 243 zipWithEqual    :: String -> (a->b->c) -> [a]->[b]->[c]
 244 zipWith3Equal   :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d]
 245 zipWith4Equal   :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e]
 246
 247 #ifndef DEBUG
 248 zipEqual      _ = zip
 249 zipWithEqual  _ = zipWith
 250 zipWith3Equal _ = zipWith3
 251 zipWith4Equal _ = zipWith4
 252 #else
 253 zipEqual _   []     []     = []
 254 zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs
 255 zipEqual msg _      _      = panic ("zipEqual: unequal lists:"++msg)
 256
 257 zipWithEqual msg z (a:as) (b:bs)=  z a b : zipWithEqual msg z as bs
 258 zipWithEqual _   _ [] []        =  []
 259 zipWithEqual msg _ _ _          =  panic ("zipWithEqual: unequal lists:"++msg)
 260
 261 zipWith3Equal msg z (a:as) (b:bs) (c:cs)
 262                                 =  z a b c : zipWith3Equal msg z as bs cs
 263 zipWith3Equal _   _ [] []  []   =  []
 264 zipWith3Equal msg _ _  _   _    =  panic ("zipWith3Equal: unequal lists:"++msg)
 265
 266 zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds)
 267                                 =  z a b c d : zipWith4Equal msg z as bs cs ds
 268 zipWith4Equal _   _ [] [] [] [] =  []
 269 zipWith4Equal msg _ _  _  _  _  =  panic ("zipWith4Equal: unequal lists:"++msg)
 270 #endif
 271 \end{code}
 272
 273 \begin{code}
 274 -- | 'zipLazy' is a kind of 'zip' that is lazy in the second list (observe the ~)
 275 zipLazy :: [a] -> [b] -> [(a,b)]
 276 zipLazy []     _       = []
 277 -- We want to write this, but with GHC 6.4 we get a warning, so it
 278 -- doesn't validate:
 279 -- zipLazy (x:xs) ~(y:ys) = (x,y) : zipLazy xs ys
 280 -- so we write this instead:
 281 zipLazy (x:xs) zs = let y : ys = zs
 282                     in (x,y) : zipLazy xs ys
 283 \end{code}
 284
 285
 286 \begin{code}
 287 stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c]
 288 -- ^ @stretchZipWith p z f xs ys@ stretches @ys@ by inserting @z@ in
 289 -- the places where @p@ returns @True@
 290
 291 stretchZipWith _ _ _ []     _ = []
 292 stretchZipWith p z f (x:xs) ys
 293   | p x       = f x z : stretchZipWith p z f xs ys
 294   | otherwise = case ys of
 295                 []     -> []
 296                 (y:ys) -> f x y : stretchZipWith p z f xs ys
 297 \end{code}
 298
 299
 300 \begin{code}
 301 mapFst :: (a->c) -> [(a,b)] -> [(c,b)]
 302 mapSnd :: (b->c) -> [(a,b)] -> [(a,c)]
 303
 304 mapFst f xys = [(f x, y) | (x,y) <- xys]
 305 mapSnd f xys = [(x, f y) | (x,y) <- xys]
 306
 307 mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c])
 308
 309 mapAndUnzip _ [] = ([], [])
 310 mapAndUnzip f (x:xs)
 311   = let (r1,  r2)  = f x
 312         (rs1, rs2) = mapAndUnzip f xs
 313     in
 314     (r1:rs1, r2:rs2)
 315
 316 mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d])
 317
 318 mapAndUnzip3 _ [] = ([], [], [])
 319 mapAndUnzip3 f (x:xs)
 320   = let (r1,  r2,  r3)  = f x
 321         (rs1, rs2, rs3) = mapAndUnzip3 f xs
 322     in
 323     (r1:rs1, r2:rs2, r3:rs3)
 324 \end{code}
 325
 326 \begin{code}
 327 nOfThem :: Int -> a -> [a]
 328 nOfThem n thing = replicate n thing
 329
 330 -- | @atLength atLen atEnd ls n@ unravels list @ls@ to position @n@. Precisely:
 331 --
 332 -- @
 333 --  atLength atLenPred atEndPred ls n
 334 --   | n < 0         = atLenPred n
 335 --   | length ls < n = atEndPred (n - length ls)
 336 --   | otherwise     = atLenPred (drop n ls)
 337 -- @
 338 atLength :: ([a] -> b)
 339          -> (Int -> b)
 340          -> [a]
 341          -> Int
 342          -> b
 343 atLength atLenPred atEndPred ls n
 344   | n < 0     = atEndPred n
 345   | otherwise = go n ls
 346   where
 347     go n [] = atEndPred n
 348     go 0 ls = atLenPred ls
 349     go n (_:xs) = go (n-1) xs
 350
 351 -- Some special cases of atLength:
 352
 353 lengthExceeds :: [a] -> Int -> Bool
 354 -- ^ > (lengthExceeds xs n) = (length xs > n)
 355 lengthExceeds = atLength notNull (const False)
 356
 357 lengthAtLeast :: [a] -> Int -> Bool
 358 lengthAtLeast = atLength notNull (== 0)
 359
 360 lengthIs :: [a] -> Int -> Bool
 361 lengthIs = atLength null (==0)
 362
 363 listLengthCmp :: [a] -> Int -> Ordering
 364 listLengthCmp = atLength atLen atEnd
 365  where
 366   atEnd 0      = EQ
 367   atEnd x
 368    | x > 0     = LT -- not yet seen 'n' elts, so list length is < n.
 369    | otherwise = GT
 370
 371   atLen []     = EQ
 372   atLen _      = GT
 373
 374 equalLength :: [a] -> [b] -> Bool
 375 equalLength []     []     = True
 376 equalLength (_:xs) (_:ys) = equalLength xs ys
 377 equalLength _      _      = False
 378
 379 compareLength :: [a] -> [b] -> Ordering
 380 compareLength []     []     = EQ
 381 compareLength (_:xs) (_:ys) = compareLength xs ys
 382 compareLength []     _      = LT
 383 compareLength _      []     = GT
 384
 385 ----------------------------
 386 singleton :: a -> [a]
 387 singleton x = [x]
 388
 389 isSingleton :: [a] -> Bool
 390 isSingleton [_] = True
 391 isSingleton _   = False
 392
 393 notNull :: [a] -> Bool
 394 notNull [] = False
 395 notNull _  = True
 396
 397 only :: [a] -> a
 398 #ifdef DEBUG
 399 only [a] = a
 400 #else
 401 only (a:_) = a
 402 #endif
 403 only _ = panic "Util: only"
 404 \end{code}
 405
 406 Debugging/specialising versions of \tr{elem} and \tr{notElem}
 407
 408 \begin{code}
 409 isIn, isn'tIn :: Eq a => String -> a -> [a] -> Bool
 410
 411 # ifndef DEBUG
 412 isIn    _msg x ys = x `elem` ys
 413 isn'tIn _msg x ys = x `notElem` ys
 414
 415 # else /* DEBUG */
 416 isIn msg x ys
 417   = elem100 (_ILIT(0)) x ys
 418   where
 419     elem100 _ _ []        = False
 420     elem100 i x (y:ys)
 421       | i ># _ILIT(100) = trace ("Over-long elem in " ++ msg)
 422                                 (x `elem` (y:ys))
 423       | otherwise       = x == y || elem100 (i +# _ILIT(1)) x ys
 424
 425 isn'tIn msg x ys
 426   = notElem100 (_ILIT(0)) x ys
 427   where
 428     notElem100 _ _ [] =  True
 429     notElem100 i x (y:ys)
 430       | i ># _ILIT(100) = trace ("Over-long notElem in " ++ msg)
 431                                 (x `notElem` (y:ys))
 432       | otherwise      =  x /= y && notElem100 (i +# _ILIT(1)) x ys
 433 # endif /* DEBUG */
 434 \end{code}
 435
 436 %************************************************************************
 437 %*                                                                      *
 438 \subsubsection[Utils-Carsten-mergesort]{A mergesort from Carsten}
 439 %*                                                                      *
 440 %************************************************************************
 441
 442 \begin{display}
 443 Date: Mon, 3 May 93 20:45:23 +0200
 444 From: Carsten Kehler Holst <kehler@cs.chalmers.se>
 445 To: partain@dcs.gla.ac.uk
 446 Subject: natural merge sort beats quick sort [ and it is prettier ]
 447
 448 Here is a piece of Haskell code that I'm rather fond of. See it as an
 449 attempt to get rid of the ridiculous quick-sort routine. group is
 450 quite useful by itself I think it was John's idea originally though I
 451 believe the lazy version is due to me [surprisingly complicated].
 452 gamma [used to be called] is called gamma because I got inspired by
 453 the Gamma calculus. It is not very close to the calculus but does
 454 behave less sequentially than both foldr and foldl. One could imagine
 455 a version of gamma that took a unit element as well thereby avoiding
 456 the problem with empty lists.
 457
 458 I've tried this code against
 459
 460    1) insertion sort - as provided by haskell
 461    2) the normal implementation of quick sort
 462    3) a deforested version of quick sort due to Jan Sparud
 463    4) a super-optimized-quick-sort of Lennart's
 464
 465 If the list is partially sorted both merge sort and in particular
 466 natural merge sort wins. If the list is random [ average length of
 467 rising subsequences = approx 2 ] mergesort still wins and natural
 468 merge sort is marginally beaten by Lennart's soqs. The space
 469 consumption of merge sort is a bit worse than Lennart's quick sort
 470 approx a factor of 2. And a lot worse if Sparud's bug-fix [see his
 471 fpca article ] isn't used because of group.
 472
 473 have fun
 474 Carsten
 475 \end{display}
 476
 477 \begin{code}
 478 group :: (a -> a -> Bool) -> [a] -> [[a]]
 479 -- Given a <= function, group finds maximal contiguous up-runs
 480 -- or down-runs in the input list.
 481 -- It's stable, in the sense that it never re-orders equal elements
 482 --
 483 -- Date: Mon, 12 Feb 1996 15:09:41 +0000
 484 -- From: Andy Gill <andy@dcs.gla.ac.uk>
 485 -- Here is a `better' definition of group.
 486
 487 group _ []     = []
 488 group p (x:xs) = group' xs x x (x :)
 489   where
 490     group' []     _     _     s  = [s []]
 491     group' (x:xs) x_min x_max s
 492         |      x_max `p` x  = group' xs x_min x     (s . (x :))
 493         | not (x_min `p` x) = group' xs x     x_max ((x :) . s)
 494         | otherwise         = s [] : group' xs x x (x :)
 495         -- NB: the 'not' is essential for stablity
 496         --     x `p` x_min would reverse equal elements
 497
 498 generalMerge :: (a -> a -> Bool) -> [a] -> [a] -> [a]
 499 generalMerge _ xs [] = xs
 500 generalMerge _ [] ys = ys
 501 generalMerge p (x:xs) (y:ys) | x `p` y   = x : generalMerge p xs     (y:ys)
 502                              | otherwise = y : generalMerge p (x:xs) ys
 503
 504 -- gamma is now called balancedFold
 505
 506 balancedFold :: (a -> a -> a) -> [a] -> a
 507 balancedFold _ [] = error "can't reduce an empty list using balancedFold"
 508 balancedFold _ [x] = x
 509 balancedFold f l  = balancedFold f (balancedFold' f l)
 510
 511 balancedFold' :: (a -> a -> a) -> [a] -> [a]
 512 balancedFold' f (x:y:xs) = f x y : balancedFold' f xs
 513 balancedFold' _ xs = xs
 514
 515 generalNaturalMergeSort :: (a -> a -> Bool) -> [a] -> [a]
 516 generalNaturalMergeSort _ [] = []
 517 generalNaturalMergeSort p xs = (balancedFold (generalMerge p) . group p) xs
 518
 519 #if NOT_USED
 520 generalMergeSort p [] = []
 521 generalMergeSort p xs = (balancedFold (generalMerge p) . map (: [])) xs
 522
 523 mergeSort, naturalMergeSort :: Ord a => [a] -> [a]
 524
 525 mergeSort = generalMergeSort (<=)
 526 naturalMergeSort = generalNaturalMergeSort (<=)
 527
 528 mergeSortLe le = generalMergeSort le
 529 #endif
 530
 531 sortLe :: (a->a->Bool) -> [a] -> [a]
 532 sortLe le = generalNaturalMergeSort le
 533
 534 sortWith :: Ord b => (a->b) -> [a] -> [a]
 535 sortWith get_key xs = sortLe le xs
 536   where
 537     x `le` y = get_key x < get_key y
 538
 539 on :: (a -> a -> c) -> (b -> a) -> b -> b -> c
 540 on cmp sel = \x y -> sel x `cmp` sel y
 541
 542 \end{code}
 543
 544 %************************************************************************
 545 %*                                                                      *
 546 \subsection[Utils-transitive-closure]{Transitive closure}
 547 %*                                                                      *
 548 %************************************************************************
 549
 550 This algorithm for transitive closure is straightforward, albeit quadratic.
 551
 552 \begin{code}
 553 transitiveClosure :: (a -> [a])         -- Successor function
 554                   -> (a -> a -> Bool)   -- Equality predicate
 555                   -> [a]
 556                   -> [a]                -- The transitive closure
 557
 558 transitiveClosure succ eq xs
 559  = go [] xs
 560  where
 561    go done []                      = done
 562    go done (x:xs) | x `is_in` done = go done xs
 563                   | otherwise      = go (x:done) (succ x ++ xs)
 564
 565    _ `is_in` []                 = False
 566    x `is_in` (y:ys) | eq x y    = True
 567                     | otherwise = x `is_in` ys
 568 \end{code}
 569
 570 %************************************************************************
 571 %*                                                                      *
 572 \subsection[Utils-accum]{Accumulating}
 573 %*                                                                      *
 574 %************************************************************************
 575
 576 A combination of foldl with zip.  It works with equal length lists.
 577
 578 \begin{code}
 579 foldl2 :: (acc -> a -> b -> acc) -> acc -> [a] -> [b] -> acc
 580 foldl2 _ z [] [] = z
 581 foldl2 k z (a:as) (b:bs) = foldl2 k (k z a b) as bs
 582 foldl2 _ _ _      _      = panic "Util: foldl2"
 583
 584 all2 :: (a -> b -> Bool) -> [a] -> [b] -> Bool
 585 -- True if the lists are the same length, and
 586 -- all corresponding elements satisfy the predicate
 587 all2 _ []     []     = True
 588 all2 p (x:xs) (y:ys) = p x y && all2 p xs ys
 589 all2 _ _      _      = False
 590 \end{code}
 591
 592 Count the number of times a predicate is true
 593
 594 \begin{code}
 595 count :: (a -> Bool) -> [a] -> Int
 596 count _ [] = 0
 597 count p (x:xs) | p x       = 1 + count p xs
 598                | otherwise = count p xs
 599 \end{code}
 600
 601 @splitAt@, @take@, and @drop@ but with length of another
 602 list giving the break-off point:
 603
 604 \begin{code}
 605 takeList :: [b] -> [a] -> [a]
 606 takeList [] _ = []
 607 takeList (_:xs) ls =
 608    case ls of
 609      [] -> []
 610      (y:ys) -> y : takeList xs ys
 611
 612 dropList :: [b] -> [a] -> [a]
 613 dropList [] xs    = xs
 614 dropList _  xs@[] = xs
 615 dropList (_:xs) (_:ys) = dropList xs ys
 616
 617
 618 splitAtList :: [b] -> [a] -> ([a], [a])
 619 splitAtList [] xs     = ([], xs)
 620 splitAtList _ xs@[]   = (xs, xs)
 621 splitAtList (_:xs) (y:ys) = (y:ys', ys'')
 622     where
 623       (ys', ys'') = splitAtList xs ys
 624
 625 -- drop from the end of a list
 626 dropTail :: Int -> [a] -> [a]
 627 dropTail n = reverse . drop n . reverse
 628
 629 snocView :: [a] -> Maybe ([a],a)
 630         -- Split off the last element
 631 snocView [] = Nothing
 632 snocView xs = go [] xs
 633             where
 634                 -- Invariant: second arg is non-empty
 635               go acc [x]    = Just (reverse acc, x)
 636               go acc (x:xs) = go (x:acc) xs
 637               go _   []     = panic "Util: snocView"
 638
 639 split :: Char -> String -> [String]
 640 split c s = case rest of
 641                 []     -> [chunk]
 642                 _:rest -> chunk : split c rest
 643   where (chunk, rest) = break (==c) s
 644 \end{code}
 645
 646
 647 %************************************************************************
 648 %*                                                                      *
 649 \subsection[Utils-comparison]{Comparisons}
 650 %*                                                                      *
 651 %************************************************************************
 652
 653 \begin{code}
 654 isEqual :: Ordering -> Bool
 655 -- Often used in (isEqual (a `compare` b))
 656 isEqual GT = False
 657 isEqual EQ = True
 658 isEqual LT = False
 659
 660 thenCmp :: Ordering -> Ordering -> Ordering
 661 {-# INLINE thenCmp #-}
 662 thenCmp EQ       ordering = ordering
 663 thenCmp ordering _        = ordering
 664
 665 eqListBy :: (a->a->Bool) -> [a] -> [a] -> Bool
 666 eqListBy _  []     []     = True
 667 eqListBy eq (x:xs) (y:ys) = eq x y && eqListBy eq xs ys
 668 eqListBy _  _      _      = False
 669
 670 cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering
 671     -- `cmpList' uses a user-specified comparer
 672
 673 cmpList _   []     [] = EQ
 674 cmpList _   []     _  = LT
 675 cmpList _   _      [] = GT
 676 cmpList cmp (a:as) (b:bs)
 677   = case cmp a b of { EQ -> cmpList cmp as bs; xxx -> xxx }
 678 \end{code}
 679
 680 \begin{code}
 681 removeSpaces :: String -> String
 682 removeSpaces = reverse . dropWhile isSpace . reverse . dropWhile isSpace
 683 \end{code}
 684
 685 %************************************************************************
 686 %*                                                                      *
 687 \subsection{Edit distance}
 688 %*                                                                      *
 689 %************************************************************************
 690
 691 \begin{code}
 692 -- | Find the "restricted" Damerau-Levenshtein edit distance between two strings. See: <http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance>.
 693 -- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro).
 694 -- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation
 695 restrictedDamerauLevenshteinDistance :: String -> String -> Int
 696 restrictedDamerauLevenshteinDistance str1 str2 = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
 697   where
 698     m = length str1
 699     n = length str2
 700
 701 restrictedDamerauLevenshteinDistanceWithLengths :: Int -> Int -> String -> String -> Int
 702 restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
 703   | m <= n    = if n <= 32 -- n must be larger so this check is sufficient
 704                 then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2
 705                 else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2
 706   | otherwise = if m <= 32 -- m must be larger so this check is sufficient
 707                 then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1
 708                 else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1
 709
 710 restrictedDamerauLevenshteinDistance' :: (Bits bv) => bv -> Int -> Int -> String -> String -> Int
 711 restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2
 712   | [] <- str1 = n
 713   | otherwise  = extractAnswer $ foldl' (restrictedDamerauLevenshteinDistanceWorker (matchVectors str1) top_bit_mask vector_mask) (0, 0, m_ones, 0, m) str2
 714   where m_ones@vector_mask = (2 ^ m) - 1
 715         top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy
 716         extractAnswer (_, _, _, _, distance) = distance
 717
 718 restrictedDamerauLevenshteinDistanceWorker :: (Bits bv) => IM.IntMap bv -> bv -> bv -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int)
 719 restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask (pm, d0, vp, vn, distance) char2
 720   = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $ seq pm' $ seq d0' $ seq vp' $ seq vn' $ seq distance'' $ seq char2 $ (pm', d0', vp', vn', distance'')
 721   where
 722     pm' = IM.findWithDefault 0 (ord char2) str1_mvs
 723
 724     d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm) -- No need to mask the shiftL because of the restricted range of pm
 725       .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn
 726     hp' = vn .|. sizedComplement vector_mask (d0' .|. vp)
 727     hn' = d0' .&. vp
 728
 729     hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask
 730     hn'_shift = (hn' `shiftL` 1) .&. vector_mask
 731     vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift)
 732     vn' = d0' .&. hp'_shift
 733
 734     distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance
 735     distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance'
 736
 737 sizedComplement :: Bits bv => bv -> bv -> bv
 738 sizedComplement vector_mask vect = vector_mask `xor` vect
 739
 740 matchVectors :: Bits bv => String -> IM.IntMap bv
 741 matchVectors = snd . foldl' go (0 :: Int, IM.empty)
 742   where
 743     go (ix, im) char = let ix' = ix + 1
 744                            im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im
 745                        in seq ix' $ seq im' $ (ix', im')
 746
 747 #ifdef __GLASGOW_HASKELL__
 748 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance' :: Word32 -> Int -> Int -> String -> String -> Int #-}
 749 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance' :: Integer -> Int -> Int -> String -> String -> Int #-}
 750
 751 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker :: IM.IntMap Word32 -> Word32 -> Word32 -> (Word32, Word32, Word32, Word32, Int) -> Char -> (Word32, Word32, Word32, Word32, Int) #-}
 752 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker :: IM.IntMap Integer -> Integer -> Integer -> (Integer, Integer, Integer, Integer, Int) -> Char -> (Integer, Integer, Integer, Integer, Int) #-}
 753
 754 {-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-}
 755 {-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-}
 756
 757 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-}
 758 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-}
 759 #endif
 760
 761 -- | Search for possible matches to the users input in the given list, returning a small number of ranked results
 762 fuzzyMatch :: String -> [String] -> [String]
 763 fuzzyMatch user_entered possibilites = map fst $ take mAX_RESULTS $ sortBy (comparing snd)
 764                                                 [ (poss, distance) | poss <- possibilites
 765                                                                    , let distance = restrictedDamerauLevenshteinDistance poss user_entered
 766                                                                    , distance <= fuzzy_threshold ]
 767   where -- Work out an approriate match threshold (about a quarter of the # of characters the user entered)
 768         fuzzy_threshold = max (round $ fromInteger (genericLength user_entered) / (4 :: Rational)) 1
 769         mAX_RESULTS = 3
 770 \end{code}
 771
 772 %************************************************************************
 773 %*                                                                      *
 774 \subsection[Utils-pairs]{Pairs}
 775 %*                                                                      *
 776 %************************************************************************
 777
 778 \begin{code}
 779 unzipWith :: (a -> b -> c) -> [(a, b)] -> [c]
 780 unzipWith f pairs = map ( \ (a, b) -> f a b ) pairs
 781 \end{code}
 782
 783 \begin{code}
 784 seqList :: [a] -> b -> b
 785 seqList [] b = b
 786 seqList (x:xs) b = x `seq` seqList xs b
 787 \end{code}
 788
 789 Global variables:
 790
 791 \begin{code}
 792 global :: a -> IORef a
 793 global a = unsafePerformIO (newIORef a)
 794 \end{code}
 795
 796 \begin{code}
 797 consIORef :: IORef [a] -> a -> IO ()
 798 consIORef var x = do
 799   atomicModifyIORef var (\xs -> (x:xs,()))
 800 \end{code}
 801
 802 \begin{code}
 803 globalMVar :: a -> MVar a
 804 globalMVar a = unsafePerformIO (newMVar a)
 805
 806 globalEmptyMVar :: MVar a
 807 globalEmptyMVar = unsafePerformIO newEmptyMVar
 808 \end{code}
 809
 810 Module names:
 811
 812 \begin{code}
 813 looksLikeModuleName :: String -> Bool
 814 looksLikeModuleName [] = False
 815 looksLikeModuleName (c:cs) = isUpper c && go cs
 816   where go [] = True
 817         go ('.':cs) = looksLikeModuleName cs
 818         go (c:cs)   = (isAlphaNum c || c == '_' || c == '\'') && go cs
 819 \end{code}
 820
 821 Akin to @Prelude.words@, but acts like the Bourne shell, treating
 822 quoted strings as Haskell Strings, and also parses Haskell [String]
 823 syntax.
 824
 825 \begin{code}
 826 getCmd :: String -> Either String             -- Error
 827                            (String, String) -- (Cmd, Rest)
 828 getCmd s = case break isSpace $ dropWhile isSpace s of
 829            ([], _) -> Left ("Couldn't find command in " ++ show s)
 830            res -> Right res
 831
 832 toCmdArgs :: String -> Either String             -- Error
 833                               (String, [String]) -- (Cmd, Args)
 834 toCmdArgs s = case getCmd s of
 835               Left err -> Left err
 836               Right (cmd, s') -> case toArgs s' of
 837                                  Left err -> Left err
 838                                  Right args -> Right (cmd, args)
 839
 840 toArgs :: String -> Either String   -- Error
 841                            [String] -- Args
 842 toArgs str
 843     = case dropWhile isSpace str of
 844       s@('[':_) -> case reads s of
 845                    [(args, spaces)]
 846                     | all isSpace spaces ->
 847                        Right args
 848                    _ ->
 849                        Left ("Couldn't read " ++ show str ++ "as [String]")
 850       s -> toArgs' s
 851  where
 852   toArgs' s = case dropWhile isSpace s of
 853               [] -> Right []
 854               ('"' : _) -> case reads s of
 855                            [(arg, rest)]
 856                               -- rest must either be [] or start with a space
 857                             | all isSpace (take 1 rest) ->
 858                                case toArgs' rest of
 859                                Left err -> Left err
 860                                Right args -> Right (arg : args)
 861                            _ ->
 862                                Left ("Couldn't read " ++ show s ++ "as String")
 863               s' -> case break isSpace s' of
 864                     (arg, s'') -> case toArgs' s'' of
 865                                   Left err -> Left err
 866                                   Right args -> Right (arg : args)
 867 \end{code}
 868
 869 -- -----------------------------------------------------------------------------
 870 -- Floats
 871
 872 \begin{code}
 873 readRational__ :: ReadS Rational -- NB: doesn't handle leading "-"
 874 readRational__ r = do
 875      (n,d,s) <- readFix r
 876      (k,t)   <- readExp s
 877      return ((n%1)*10^^(k-d), t)
 878  where
 879      readFix r = do
 880         (ds,s)  <- lexDecDigits r
 881         (ds',t) <- lexDotDigits s
 882         return (read (ds++ds'), length ds', t)
 883
 884      readExp (e:s) | e `elem` "eE" = readExp' s
 885      readExp s                     = return (0,s)
 886
 887      readExp' ('+':s) = readDec s
 888      readExp' ('-':s) = do (k,t) <- readDec s
 889                            return (-k,t)
 890      readExp' s       = readDec s
 891
 892      readDec s = do
 893         (ds,r) <- nonnull isDigit s
 894         return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
 895                 r)
 896
 897      lexDecDigits = nonnull isDigit
 898
 899      lexDotDigits ('.':s) = return (span isDigit s)
 900      lexDotDigits s       = return ("",s)
 901
 902      nonnull p s = do (cs@(_:_),t) <- return (span p s)
 903                       return (cs,t)
 904
 905 readRational :: String -> Rational -- NB: *does* handle a leading "-"
 906 readRational top_s
 907   = case top_s of
 908       '-' : xs -> - (read_me xs)
 909       xs       -> read_me xs
 910   where
 911     read_me s
 912       = case (do { (x,"") <- readRational__ s ; return x }) of
 913           [x] -> x
 914           []  -> error ("readRational: no parse:"        ++ top_s)
 915           _   -> error ("readRational: ambiguous parse:" ++ top_s)
 916
 917
 918 -----------------------------------------------------------------------------
 919 -- Create a hierarchy of directories
 920
 921 createDirectoryHierarchy :: FilePath -> IO ()
 922 createDirectoryHierarchy dir | isDrive dir = return () -- XXX Hack
 923 createDirectoryHierarchy dir = do
 924   b <- doesDirectoryExist dir
 925   unless b $ do createDirectoryHierarchy (takeDirectory dir)
 926                 createDirectory dir
 927
 928 -----------------------------------------------------------------------------
 929 -- Verify that the 'dirname' portion of a FilePath exists.
 930 --
 931 doesDirNameExist :: FilePath -> IO Bool
 932 doesDirNameExist fpath = case takeDirectory fpath of
 933                          "" -> return True -- XXX Hack
 934                          _  -> doesDirectoryExist (takeDirectory fpath)
 935
 936 -- --------------------------------------------------------------
 937 -- check existence & modification time at the same time
 938
 939 modificationTimeIfExists :: FilePath -> IO (Maybe ClockTime)
 940 modificationTimeIfExists f = do
 941   (do t <- getModificationTime f; return (Just t))
 942         `IO.catch` \e -> if isDoesNotExistError e
 943                          then return Nothing
 944                          else ioError e
 945
 946 -- split a string at the last character where 'pred' is True,
 947 -- returning a pair of strings. The first component holds the string
 948 -- up (but not including) the last character for which 'pred' returned
 949 -- True, the second whatever comes after (but also not including the
 950 -- last character).
 951 --
 952 -- If 'pred' returns False for all characters in the string, the original
 953 -- string is returned in the first component (and the second one is just
 954 -- empty).
 955 splitLongestPrefix :: String -> (Char -> Bool) -> (String,String)
 956 splitLongestPrefix str pred
 957   | null r_pre = (str,           [])
 958   | otherwise  = (reverse (tail r_pre), reverse r_suf)
 959                            -- 'tail' drops the char satisfying 'pred'
 960   where (r_suf, r_pre) = break pred (reverse str)
 961
 962 escapeSpaces :: String -> String
 963 escapeSpaces = foldr (\c s -> if isSpace c then '\\':c:s else c:s) ""
 964
 965 type Suffix = String
 966
 967 --------------------------------------------------------------
 968 -- * Search path
 969 --------------------------------------------------------------
 970
 971 -- | The function splits the given string to substrings
 972 -- using the 'searchPathSeparator'.
 973 parseSearchPath :: String -> [FilePath]
 974 parseSearchPath path = split path
 975   where
 976     split :: String -> [String]
 977     split s =
 978       case rest' of
 979         []     -> [chunk]
 980         _:rest -> chunk : split rest
 981       where
 982         chunk =
 983           case chunk' of
 984 #ifdef mingw32_HOST_OS
 985             ('\"':xs@(_:_)) | last xs == '\"' -> init xs
 986 #endif
 987             _                                 -> chunk'
 988
 989         (chunk', rest') = break isSearchPathSeparator s
 990
 991 data Direction = Forwards | Backwards
 992
 993 reslash :: Direction -> FilePath -> FilePath
 994 reslash d = f
 995     where f ('/'  : xs) = slash : f xs
 996           f ('\\' : xs) = slash : f xs
 997           f (x    : xs) = x     : f xs
 998           f ""          = ""
 999           slash = case d of
1000                   Forwards -> '/'
1001                   Backwards -> '\\'
1002 \end{code}
1003
1004 %************************************************************************
1005 %*                                                                      *
1006 \subsection[Utils-Data]{Utils for defining Data instances}
1007 %*                                                                      *
1008 %************************************************************************
1009
1010 These functions helps us to define Data instances for abstract types.
1011
1012 \begin{code}
1013 abstractConstr :: String -> Constr
1014 abstractConstr n = mkConstr (abstractDataType n) ("{abstract:"++n++"}") [] Prefix
1015 \end{code}
1016
1017 \begin{code}
1018 abstractDataType :: String -> DataType
1019 abstractDataType n = mkDataType n [abstractConstr n]
1020 \end{code}
1021
1022 \begin{code}
1023 -- Old GHC versions come with a base library with this function misspelled.
1024 #if __GLASGOW_HASKELL__ < 612
1025 mkNoRepType :: String -> DataType
1026 mkNoRepType = mkNorepType
1027 #endif
1028 \end{code}
1029