From 0e25c5f2daeb3bca86501cb92cc785a5e698906e Mon Sep 17 00:00:00 2001 From: adam Date: Wed, 28 Jan 2009 14:03:02 -0800 Subject: [PATCH] update handling of %-escapes in urls darcs-hash:20090128220302-5007d-64ebdc2cfa8211109077588c45af2c9269435e9c.gz --- src/Doc.lhs | 14 ++++++++------ src/url.g | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/Doc.lhs b/src/Doc.lhs index 1d2c119..6d1baa7 100644 --- a/src/Doc.lhs +++ b/src/Doc.lhs @@ -182,11 +182,13 @@ instance Show URL where instance FromTree URL where fromTree (Tree "URL" stuff _) = fromTrees stuff fromTree (Tree "Email" [(Tree "username" un _),host] _) = Email (fromTrees un) (fromTree host) - fromTree (Tree "Path" stuff _) = URLPath $ map fromUrlChar stuff - where - fromUrlChar (Tree "%" [(Tree a [] _),(Tree b [] _)] _) = chr $ (fst $ head $ readHex (a++b)) - fromUrlChar (Tree [c] [] _) = c - fromUrlChar t = error $ "could not parse as an url char: " ++ (show t) + fromTree (Tree "Path" stuff _) = URLPath $ concatMap fromUrlChar stuff + +--fromUrlChar (Tree "%" [(Tree a [] _),(Tree b [] _)] _) = chr $ (fst $ head $ readHex (a++b)) +-- FIXME: problem here is the "/" vs "%2F" issue, so we "leave urls urlencoded" +fromUrlChar (Tree "%" [(Tree a [] _),(Tree b [] _)] _) = "%"++a++b +fromUrlChar (Tree [c] [] _) = [c] +fromUrlChar t = error $ "could not parse as an url char: " ++ (show t) fromTreeChildren (Tree _ c _) = fromTrees c instance FromTrees URL where @@ -195,7 +197,7 @@ instance FromTrees URL where url_host = fromTree host, url_login = Nothing, url_port = case port of { (Tree "Port" port _) -> Just $ fromTrees port; _ -> Nothing }, - url_path = case rest of { ((Tree "Path" p _):_) -> fromTrees p; _ -> "" }, + url_path = case rest of { ((Tree "Path" p _):_) -> concatMap fromUrlChar p; _ -> "" }, url_ref = case rest of { (_:(Tree "Path" r _):_) -> Just $ fromTrees r; _ -> Nothing } } fromTrees x = error $ show x diff --git a/src/url.g b/src/url.g index 05ba612..4ff664a 100644 --- a/src/url.g +++ b/src/url.g @@ -27,6 +27,6 @@ username:: = [a-zA-Z0-9;/?:&=$\-_.+]++ password:: = [a-zA-Z0-9;/?:&=$\-_.+]++ method:: = [+\-.a-z0-9]+ urlchar = urlc - | "%":: "%" [0-9] [0-9] + | "%":: "%" [0-9a-fA-F] [0-9a-fA-F] urlc = [a-zA-Z0-9;/?:&=$\-_.+] | [@~,] // technically illegal (RFC1738) -- 1.7.10.4