← Back to Upcase

Code snippet: find URLs that redirect and replace them with their targets


(Joe Ferris) #1

I wrote this a while back to find URLs in our guides that are short URLs or have been permanently redirected since we added them (result). I originally posted this as a gist, but I thought I’d cross-post it here as a Haskell example and to invite feedback and questions from anybody interested in Haskell or functional programming.

import Control.Applicative
import Data.List.Utils
import Data.Maybe
import Network.HTTP
import Network.HTTP.Headers
import System.IO
import Text.Regex.PCRE

-- | Copy standard input to standard output one line at a time, passing each
-- line through 'replaceRedirects' before printing it.
--
-- End-of-file is tested /before/ reading, so empty input terminates cleanly
-- instead of making 'getLine' throw an end-of-file exception.
main :: IO ()
main = do
  eof <- isEOF
  if eof
    then return ()
    else do
      line <- getLine
      replacedLine <- replaceRedirects line
      putStrLn replacedLine
      main

-- | Rewrite a single line of text: every URL that 'matchUrls' finds in the
-- line is fed, in turn, through 'replaceRedirect', starting from the
-- untouched line.
replaceRedirects :: String -> IO String
replaceRedirects line = foldl replaceRedirect untouched urls
  where
    -- Seed of the fold: the original line, lifted into IO.
    untouched = pure line
    -- URLs are always searched for in the original line, not in the
    -- partially rewritten one.
    urls = matchUrls line

-- | Look up the target of @url@ with 'targetUrlFromUrl' and use 'replace' to
-- substitute that target for @url@ in the text produced by the @line@ action.
--
-- The lookup action is sequenced before the @line@ action.
replaceRedirect :: IO String -> String -> IO String
replaceRedirect line url =
  replace url <$> targetUrlFromUrl url <*> line

-- | Issue a GET request for @sourceUrl@ and report where it points.
--
-- Returns the value of the response's @Location@ header when there is one.
-- If the request yields an error result, or the response carries no
-- @Location@ header, @sourceUrl@ is returned unchanged.
targetUrlFromUrl :: String -> IO String
targetUrlFromUrl sourceUrl = do
  result <- simpleHTTP (getRequest sourceUrl)
  return $ case result of
    Left _ -> sourceUrl
    Right response -> fromMaybe sourceUrl (findHeader HdrLocation response)

-- | Extract every substring of @line@ that matches @http://[^ ]+@, i.e.
-- @http://@ followed by one or more non-space characters.
--
-- The regex result is requested as a list of matches, each itself a list of
-- strings; only the first element of each match is kept. 'listToMaybe' is
-- used instead of the partial 'head' so an empty inner list is skipped
-- rather than crashing the program.
matchUrls :: String -> [String]
matchUrls line = mapMaybe listToMaybe allMatches
  where
    allMatches :: [[String]]
    allMatches = line =~ "http://[^ ]+"