{-# LANGUAGE ScopedTypeVariables #-}
import Control.Concurrent (threadDelay)
import qualified Control.Exception as CE (catch, IOException)
import Control.Monad (liftM, unless, void, when)
import Data.List (delete)
import qualified Data.Set as S (fromList, toList)
import Data.Maybe (fromMaybe)
import Network.HTTP (getRequest, simpleHTTP)
import Network.URI (isURI)
import System.Environment (getArgs)
import System.Process (runCommand, terminateProcess)
import qualified Data.ByteString.Char8 as B (length, lines, readFile, unlines, unpack, writeFile, ByteString)
import System.Random (getStdRandom, randomR)

import Network.URL.Archiver (checkArchive)

main :: IO ()
main = do args <- getArgs
          case args of
           (f:[])       -> archivePage f Nothing  Nothing  Nothing
           (f:e:[])     -> archivePage f (Just e) Nothing  Nothing
           (f:e:s:[])   -> archivePage f (Just e) (Just s) Nothing
           (f:e:s:n:[]) -> archivePage f (Just e) (Just s) (Just (read n :: Int))
           _ -> error "must supply a filename, or a filename and an email address"

archivePage :: FilePath -> Maybe String -> Maybe String -> Maybe Int -> IO ()
archivePage file email sh n = do -- default: 48 seconds (converted to milliseconds)
                                 let n' = 1000000 * fromMaybe 48 n
                                 let loop = archivePage file email sh n
                                 connectedp <- CE.catch (simpleHTTP (getRequest "http://www.webcitation.org")) (\(_::CE.IOException) -> return (Left undefined))
                                 case connectedp of
                                   Left _  -> -- Left = ConnError, network not working! sleep for a minute and try again later
                                     threadDelay n' >> loop
                                   Right _ -> do -- we have access to the WWW, it seems. proceeding with mission!
                                     contents <- B.readFile file
                                     when (B.length contents == 0) $ threadDelay n'
                                     (url,rest) <- splitRandom contents
                                     let url' = B.unpack url
                                     let email' =  fromMaybe "nobody@mailinator.com" email
                                     when (isURI url') $ do checkArchive email' url'
                                                            print url'
                                                            hdl <- case sh of
                                                                        Nothing -> return Nothing
                                                                        Just sh' -> return $ Just (runCommand (sh' ++ " " ++ url'))
                                                            -- banned >=100 requests/hour; choke it
                                                            threadDelay n'
                                                            case hdl of
                                                              Nothing -> return ()
                                                              Just hdl' -> void $ liftM terminateProcess hdl' -- GC
                                     unless (null rest) (writePages file url >> loop) -- rid of leading \n

-- re-reads a possibly modified 'file' from disk, removes the archived URL from it, and writes it back out for 'archivePage' to read immediately
writePages :: FilePath -> B.ByteString -> IO ()
writePages file done = do original <- liftM B.lines $ B.readFile file
                          let sorted =  S.toList $ S.fromList original
                          let final = B.unlines $ filter (not . (== done)) sorted
                          B.writeFile file final

-- pick a random entry in the list
splitRandom :: B.ByteString -> IO (B.ByteString, [B.ByteString])
splitRandom s = do let ss = B.lines s
                   let l  = length ss
                   i <- getStdRandom (randomR (0,l))
                   let randpick = if length ss > 1 then ss !! i else head ss
                   let removed = Data.List.delete randpick ss
                   return (randpick, removed)
