rss2irc-1.2/rss2irc.hs0000644000000000000000000001047512456021414013062 0ustar0000000000000000#!/usr/bin/env runhaskell {-# LANGUAGE PatternGuards, BangPatterns, DeriveDataTypeable, OverloadedStrings, ScopedTypeVariables #-} -------------------------------------------------------------------- {- | rss2irc - watches an RSS/Atom feed and writes it to an IRC channel. Copyright (c) Don Stewart 2008-2009, Simon Michael 2009-2014 License: BSD3. -} -------------------------------------------------------------------- module Main where import Control.Concurrent import Control.Concurrent.STM import Control.Exception import Control.Monad (when,unless) import Data.Maybe import Data.Time.Clock (getCurrentTime) import Prelude hiding (log) import Network (withSocketsDo) import Network.HTTP.Client (defaultManagerSettings) import Network.HTTP.Conduit (newManager) import System.Console.CmdArgs import System.Exit (ExitCode(ExitSuccess), exitFailure, exitSuccess) import System.IO import System.IO.Storage import Text.Printf (printf) import Base import Utils import Feed import Irc -- | Get parsed and checked options and a reader and bot ready to connect. getRss2IrcArgs :: IO (Opts, Reader, Bot) getRss2IrcArgs = do opts <- cmdArgs defopts >>= \opts -> do unless (interval opts > 0 || (maybe False (<=10) $ num_iterations opts)) $ opterror "--interval 0 requires --num-iterations 10 or less" seq (applyReplacements opts "") $ return () -- report any bad --replace regexp return opts q <- atomically $ newTChan t <- getCurrentTime let reader = Reader{httpManager=Nothing ,iterationsleft=num_iterations opts } bot = Bot{socket = stdout ,server = "" ,port = defport ,channel = "" ,botnick = "" ,announcequeue = q ,batchindex = 0 ,lastmsgtime = t } bot' = case ircAddressFromOpts opts of Nothing -> bot Just (IrcAddress s p c n) -> bot{server = s ,port = fromMaybe defport p ,channel = c ,botnick = n } return (opts, reader, bot') -- | Process arguments, join the irc channel, start worker threads, -- disconnect and report when there is a problem. main :: IO () main = withStore "globals" $ do -- for readFeedFile -- http-conduit stuff withSocketsDo $ do -- for ms windows manager <- io $ newManager defaultManagerSettings (opts,reader,bot) <- getRss2IrcArgs let app = App{aOpts=opts,aReader=reader{httpManager=Just manager},aBot=bot} when (delay opts > 0) $ threadDelay $ (delay opts) * minutes runThreads app runThreads :: App -> IO () runThreads app@App{aOpts=opts} = do -- catch any termination or error in sub-threads and handle it here r <- try $ bracket (connect app) disconnect $ \a -> do appvar <- newSharedVar a -- 1. the feed reader thread polls forever or until it reaches max -- iterations or raises a non-transient exception _ <- forkMonitoredIO $ feedReader appvar -- 2. the irc announcer runs until it raises an exception or is killed _ <- forkMonitoredIO $ ircAnnouncer appvar -- 3. the main thread becomes the irc responder, keeping the -- connection alive until it raises or receives an exception ircResponder appvar -- exit after an exception preExitDelay case r of Right _ -> unless (quiet opts) (log "normal termination.") >> exitSuccess -- shouldn't happen Left e | Just ExitSuccess <- fromException e -> unless (quiet opts) (log "normal termination") >> exitSuccess Left e -> unless (quiet opts) (putStr "\n" >> log (show e)) >> exitFailure -- | Spawn a thread which will throw any exception or termination (as -- ExitSuccess) to us. Also log error exceptions. forkMonitoredIO :: IO () -> IO ThreadId forkMonitoredIO action = do me <- myThreadId forkIO $ do ex <- action >> return (toException ExitSuccess) `Control.Exception.catch` \e -> case fromException e of Just ExitSuccess -> return e _ -> log (printf "Error: %s" (show (e::SomeException))) >> return e throwTo me ex -- hack: give announcer a chance to announce items from final poll preExitDelay :: IO () preExitDelay = threadDelay $ 500000 rss2irc-1.2/Base.hs0000644000000000000000000001700613315421120012333 0ustar0000000000000000{-# LANGUAGE PatternGuards, BangPatterns, DeriveDataTypeable, OverloadedStrings, ScopedTypeVariables, TemplateHaskell #-} -- DeriveGeneric, StandaloneDeriving {- | Types and settings. Copyright (c) Don Stewart 2008-2009, Simon Michael 2009-2014 License: BSD3. -} module Base where import Control.Concurrent.STM (TChan) -- import Control.DeepSeq (NFData) import Control.Exception import Control.Monad.IO.Class (MonadIO, liftIO) -- import GHC.Generics import Data.Time () import Data.Time.Clock import Data.Typeable import Network.HTTP.Conduit (Manager) import System.Console.CmdArgs import System.IO (Handle) -- import Text.Feed.Types import Text.Printf (printf) progname = "rss2irc" version = "1.2" -- sync with rss2irc.cabal progversion = progname ++ " " ++ version :: String defport = 6667 defusername = progname defrealname = progname ++ " feed announcer" defuseragent :: String defuseragent = progversion definterval = 5 defidle = 0 defmaxitems = 5 maxmessagelength = 400 -- | Maximum size of each part of our irc announcements. -- The max announcement length will be the sum of these, plus typically 15 -- due to prettification, plus any length increase due to --replace. The -- defaults below should keep most announcements within maxmessagelength -- and all announcements within maxmessagelength * 2 or so. maxtitlelength = 100 maxdesclength = 300 maxauthorlength = 50 maxdatelength = 50 maxlinklength = 200 progname, version, progversion, defusername, defrealname :: String defport, definterval, defidle, defmaxitems, maxmessagelength, maxtitlelength, maxdesclength, maxauthorlength, maxdatelength, maxlinklength :: Int defopts :: Opts defopts = Opts { ident = defrealname &= typ "STR" &= help "set the bot's identity string (useful for contact info)" ,uagent = defuseragent &= typ "STR" &= help "set the bot's Http UserAgent string " ,delay = def &= help "wait for N minutes before starting (helps avoid mass joins)" ,interval = definterval &= name "i" &= help ("polling and announcing interval in minutes (default "++(show definterval)++")") ,cache_control = def &= explicit &= name "cache-control" &= typ "STR" &= help ("set a HTTP cache-control header when polling") ,idle = defidle &= help ("announce only when channel has been idle N minutes (default "++(show defidle)++")") ,max_items = defmaxitems &= help ("announce at most N items per interval (default "++(show defmaxitems)++")") ,recent = def &= name "r" &= help "announce up to N recent items at startup (default 0)" ,ignore_ids_and_times = def &= help "ignore feed item IDs and timestamps (use for feeds with bad ones)" ,allow_duplicates = def &= help "turn off duplicate announcement protection (enabled by default)" ,use_actions = def &= help "use CTCP ACTIONs instead of normal IRC messages" ,no_title = def &= help ("don't show item title (shown by default, up to "++(show maxtitlelength)++" chars)") ,author = def &= name "a" &= help ("show author (up to "++(show maxauthorlength)++" chars)") ,description = def &= name "d" &= help ("show description (up to "++(show maxdesclength)++" chars)") ,link_ = def &= help ("show link URL (up to "++(show maxlinklength)++" chars)") ,time = def &= help ("show timestamp (up to "++(show maxdatelength)++" chars)") ,email = def &= help "show email addresses (stripped by default)" ,html = def &= help "show HTML tags and entities (stripped by default)" ,replace = def &= typ "OLD/NEW" &= help "replace OLD with NEW (regexpr patterns)" ,num_iterations = def &= name "n" &= help "exit after N iterations" ,quiet = def &= help "silence normal console output" ,debug_irc = def &= help "log irc activity" ,debug_feed = def &= help "log feed items and polling stats" ,debug_xml = def &= help "log feed content" ,feed = def &= argPos 0 &= typ "FEEDURL" ,irc_address = def &= argPos 1 &= opt ("" :: String) &= typ "IRCSERVER[:PORT]/CHANNEL/NICK" } &= program progname &= groupname progname &= summary progversion data Opts = Opts { ident :: String ,uagent :: String ,delay :: Int ,interval :: Int ,cache_control :: String ,idle :: Int ,max_items :: Int ,recent :: Int ,ignore_ids_and_times :: Bool ,allow_duplicates :: Bool ,use_actions :: Bool ,no_title :: Bool ,author :: Bool ,description :: Bool ,link_ :: Bool ,time :: Bool ,email :: Bool ,html :: Bool ,replace :: [String] ,num_iterations :: (Maybe Int) ,quiet :: Bool ,debug_irc :: Bool ,debug_feed :: Bool ,debug_xml :: Bool ,feed :: String ,irc_address :: String } deriving (Show, Data, Typeable) data Reader = Reader { httpManager :: !(Maybe Manager) , iterationsleft :: !(Maybe Int) } deriving (Show) instance Show Manager where show = const "(Manager)" data Bot = Bot { socket :: !Handle -- ^ the bot's active IRC connection, or stdout indicating none , server :: !String -- ^ the IRC server's hostname or IP address, or "" indicating none , port :: !Int -- ^ the IRC server's port number , channel :: !String -- ^ the IRC channel to join , botnick :: !String -- ^ the bot's IRC nickname , announcequeue :: !(TChan String) -- ^ a shared queue of messages to be announced , batchindex :: !Int -- ^ how many announcements have been made in the current batch , lastmsgtime :: !UTCTime -- ^ the last time somebody spoke on the IRC channel } instance Show Bot where show Bot{socket=s,server=srv,port=p,channel=c,botnick=n,lastmsgtime=t} = printf "Bot{botnick=%s, socket=%s, server=%s, port=%d, channel=%s, lastmsgtime=%s}" n (show s) srv p c (show t) -- | rss2irc's application state, shared by all threads. data App = App {aOpts :: !Opts -- ^ initial command-line options ,aReader :: !Reader -- ^ the feed reader's state ,aBot :: !Bot -- ^ the irc bot's state } deriving (Show) type FeedAddress = String data FeedParseException = FeedParseException String deriving (Typeable) instance Exception FeedParseException instance Show FeedParseException where show (FeedParseException url) = printf "could not parse feed content from %s" url data IrcAddress = IrcAddress {ircaddrServer :: !String, ircaddrPort :: !(Maybe Int), ircaddrChannel :: !String, ircaddrNick :: !String } deriving (Show) data IrcException = IrcException String deriving (Typeable) instance Exception IrcException instance Show IrcException where show (IrcException msg) = printf "IRC error (%s)" msg -- deriving instance Generic Item -- instance NFData Item io :: MonadIO m => IO a -> m a io = liftIO rss2irc-1.2/Utils.hs0000644000000000000000000002637513315417657012616 0ustar0000000000000000{-# LANGUAGE PatternGuards, BangPatterns, DeriveDataTypeable, OverloadedStrings, ScopedTypeVariables #-} {- | Common utilities. Copyright (c) Don Stewart 2008-2009, Simon Michael 2009-2014 License: BSD3. -} module Utils ( module Utils, module Debug.Trace ) where import Codec.Binary.UTF8.String as UTF8 (decodeString, encodeString, isUTF8Encoded) import Control.Concurrent.MSampleVar import Control.Monad import Data.List import Data.Maybe import qualified Data.Text as T import Data.Time.Clock (UTCTime,getCurrentTime) import Data.Time.Format (parseTimeM) import Data.Time.LocalTime (LocalTime,getCurrentTimeZone,utcToLocalTime) import Prelude hiding (log) import System.Info import System.IO (stdout,hFlush) import Data.Time.Format (defaultTimeLocale) import Text.Feed.Query import Text.Feed.Types (Item) import Text.ParserCombinators.Parsec hiding (label) import Text.Printf (printf) import Text.RegexPR (splitRegexPR,gsubRegexPR) import Base import Debug.Trace -- | trace a showable expression strace :: Show a => a -> a strace a = trace (show a) a -- | labelled trace - like strace, with a label prepended ltrace :: Show a => String -> a -> a ltrace l a = trace (l ++ ": " ++ show a) a -- | monadic trace - like strace, but works as a standalone line in a monad mtrace :: (Monad m, Show a) => a -> m a mtrace a = strace a `seq` return a -- | trace an expression using a custom show function tracewith :: (a -> String) -> a -> a tracewith f e = trace (f e) e -- Light abstraction layer for thread-safe mutable data type Shared a = MSampleVar a newSharedVar :: a -> IO (MSampleVar a) newSharedVar = newSV getSharedVar :: MSampleVar a -> IO a getSharedVar v = do x <- readSV v writeSV v x return x putSharedVar :: MSampleVar a -> a -> IO () putSharedVar v x = writeSV v x -- Option parsing helpers ircAddressFromOpts :: Opts -> Maybe IrcAddress ircAddressFromOpts Opts{irc_address=""} = Nothing ircAddressFromOpts Opts{irc_address=a} = Just $ parseIrcAddress a parseIrcAddress :: String -> IrcAddress parseIrcAddress a = either (\e -> opterror $ printf "could not parse IRC address \"%s\"\n%s\n" a (show e)) id $ parse ircaddrp "" a where ircaddrp = choice' $ [ do -- pre 0.5 syntax: [irc://]NICK@IRCSERVER[:PORT]/[#]CHANNEL optional $ choice' $ map string ["irc://", "irc:"] n <- many1 $ noneOf "@" char '@' s <- many1 $ noneOf ":/" p <- optionMaybe $ char ':' >> read `fmap` many1 digit >>= return char '/' optional $ char '#' c <- many1 $ noneOf "/ \t" eof return $ IrcAddress s p ('#':c) n , do -- new easier syntax: [irc://]IRCSERVER[:PORT]/[#]CHANNEL/NICK optional $ choice' $ map string ["irc://", "irc:"] s <- many1 $ noneOf ":/" p <- optionMaybe $ char ':' >> read `fmap` many1 digit >>= return char '/' optional $ char '#' c <- many1 $ noneOf "/" char '/' n <- many1 $ noneOf "/ \t" eof return $ IrcAddress s p ('#':c) n ] -- | A version of error' that suggests --help. opterror :: String -> a opterror = error' . (++ " (see --help for usage)") -- | A version of error that's better at displaying unicode. error' :: String -> a error' = error . toPlatformString -- | Convert a feed item to a string for the bot to announce on irc. -- The announcement is likely but not guaranteed to fit within a -- single irc message. toAnnouncement:: Opts -> Item -> String toAnnouncement opts i = applyReplacements opts $ printf "%s%s%s%s%s" title desc author' date link' where title = unlessopt no_title $ maybe "" (truncateWordsAt maxtitlelength "..." . clean) (T.unpack <$> getItemTitle i) desc = ifopt description $ maybe "" ((" - "++) . truncateWordsAt maxdesclength "..." . clean) (T.unpack <$> getItemDescription i) author' = ifopt author $ maybe "" ((" "++) . parenthesise . truncateWordsAt maxauthorlength "..." . clean) (T.unpack <$> getItemAuthor i) date = ifopt time $ maybe "" ((" "++) . truncateAt maxdatelength "..." . clean) (T.unpack <$> getItemDate i) link' = ifopt link_ $ maybe "" ((" "++) . truncateAt maxlinklength "..." . clean) (T.unpack <$> getItemLink i) clean = oneline . trimwhitespace . striphtml . stripemail ifopt o = if o opts then id else const "" unlessopt o = if not $ o opts then id else const "" oneline = intercalate " " . map strip . lines -- two spaces to hint at newlines & brs trimwhitespace = gsubRegexPR "[ \t][ \t]+" " " striphtml = if html opts then id else stripHtml . brtonewline brtonewline = gsubRegexPR "(<|<) *br */?(>|>)" "\n" stripemail = if email opts then id else stripEmails parenthesise = (++")").("("++) -- | Split an announcement into one or more suitably truncated and -- formatted irc messages. Each call returns the next message and -- the remainder of the announcement. -- XXX n must be > length continuationsuffix splitAnnouncement :: String -> (String,String) splitAnnouncement a | length a <= maxmessagelength = (a,"") | otherwise = case splitAtWordBefore n a of (m,rest@(_:_)) -> (m++continuationsuffix, continuationprefix++rest) (m,"") -> (m, "") where n = maxmessagelength - length continuationsuffix continuationprefix, continuationsuffix :: String continuationprefix = "... " continuationsuffix = " ..." -- | Truncate a string, if possible at a word boundary, at or before -- the specified position, and indicate truncation with the specified -- suffix. The length of the returned string will be in the range -- n, n+length suffix. truncateWordsAt :: Int -> String -> String -> String truncateWordsAt n suffix s | s' == s = s | otherwise = s' ++ suffix where s' = fst $ splitAtWordBefore n s -- | Truncate a string at the specified position, and indicate -- truncation with the specified suffix. The length of the returned -- string will be in the range n, n+length suffix. truncateAt :: Int -> String -> String -> String truncateAt n suffix s | s' == s = s | otherwise = s' ++ suffix where s' = take n s -- | Split a string at or before the specified position, on a word boundary if possible. splitAtWordBefore :: Int -> String -> (String,String) splitAtWordBefore n s | null a || (null b) = (rstrip a, lstrip b) | last a == ' ' || (head b == ' ') || (not $ ' ' `elem` a) = (rstrip a, lstrip b) | otherwise = (rstrip $ take (length a - length partialword) a, partialword ++ lstrip b) where (a,b) = splitAt n s partialword = reverse $ takeWhile (/= ' ') $ reverse a -- | Apply all --replace substitutions to a string, in turn. -- Warning, will fail at runtime if there is a bad regexp. applyReplacements :: Opts -> String -> String applyReplacements opts = foldl' (.) id (reverse substitutions) where substitutions = map replaceOptToSubst $ replace opts replaceOptToSubst s = case splitRegexPR "(? gsubRegexPR pat sub _ -> id -- | Replace any HTML tags or entities in a string with a single space. stripHtml :: String -> String stripHtml = gsubRegexPR "(&[^ \t]*?;|<.*?>)" " " -- | Remove any email addresses from a string. stripEmails :: String -> String stripEmails = gsubRegexPR "(?i) ?(<|<)?\\b[-._%+a-z0-9]+@[-.a-z0-9]+\\.[a-z]{2,4}\\b(>|>)?" "" maybeRead :: Read a => String -> Maybe a maybeRead s = case reads s of [(x, _)] -> Just x _ -> Nothing decrementMaybe :: Enum a => Maybe a -> Maybe a decrementMaybe = maybe Nothing (Just . pred) -- | Parse a datetime string if possible, trying at least the formats -- likely to be used in RSS/Atom feeds. parseDateTime :: String -> Maybe UTCTime parseDateTime s = firstJust [parseTimeM True defaultTimeLocale f s' | f <- formats] where s' = adaptForParseTime s adaptForParseTime = gsubRegexPR "(....-..-..T..:..:..[\\+\\-]..):(..)" "\\1\\2" -- 2009-09-22T13:10:56+00:00 formats = -- http://hackage.haskell.org/packages/archive/time/1.1.4/doc/html/Data-Time-Format.html#v%3AformatTime [ "%a, %d %b %Y %T %z" -- Fri, 18 Sep 2009 12:42:07 -0400 ,"%a, %d %b %Y %T %Z" -- Fri, 25 Sep 2009 11:01:23 UTC ,"%Y-%m-%dT%T%z" -- 2009-09-22T13:10:56+0000 ] firstJust :: [Maybe a] -> Maybe a firstJust ms = case dropWhile isNothing ms of (m:_) -> m _ -> Nothing -- | Grammatically correct "every N minutes". everyMinutesString :: Int -> String everyMinutesString 1 = "every minute" everyMinutesString i = "every " ++ show i ++ " minutes" -- | Grammatically correct "in N minutes". inMinutesString :: Int -> String inMinutesString 1 = "in 1 minute" inMinutesString i = "in " ++ show i ++ " minutes" -- | Log some text to the console with a timestamp. log :: String -> IO () log s = do t <- getTimeStamp putStrLn $ printf "%s: %s" t s hFlush stdout -- | Decorate some multi-line text with a label and start/end separators. labelledText :: String -> String -> String labelledText label s = printf "========== %s:\n%s\n=============================================\n" label s getCurrentLocalTime :: IO LocalTime getCurrentLocalTime = do t <- getCurrentTime tz <- getCurrentTimeZone return $ utcToLocalTime tz t getTimeStamp :: IO String getTimeStamp = do t <- getCurrentLocalTime tz <- getCurrentTimeZone return $ printf "%s %s" (take 19 $ show t) (show tz) hours, minutes, seconds :: Int hours = 60 * minutes minutes = 60 * seconds seconds = 10^(6::Int) strip, lstrip, rstrip, dropws :: String -> String strip = lstrip . rstrip lstrip = dropws rstrip = reverse . dropws . reverse dropws = dropWhile (`elem` (" \t"::String)) chomp :: String -> String chomp = reverse . dropWhile (`elem` ("\n\r"::String)) . reverse isLeft :: Either a b -> Bool isLeft (Left _) = True isLeft _ = False -- platform strings -- | A platform string is a string value from or for the operating system, -- such as a file path or command-line argument (or environment variable's -- name or value ?). On some platforms (such as unix) these are not real -- unicode strings but have some encoding such as UTF-8. This alias does -- no type enforcement but aids code clarity. type PlatformString = String -- | Convert a possibly encoded platform string to a real unicode string. -- We decode the UTF-8 encoding recommended for unix systems -- (cf http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html) -- and leave anything else unchanged. fromPlatformString :: PlatformString -> String fromPlatformString s = if UTF8.isUTF8Encoded s then UTF8.decodeString s else s -- | Convert a unicode string to a possibly encoded platform string. -- On unix we encode with the recommended UTF-8 -- (cf http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html) -- and elsewhere we leave it unchanged. toPlatformString :: String -> PlatformString toPlatformString = case os of "unix" -> UTF8.encodeString "linux" -> UTF8.encodeString "darwin" -> UTF8.encodeString _ -> id -- | Backtracking choice, use this when alternatives share a prefix. -- Consumes no input if all choices fail. choice' :: [GenParser tok st a] -> GenParser tok st a choice' = choice . map Text.ParserCombinators.Parsec.try rss2irc-1.2/Feed.hs0000644000000000000000000002766513315420267012353 0ustar0000000000000000{-# LANGUAGE PatternGuards, BangPatterns, DeriveDataTypeable, OverloadedStrings, ScopedTypeVariables #-} {- | Feed stuff. Copyright (c) Don Stewart 2008-2009, Simon Michael 2009-2014 License: BSD3. -} module Feed where import Control.Concurrent (threadDelay) import Control.Concurrent.STM import Control.Exception import Control.Monad import Control.Monad.Trans.Resource (ResourceT, runResourceT) import qualified Data.ByteString.Char8 as B8 import qualified Data.ByteString.Lazy.Char8 as LB8 import Data.Maybe import Data.List import qualified Data.Text as T import System.IO.Storage import Network.HTTP.Conduit import Network.HTTP.Types (hCacheControl, hUserAgent) import Network.URI import Prelude hiding (log) import Safe import System.IO (stdout,hFlush) import Text.Feed.Import import Text.Feed.Query import Text.Feed.Types import Text.Printf (printf) import Text.RegexPR import Base import Utils -- deriving instance Eq Item instance Eq Item where (==) a b = let match f = f a == f b in all match [getItemTitle ,getItemLink ,getItemPublishDateString ,getItemDate ,getItemAuthor ,getItemCommentLink ,getItemFeedLink ,getItemRights ,getItemSummary ,getItemDescription ] && match getItemCategories && match getItemEnclosure && match getItemId -- | Poll the feed every interval minutes, ignoring transient IO -- errors, detecting announceable items and sending them to the -- announcer thread, forever or until the specified maximum number of -- iterations is reached. -- -- New item detection: this must be done carefully to avoid spamming -- IRC users with useless messages. The content fetched from real-world -- feeds may jitter due to http caching, unstable item ordering, -- unpredictable or missing item dates, etc. We support several strategies: -- -- - @topnew@: announce new unseen items at the top. -- In more detail: assume that feeds provide items sorted newest first. -- Then, announceable items are the new (newer pub date than the last -- announced item) and unseen (id not among the last N ids seen since -- startup) items at the top of the feed. This is the default strategy, -- best for most feeds. -- -- - @allnew@: announce new unseen items appearing anywhere in the feed. -- Good for feeds with unreliable item ordering, or to notice the items of -- feeds newly added to a planet (aggregator). -- -- - @top@: announce items appearing above the previous top item, new or not. -- Good for feeds not ordered by date, eg a darcs repo's. -- XXX none of these work for announcing recent-but-not-newest items from a blog added to a planet feedReader :: Shared App -> IO () feedReader !appvar = do -- first poll - prime the pump app@App{aOpts=opts@Opts{feed=url}, aReader=Reader{httpManager=mmanager,iterationsleft=numleft}, aBot=Bot{announcequeue=q}} <- getSharedVar appvar case numleft of Just 0 -> return () _ -> do unless (quiet opts) $ log $ printf "Polling %s %s" url (everyMinutesString $ interval opts) fetched <- fetchItems (fromJust mmanager) url opts let polls = 1 -- with --recent N, send last N (non-duplicate) items to announcer thread let unique = (if allow_duplicates opts then id else (elideDuplicates [])) fetched announceable = take (recent opts) unique numannounced = fromIntegral $ length announceable writeList2Chan q $ map (toAnnouncement opts) $ reverse announceable when (debug_feed opts) $ logPoll fetched announceable polls numannounced -- start iterating let seen = map (\i -> (itemId i, fromMaybe "" $ T.unpack <$> getItemTitle i)) fetched lastpubdate = maybe Nothing (fmap T.unpack . getItemPublishDateString) $ headMay unique putSharedVar appvar $ maybeDecrementIterationsLeft app feedReaderPoll appvar polls seen lastpubdate numannounced feedReaderPoll :: Shared App -> Integer -> [(String,String)] -> Maybe String -> Integer -> IO () feedReaderPoll !appvar !polls !seen !lastpubdate !numannounced = do -- second & subsequent polls - wait interval then look for new items app@App{aOpts=opts@Opts{feed=url}, aReader=Reader{httpManager=mmanager,iterationsleft=numleft}, aBot=Bot{announcequeue=q}} <- getSharedVar appvar case numleft of Just 0 -> return () _ -> do threadDelay $ (interval opts) * minutes when (debug_feed opts) $ log $ printf "polling %s" url fetched <- fetchItems (fromJust mmanager) url opts -- detect announceable items let seenids = map fst seen hasunseenid = (`notElem` seenids).itemId hasnewerdate = (`isNewerThan` lastpubdate).fmap T.unpack.getItemPublishDateString isunseenandnewer i = hasnewerdate i && hasunseenid i isprevioustop = (== head seenids).itemId announceable = (if allow_duplicates opts then id else (elideDuplicates seen)) $ reverse $ (if ignore_ids_and_times opts then takeWhile (not . isprevioustop) else filter isunseenandnewer) $ fetched -- send to announcer thread and iterate writeList2Chan q $ map (toAnnouncement opts) announceable let polls' = polls + 1 seen' = take windowsize $ (map (\i -> (itemId i, fromMaybe "" $ T.unpack <$> getItemTitle i)) fetched) ++ seen where windowsize = 200 lastpubdate' = maybe lastpubdate (fmap T.unpack.getItemPublishDateString) $ headMay announceable numannounced' = numannounced + fromIntegral (length announceable) putSharedVar appvar $ maybeDecrementIterationsLeft app when (debug_feed opts) $ logPoll fetched announceable polls' numannounced' feedReaderPoll appvar polls' seen' lastpubdate' numannounced' maybeDecrementIterationsLeft :: App -> App maybeDecrementIterationsLeft app@App{aReader=reader@Reader{iterationsleft=n}} = app{aReader=reader{iterationsleft=decrementMaybe n}} -- | Log debug info for a poll. logPoll :: [Item] -> [Item] -> Integer -> Integer -> IO () logPoll fetched announceable polls numannounced = do printItemDetails "feed items, in feed order" fetched printItemDetails "announceable items, oldest first" announceable _ <- printf "successful consecutive polls, items announced: %10d %10d\n" polls numannounced hFlush stdout -- | Fetch a feed's items, or the empty list in case of transient IO -- errors (and log those). fetchItems :: Manager -> FeedAddress -> Opts -> IO [Item] fetchItems manager url opts = runResourceT (feedItems `fmap` readFeed manager url opts) `catches` [Handler $ \(e :: IOException) -> handleFetchError e ,Handler $ \(e :: HttpException) -> handleFetchError e ,Handler $ \(e :: FeedParseException) -> handleFetchError e ] where handleFetchError e = do log $ printf "Error (%s), retrying %s" (show e) (inMinutesString $ interval opts) return [] -- | Fetch and parse a feed's content, or raise an exception. readFeed :: Manager -> FeedAddress -> Opts -> ResourceT IO Feed readFeed manager url opts = do s <- readUri manager url opts when (debug_xml opts) $ io $ log $ labelledText (printf "FEED CONTENT FROM %s " url) s case parseFeedString s of Nothing -> io $ throwIO $ FeedParseException url Just (XMLFeed _) -> io $ throwIO $ FeedParseException url Just f -> return f -- | Fetch the contents of a uri, which must be an ascii string. -- Redirects, authentication, https: and file: uris are supported. readUri :: Manager -> String -> Opts -> ResourceT IO String readUri manager s opts = case parseURI' s of Just URI{uriScheme="file:",uriPath=f} -> io $ readFeedFile f Just _ -> do -- LB8.unpack `fmap` simpleHttp s -- http-conduit is complex, cf https://github.com/snoyberg/http-conduit/issues/97 r <- parseUrlThrow s let cachecontrol = cache_control opts r' | null cachecontrol = r | otherwise = r{requestHeaders=(hCacheControl, B8.pack cachecontrol):requestHeaders r} r'' = r'{requestHeaders=(hUserAgent, B8.pack $ uagent opts):requestHeaders r'} rsp <- httpLbs r'' manager return $ LB8.unpack $ responseBody rsp Nothing -> opterror $ "could not parse URI: " ++ s -- | Parse a string to a URI, ensuring a simple filename is assigned the file: scheme. parseURI' :: String -> Maybe URI parseURI' s = case parseURIReference s of Just u -> Just $ u `relativeTo` nullURI{uriScheme="file:",uriPath="."} Nothing -> Nothing -- | A hacky stateful readFile to assist testing: this reads one or -- more concatenated copies of the feed from the file and returns one -- on each call, or the empty string when there are none left. -- Reads from stdin if the file path is "-". readFeedFile :: FilePath -> IO String readFeedFile f = do v <- getValue "globals" "feedfile" case v of Nothing -> do s <- case f of "-" -> getContents _ -> readFile f let (first:rest) = splitFeedCopies s putValue "globals" "feedfile" rest return first Just (first:rest) -> do putValue "globals" "feedfile" rest return first Just [] -> return "" where splitFeedCopies = initDef [""] . map (++"\n") . splitRegexPR "(?i)\n*" -- | Check if the first date is newer than the second, where dates (from -- feed items) can be Nothing, a parseable date string or unparseable. In -- the (likely) event we can't parse two dates, return True. isNewerThan :: Maybe String -> Maybe String -> Bool isNewerThan _ Nothing = True isNewerThan Nothing _ = True isNewerThan (Just s2) (Just s1) = case (parseDateTime s2, parseDateTime s1) of (Just d2, Just d1) -> d2 > d1 _ -> True -- | Remove any items from the list which duplicate another item in -- this or the second list (the last N fetched items), where -- "duplicates" means "would generate a similar irc message", ie it -- has the same item title. This is a final de-duplication pass before -- announcing on irc. elideDuplicates :: [(String,String)] -> [Item] -> [Item] elideDuplicates seen new = filter (\a -> not $ fromMaybe "" (T.unpack <$> getItemTitle a) `elem` seentitles) $ nubBy (\a b -> getItemTitle a == getItemTitle b) new where seentitles = map snd seen -- | Get the best available unique identifier for a feed item. itemId :: Item -> String itemId i = case getItemId i of Just (_,t) -> T.unpack t Nothing -> case getItemTitle i of Just t -> T.unpack t Nothing -> case getItemDate i of Just t -> T.unpack t Nothing -> show i -- | Dump item details to the console for debugging. printItemDetails :: String -> [Item] -> IO () printItemDetails hdr is = printf "%s: %d\n%s" hdr count items >> hFlush stdout where items = unlines [printf " %-29s%s %-*s" d p twidth t | (d,p,t,_) <- fields] twidth = maximum $ map (length.fromMaybe "".fmap T.unpack.getItemTitle) is -- subhdr = "(date, (publish date if different), title)\n" -- subhdr' = if null is then "" else subhdr count = length is fields = [(d, if p==d then "" :: String else printf " pubdate:%-29s" p, t, i) | item <- is ,let d = fromMaybe "" $ getItemDate item ,let p = fromMaybe "" $ getItemPublishDateString item ,let t = fromMaybe "" $ getItemTitle item ,let i = maybe "" show $ getItemId item ] writeList2Chan :: TChan a -> [a] -> IO () writeList2Chan q as = do atomically $ forM as $ \a -> writeTChan q a return () rss2irc-1.2/Irc.hs0000644000000000000000000002027013315415236012206 0ustar0000000000000000{-# LANGUAGE PatternGuards, BangPatterns, DeriveDataTypeable, OverloadedStrings, ScopedTypeVariables #-} {- | IRC stuff Copyright (c) Don Stewart 2008-2009, Simon Michael 2009-2014 License: BSD3. -} module Irc where import Control.Concurrent (threadDelay) import Control.Concurrent.STM import Control.Exception import Control.Monad import Data.ByteString.Char8 as B8 (pack, unpack) import Data.List import Data.Maybe import Data.Time.Clock (getCurrentTime,diffUTCTime) import Network (PortID(PortNumber), connectTo) import Network.IRC (Message(Message),msg_command,msg_params,decode,encode,joinChan,privmsg) import Prelude hiding (log) import System.IO (BufferMode(NoBuffering),stdout,hSetBuffering,hFlush,hClose,hGetLine,hPutStr) import Text.Printf import Base import Utils -- | Connect to the irc server. connect :: App -> IO App connect !app@App{aOpts=opts, aBot=bot@Bot{server=srv,port=p,channel=c,botnick=n}} = do unless (quiet opts) $ log $ n ++ " connecting to " ++ (if null srv then "(simulated)" else printf "%s, channel %s" srv c) bot' <- if null srv then return bot else do h <- connectTo srv (PortNumber $ fromIntegral p) hSetBuffering h NoBuffering return bot{socket=h} ircWrite opts bot' n ircWrite opts bot' $ if null (ident opts) then defusername else ident opts (connected,err) <- if null srv then return (True,"") else ircWaitForConnectConfirmation opts bot' -- some servers require this unless connected $ throw $ IrcException err ircWrite opts bot' $ B8.unpack $ encode $ joinChan $ B8.pack c unless (quiet opts) $ log "connected." return app{aBot=bot'} -- | Disconnect from the irc server, if connected. disconnect :: App -> IO () disconnect App{aBot=Bot{server=srv,socket=s}} | s == stdout = return () | otherwise = log (printf "disconnecting from %s" srv) >> hClose s -- | Wait for server connection confirmation. ircWaitForConnectConfirmation :: Opts -> Bot -> IO (Bool,String) ircWaitForConnectConfirmation _ Bot{server=""} = return (True,"") ircWaitForConnectConfirmation !opts !bot@Bot{socket=h} = do s <- hGetLine h when (debug_irc opts) $ log $ printf "<-%s" s if isPing s then ircPong opts bot s >> ircWaitForConnectConfirmation opts bot else if isResponseOK s then return (True, chomp s) else if isNotice s then ircWaitForConnectConfirmation opts bot else return (False, chomp s) where parseRespCode x = if length (words x) > 1 then (words x) !! 1 else "000" isResponseOK x = (parseRespCode x) `elem` [ "001", "002", "003", "004" ] isNotice x = (head $ parseRespCode x) `elem` ('0':['a'..'z']++['A'..'Z']) {- 2011-10-18 13:28:20 PDT: <-PING :niven.freenode.net 2011-10-18 13:28:20 PDT: ->PONG niven.freenode.net hGetIRCLine :: Handle -> IO MsgString Read an IRC message string. hGetMessage :: Handle -> IO Message Read the next valid IRC message. hPutCommand :: Handle -> Command -> IO () Write an IRC command with no origin. hPutMessage :: Handle -> Message -> IO () Write an IRC message. -} -- | Run forever, responding to irc PING commands to keep the bot connected. -- Also keeps track of the last time a message was sent, for --idle. ircResponder :: Shared App -> IO () ircResponder !appvar = do app@App{aOpts=opts,aBot=bot@Bot{server=srv,socket=h}} <- getSharedVar appvar if null srv then threadDelay (maxBound::Int) else do s <- hGetLine h let s' = init s when (debug_irc opts) $ log $ printf "<-%s" s' let respond | isMessage s = do t <- getCurrentTime putSharedVar appvar app{aBot=bot{lastmsgtime=t}} | isPing s = ircPong opts bot s' | otherwise = return () respond ircResponder appvar -- | Run forever, printing announcements appearing in the bot's announce -- queue to its irc channel, complying with bot and irc server policies. -- Specifically: -- -- - no messages until --idle minutes of silence on the channel -- -- - no more than 400 chars per message -- -- - no more than one message per 2s -- -- - no more than --max-items feed items announced per polling interval -- -- - no more than --max-items messages per polling interval, except a -- final item split across multiple messages will be completed. -- XXX On freenode, six 400-char messages in 2s can still cause a flood. -- Try limiting chars-per-period, or do ping-pong ? ircAnnouncer :: Shared App -> IO () ircAnnouncer !appvar = do -- wait for something to announce App{aBot=Bot{announcequeue=q}} <- getSharedVar appvar ann <- atomically $ readTChan q -- re-read bot to get an up-to-date idle time app@App{aOpts=opts, aBot=bot@Bot{server=srv,batchindex=i}} <- getSharedVar appvar idletime <- channelIdleTime bot let batchsize = max_items opts requiredidle = idle opts -- minutes pollinterval = interval opts -- minutes sendinterval = if null srv then 0 else 2 -- seconds iscontinuation = continuationprefix `isPrefixOf` ann go | i >= batchsize && not iscontinuation = do -- reached max batch size, sleep when (debug_irc opts) $ log $ printf "sent %d messages in this batch, max is %d, sleeping for %dm" i batchsize pollinterval threadDelay $ pollinterval * minutes atomically $ unGetTChan q ann putSharedVar appvar app{aBot=bot{batchindex=0}} ircAnnouncer appvar | requiredidle > 0 && (idletime < requiredidle) = do -- not yet at required idle time, sleep let idleinterval = requiredidle - idletime when (debug_irc opts) $ log $ printf "channel has been idle %dm, %dm required, sleeping for %dm" idletime requiredidle idleinterval threadDelay $ idleinterval * minutes atomically $ unGetTChan q ann ircAnnouncer appvar | otherwise = do -- ok, announce it when (debug_irc opts) $ do let s | requiredidle == 0 = "" :: String | otherwise = printf " and channel has been idle %dm" idletime log $ printf "sent %d messages in this batch%s, sending next" i s let (a,rest) = splitAnnouncement ann when (not $ null rest) $ atomically $ unGetTChan q rest ircPrivmsg opts bot a threadDelay $ sendinterval * seconds putSharedVar appvar app{aBot=bot{batchindex=i+1}} ircAnnouncer appvar go -- | The time in minutes since the last message on this bot's channel, or -- otherwise since joining the channel. Leap seconds are ignored. channelIdleTime :: Bot -> IO Int channelIdleTime (Bot{lastmsgtime=t1}) = do t <- getCurrentTime return $ round (diffUTCTime t t1) `div` 60 -- IRC utils -- | Send a response to the irc server's ping. ircPong :: Opts -> Bot -> String -> IO () ircPong opts b x = ircWrite opts b $ printf "PONG :%s" (drop 6 x) -- | Send a privmsg to the bot's irc server & channel, and to stdout unless --quiet is in effect. ircPrivmsg :: Opts -> Bot -> String -> IO () ircPrivmsg opts bot@(Bot{channel=c}) msg = do ircWrite opts bot $ B8.unpack $ encode $ privmsg (B8.pack c) (B8.pack msg') unless (quiet opts) $ putStrLn msg >> hFlush stdout where msg' | use_actions opts = "\1ACTION " ++ msg ++ "\1" | otherwise = msg -- | Send a message to the bot's irc server, and log to the console if --debug-irc is in effect. ircWrite :: Opts -> Bot -> String -> IO () ircWrite opts (Bot{server=srv,socket=h}) s = do when (debug_irc opts) $ log $ printf "->%s" s -- (B8.unpack $ showCommand c) unless (null srv) $ hPutStr h (s++"\r\n") isMessage :: String -> Bool isMessage s = isPrivmsg s && not ("VERSION" `elem` (maybe [] msg_params $ decode $ B8.pack s)) isPrivmsg :: String -> Bool isPrivmsg s = case decode $ B8.pack s of Just Message{msg_command="PRIVMSG"} -> True _ -> False isPing :: String -> Bool isPing s = case decode $ B8.pack s of Just Message{msg_command="PING"} -> True _ -> False rss2irc-1.2/LICENSE0000644000000000000000000000272211704673077012155 0ustar0000000000000000Copyright (c) 2008, 2009 Don Stewart, 2009 Simon Michael All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the author nor the names of his contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. rss2irc-1.2/Setup.lhs0000644000000000000000000000011411704673077012751 0ustar0000000000000000#!/usr/bin/env runhaskell > import Distribution.Simple > main = defaultMain rss2irc-1.2/rss2irc.cabal0000644000000000000000000000522413315421171013505 0ustar0000000000000000name: rss2irc -- also set version in Base.hs version: 1.2 homepage: http://hackage.haskell.org/package/rss2irc license: BSD3 license-file: LICENSE author: Don Stewart , Simon Michael maintainer: Simon Michael category: IRC synopsis: watches an RSS/Atom feed and writes it to an IRC channel description: rss2irc is an IRC bot that polls an RSS or Atom feed and announces updates to an IRC channel, with options for customizing output and behavior. It aims to be an easy-to-use, reliable, well-behaved bot. . Usage: @rss2irc FEEDURL [BOTNAME\@IRCSERVER/#CHANNEL] [OPTS]@ . For example, to announce Hackage uploads (like hackagebot): . > $ rss2irc http://hackage.haskell.org/packages/archive/recent.rss mybot@irc.freenode.org/#haskell stability: beta tested-with: GHC==8.0 cabal-version: >= 1.10 build-type: Simple extra-source-files: CHANGES README.md stack.yaml executable rss2irc main-is: rss2irc.hs other-modules: Base, Utils, Feed, Irc default-language: Haskell2010 ghc-options: -threaded -Wall -fno-warn-orphans -fno-warn-unused-do-bind build-depends: base >= 4 && < 5 ,SafeSemaphore >= 0.10 && < 1.1 ,bytestring ,cmdargs ,containers ,deepseq ,irc >= 0.6 && < 0.7 ,feed >= 1 && < 1.1 ,http-client >= 0.2.1 && < 0.6 ,http-conduit >= 1.9 && < 2.4 ,resourcet >= 0.4.4 && < 1.3 ,http-types >= 0.6.4 && < 1.0 ,io-storage >= 0.3 && < 0.4 ,network >= 2.6 && < 2.7 ,network-uri >= 2.6 && < 2.7 ,old-locale ,parsec ,regexpr >= 0.5 && < 0.6 ,safe >= 0.2 && < 0.4 ,split >= 0.2 && < 0.3 ,stm >= 2.1 && < 3.0 ,text >= 0.11 && < 1.3 ,transformers >= 0.2 && < 0.6 ,time >= 1.5 && < 1.10 ,utf8-string ,SafeSemaphore source-repository head type: darcs location: http://hub.darcs.net/simon/rss2irc rss2irc-1.2/CHANGES0000644000000000000000000000352013315421466012131 0ustar00000000000000001.2 (2018/06/29) * build with latest stackage LTS, feed 1.0.0.0 (#4) 1.1 (2016/11/1) * update for GHC 8, stackage lts-7 * allow latest http-client, http-conduit, resourcet, network, transformers, text; drop cabal-file-th * update to irc 0.6 (warning: probably not unicode-safe) * use STM's TChan, fixing warnings and possibly hangs * use safer MSampleVar, possibly fixing hangs 1.0.6 (2014/4/13) * minimal changes to build with feed 0.3.9.* and other libs in current Debian unstable (sid) 1.0.5 (2014/2/27) * avoid feed 0.3.9.2 which has changed its API 1.0.4 (2013/9/5) * fix compilation with GHC 7.4 (Fabien Andre) 1.0.3 (2013/2/22) * fix http-conduit usage so the feed poller doesn't die within hours 1.0.2 (2013/2/18) * `--use-actions` works again 1.0.1 (2013/2/15) * fix release notes formatting on hackage 1.0 (2013/2/15) New: * more robust item detection and duplicate announcement protection, with simpler options * easier irc address syntax, drop -p/--port option * can poll https feeds * can poll from stdin (-) * can poll a file containing multiple copies of a feed (eg for testing) * `--cache-control` option sets a HTTP Cache-Control header * `--use-actions` announces with CTCP ACTIONs (like the /me command) Fixed: * updated for GHC 7.6 & current libs * initialises http properly on microsoft windows * builds threaded and optimised by default * thread and error handling is more robust, eg don't ignore exceptions in the irc writer thread * can poll urls with semicolon parameter separator (eg darcsweb's) * can announce item urls containing percent * no longer adds stray "upload:" to IRC messages * renamed --dupe-descriptions to `--allow-duplicates` * dropped --debug flag * new item detection and announcing is more robust * announcements on console are clearer * a simulated irc connection is not logged unless --debug-irc is used rss2irc-1.2/README.md0000644000000000000000000000133012347142402012404 0ustar0000000000000000rss2irc is an IRC bot that polls an RSS or Atom feed and announces updates to an IRC channel, with options for customizing output and behavior. It aims to be an easy-to-use, reliable, well-behaved bot. Usage: `rss2irc [OPTIONS] FEEDURL [IRCSERVER[:PORT]/[#]CHANNEL/NICK]` Example: $ rss2irc http://hackage.haskell.org/packages/archive/recent.rss irc.freenode.net/mychannel/mybot Known limitations: - If the feed goes down for a while and then comes back up, the bot may re-announce all items. - Memory is leaked on each poll, causing rss2irc bots to consume more memory over time. This will be more noticeable when feeds have large content and are polled frequently. Restarting rss2irc bots daily is recommended. rss2irc-1.2/stack.yaml0000644000000000000000000000014113315415762013125 0ustar0000000000000000resolver: lts-11.15 packages: - '.' #flags: {} extra-deps: - mtlparse-0.1.4.0 - regexpr-0.5.4