feed2imap-1.3.4/0000755000004100000410000000000014731252744013414 5ustar www-datawww-datafeed2imap-1.3.4/data/0000755000004100000410000000000014731252744014325 5ustar www-datawww-datafeed2imap-1.3.4/data/doc/0000755000004100000410000000000014731252744015072 5ustar www-datawww-datafeed2imap-1.3.4/data/doc/feed2imap/0000755000004100000410000000000014731252744016726 5ustar www-datawww-datafeed2imap-1.3.4/data/doc/feed2imap/examples/0000755000004100000410000000000014731252744020544 5ustar www-datawww-datafeed2imap-1.3.4/data/doc/feed2imap/examples/feed2imaprc0000644000004100000410000000711514731252744022654 0ustar www-datawww-data# Global options: # max-failures: maximum number of failures allowed before they are reported in # normal mode (default 10). By default, failures are only visible in verbose # mode. Most feeds tend to suffer from temporary failures. # dumpdir: (for debugging purposes) directory where all fetched feeds will be # dumped. # debug-updated: (for debugging purposes) if true, display a lot of information # about the "updated-items" algorithm. # include-images: download images and include them in the mail? (true/false) # reupload-if-updated: when an item is updated, and was previously deleted, # reupload it? (true/false, default true) # default-email: default email address in the format foo@example.com # disable-ssl-verification: disable SSL certification when connecting # to IMAPS accounts (true/false) # timeout: time before getting timeout when fetching feeds (default 30) in seconds # # Per-feed options: # name: name of the feed (must be unique) # url: HTTP[S] address where the feed has to be fetched # target: the IMAP URI where to put emails. Should start with imap:// for IMAP, # imaps:// for IMAPS and maildir:// for a path to a local maildir. 
# min-frequency: (in HOURS) is the minimum frequency with which this particular # feed will be fetched # disable: if set to something, the feed will be ignored # include-images: download images and include them in the mail? (true/false) # reupload-if-updated: when an item is updated, and was previously deleted, # reupload it? (true/false, default true) # always-new: feed2imap tries to use a clever algorithm to determine whether # an item is new or has been updated. It doesn't work well with some web apps # like mediawiki. When this flag is enabled, all items which don't match # exactly a previously downloaded item are considered as new items. # ignore-hash: Some feeds change the content of their items all the time, so # feed2imap detects that they have been updated at each run. When this flag # is enabled, feed2imap ignores the content of an item when determining # whether the item is already known. # dumpdir: (for debugging purposes) directory where all fetched feeds will be # dumped. # Snownews/Liferea scripts support : # execurl: Command to execute that will display the RSS/Atom feed on stdout # filter: Command to execute which will receive the RSS/Atom feed on stdin, # modify it, and output it on stdout. # For more information: http://kiza.kcore.de/software/snownews/snowscripts/ # # # If your login contains an @ character, replace it with %40. 
Other reserved # characters can be escaped in the same way (see man ascii to get their code) feeds: - name: feed2imap url: http://home.gna.org/feed2imap/feed2imap.rss target: imap://luser:password@imap.apinc.org/INBOX.Feeds.Feed2Imap - name: lucas url: http://www.lucas-nussbaum.net/blog/?feed=rss2 target: imap://luser:password@imap.apinc.org/INBOX.Feeds.Lucas - name: JabberFrWiki url: http://wiki.jabberfr.org/index.php?title=Special:Recentchanges&feed=rss target: imaps://luser:password@imap.apinc.org/INBOX.Feeds.JabberFR always-new: true - name: LeMonde execurl: "wget -q -O /dev/stdout http://www.lemonde.fr/rss/sequence/0,2-3208,1-0,0.xml" filter: "/home/lucas/lemonde_getbody" target: imap://luser:password@imap.apinc.org/INBOX.Feeds.LeMonde # It is also possible to reuse the same string in the target parameter: # target-refix: &target "imap://user:pass@host/rss." # feeds: # - name: test1 # target: [ *target, 'test1' ] # ... # - name: test2 # target: [ *target, 'test2' ] # ... # vim: ft=yaml:sts=2:expandtab feed2imap-1.3.4/data/man/0000755000004100000410000000000014731252744015100 5ustar www-datawww-datafeed2imap-1.3.4/data/man/man5/0000755000004100000410000000000014731252744015740 5ustar www-datawww-datafeed2imap-1.3.4/data/man/man5/feed2imaprc.50000644000004100000410000000235214731252744020211 0ustar www-datawww-data.TH feed2imaprc 5 "Jul 25, 2005" .SH NAME feed2imaprc \- feed2imap configuration file .SH SYNOPSIS \fBfeed2imaprc\fR is feed2imap's configuration file. It is usually located in \fB~/.feed2imaprc\fR. .SH EXAMPLE See \fB/usr/share/doc/feed2imap/examples/feed2imaprc\fR. .SH "RESERVED CHARACTERS" Some characters are reserved in RFC2396 (URI). If you need to include a reserved character in the login/password part of your target URI, replace it with its hex code. For example, @ can be replaced by %40. .SH BUGS This manpage should probably give more details. However, the example configuration file is very well documented. 
.SH "SEE ALSO" Homepage : http://home.gna.org/feed2imap/ .PP \fBfeed2imap\fR(1) .SH AUTHOR Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net .PP This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. .PP This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. feed2imap-1.3.4/data/man/man1/0000755000004100000410000000000014731252744015734 5ustar www-datawww-datafeed2imap-1.3.4/data/man/man1/feed2imap-cleaner.10000644000004100000410000000301414731252744021257 0ustar www-datawww-data.TH feed2imap\-cleaner 1 "Jul 25, 2005" .SH NAME feed2imap\-cleaner \- Removes old items from IMAP folders .SH SYNOPSIS \fBfeed2imap\-cleaner\fR [OPTIONS] .SH DESCRIPTION feed2imap\-cleaner deletes old items from IMAP folders specified in the configuration file. The actual query string used to determine whether an item is old is : "SEEN NOT FLAGGED BEFORE (3 days ago)". Which means that an item WON'T be deleted if it satisfies one of the following conditions : .TP 0.2i \(bu It isn't 3 days old ; .TP 0.2i \(bu It hasn't been read yet ; .TP 0.2i \(bu It is flagged (marked as Important, for example). .TP \fB\-d\fR, \fB\-\-dry\-run\fR Don't remove anything, but show what would be removed if run without this option. .TP \fB\-f\fR, \fB\-\-config \fIfile\fB\fR Use another config file (~/.feed2imaprc is the default). .SH BUGS Deletion criteria should probably be more configurable. 
.SH "SEE ALSO" Homepage : http://home.gna.org/feed2imap/ .PP \fBfeed2imaprc\fR(5), \fBfeed2imap\fR(1) .SH AUTHOR Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net .PP This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. .PP This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. feed2imap-1.3.4/data/man/man1/feed2imap.10000644000004100000410000000272614731252744017661 0ustar www-datawww-data.TH feed2imap 1 "Jul 25, 2005" .SH NAME feed2imap \- clever RSS/Atom feed aggregator .SH SYNOPSIS \fBfeed2imap\fR [OPTIONS] .SH DESCRIPTION feed2imap is an RSS/Atom feed aggregator. After downloading feeds (over HTTP or HTTPS), it uploads them to a specified folder of an IMAP mail server. The user can then access the feeds using Mutt, Evolution, Mozilla Thunderbird or even a webmail. .TP \fB\-V\fR, \fB\-\-version\fR Show version information. .TP \fB\-v\fR, \fB\-\-verbose\fR Run in verbose mode. .TP \fB\-c\fR, \fB\-\-rebuild\-cache\fR Rebuilds the cache. Fetches all items and marks them as already seen. Useful if you lose your .feed2imap.cache file. .TP \fB\-f\fR, \fB\-\-config \fIfile\fB\fR Use another config file (~/.feed2imaprc is the default). 
.SH "SEE ALSO" Homepage : http://home.gna.org/feed2imap/ .PP \fBfeed2imaprc\fR(5), \fBfeed2imap\-cleaner\fR(1), \fBfeed2imap\-dumpconfig\fR(1), \fBfeed2imap\-opmlimport\fR(1) .SH AUTHOR Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net .PP This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. .PP This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. feed2imap-1.3.4/data/man/man1/feed2imap-dumpconfig.10000644000004100000410000000174414731252744022011 0ustar www-datawww-data.TH feed2imap\-dumpconfig 1 "Jul 25, 2005" .SH NAME feed2imap\-dumpconfig \- Dump feed2imap config .SH SYNOPSIS \fBfeed2imap\-dumpconfig\fR [OPTIONS] .SH DESCRIPTION feed2imap\-dumpconfig dumps the content of your feed2imaprc to screen. .TP \fB\-f\fR, \fB\-\-config \fIfile\fB\fR Use another config file (~/.feed2imaprc is the default). .SH "SEE ALSO" Homepage : http://home.gna.org/feed2imap/ .PP \fBfeed2imaprc\fR(5), \fBfeed2imap\fR(1) .SH AUTHOR Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net .PP This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. .PP This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
feed2imap-1.3.4/data/man/man1/feed2imap-opmlimport.10000644000004100000410000000215014731252744022050 0ustar www-datawww-data.TH feed2imap\-opmlimport 1 "Jul 25, 2005" .SH NAME feed2imap\-opmlimport \- Convert an OPML subscription list to a feed2imap config file .SH SYNOPSIS \fBfeed2imap\-opmlimport\fR .SH DESCRIPTION feed2imap\-opmlimport reads an OPML subscription list on standard input and outputs a feed2imap configuration file on standard output. The resulting configuration file will require some tweaking. .SH BUGS Should probably accept parameters to be able to change default values. .SH "SEE ALSO" Homepage : http://home.gna.org/feed2imap/ .PP \fBfeed2imaprc\fR(5), \fBfeed2imap\fR(1) .SH AUTHOR Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net .PP This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. .PP This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. feed2imap-1.3.4/bin/0000755000004100000410000000000014731252744014164 5ustar www-datawww-datafeed2imap-1.3.4/bin/feed2imap-opmlimport0000755000004100000410000000303114731252744020143 0ustar www-datawww-data#!/usr/bin/ruby =begin Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server Copyright (c) 2005 Lucas Nussbaum This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA =end $:.unshift File.join(File.dirname(__FILE__), '..', 'lib') require 'rexml/document' require 'yaml' DEFAULTIMAPFOLDER = 'imap://login:password@imapserver/folder.folder2' opml = ARGV[0] doc = nil doc = REXML::Document::new(IO.read(opml)) feeds = [] doc.root.each_element('//outline') do |e| if u = e.attribute('xmlUrl') || e.attribute('htmlUrl') # dirty liferea hack next if u.value == 'vfolder' # get title t = e.attribute('text') || e.attribute('Title') || nil if t.nil? title = '*** FEED TITLE (must be unique) ***' else title = t.value end url = u.value feeds.push({'name' => title, 'url' => url, 'target' => DEFAULTIMAPFOLDER}) end end YAML::dump({'feeds' => feeds}, $stdout) feed2imap-1.3.4/bin/feed2imap0000755000004100000410000000233014731252744015744 0ustar www-datawww-data#!/usr/bin/ruby $:.unshift File.join(File.dirname(__FILE__), '..', 'lib') require 'feed2imap/feed2imap' require 'optparse' verbose = false version = false cacherebuild = false configf = ENV['HOME'] + '/.feed2imaprc' progname = File::basename($PROGRAM_NAME) opts = OptionParser::new do |opts| opts.program_name = progname opts.banner = "Usage: #{progname} [options]" opts.separator "" opts.separator "Options:" opts.on("-v", "--verbose", "Verbose mode") do |v| verbose = true end opts.on("-d", "--debug", "Debug mode") do |v| verbose = :debug end opts.on("-V", "--version", "Display Feed2Imap version") do |v| version = true end opts.on("-c", "--rebuild-cache", "Cache rebuilding run : will fetch everything and add to cache, without uploading to the IMAP server. 
Useful if your cache file was lost, and you don't want to re-read all the items.") do |c| cacherebuild = true end opts.on("-f", "--config ", "Select alternate config file") do |f| configf = f end end begin opts.parse!(ARGV) rescue OptionParser::ParseError => pe opts.warn pe puts opts exit 1 end if version puts "Feed2Imap v.#{Feed2Imap::VERSION}" else Feed2Imap::new(verbose, cacherebuild, configf) end feed2imap-1.3.4/bin/feed2imap-cleaner0000755000004100000410000000132514731252744017356 0ustar www-datawww-data#!/usr/bin/ruby $:.unshift File.join(File.dirname(__FILE__), '..', 'lib') require 'feed2imap/feed2imap' require 'optparse' configf = ENV['HOME'] + '/.feed2imaprc' dryrun = false opts = OptionParser::new do |opts| opts.banner = "Usage: feed2imap-cleaner [options]" opts.separator "" opts.separator "Options:" opts.on("-d", "--dry-run", "Dont really remove messages") do |v| dryrun = true end opts.on("-f", "--config ", "Select alternate config file") do |f| configf = f end end opts.parse!(ARGV) config = nil File::open(configf) { |f| config = F2IConfig::new(f) } config.imap_accounts.each_value do |ac| ac.connect end config.feeds.each do |f| f.imapaccount.cleanup(f.folder, dryrun) end feed2imap-1.3.4/bin/feed2imap-dumpconfig0000755000004100000410000000254414731252744020104 0ustar www-datawww-data#!/usr/bin/ruby =begin Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server Copyright (c) 2005 Lucas Nussbaum This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
# feed2imap-dumpconfig: parse the feed2imap configuration file and print a
# human-readable summary of it (F2IConfig#to_s) on standard output.
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
require 'feed2imap/config'
require 'optparse'

configf = ENV['HOME'] + '/.feed2imaprc'

parser = OptionParser::new do |opts|
  opts.banner = "Usage: feed2imap-dumpconfig [options]"
  opts.separator ""
  opts.separator "Options:"
  # The switch must declare its argument (FILE); otherwise OptionParser
  # treats it as a flag and hands `true` to the block instead of the path.
  opts.on("-f", "--config FILE", "Select alternate config file") do |f|
    configf = f
  end
end
parser.parse!(ARGV)

unless File::exist?(configf)
  # Interpolate the variable that actually holds the path; the previous
  # message referenced an undefined name and died with a NameError.
  puts "Configuration file #{configf} not found."
  exit(1)
end

File::open(configf) { |f| puts F2IConfig::new(f).to_s }
:required_rubygems_version= s.require_paths = ["lib".freeze] s.authors = ["Lucas Nussbaum".freeze] s.date = "2024-12-20" s.description = "RSS/Atom feed aggregator".freeze s.executables = ["feed2imap".freeze, "feed2imap-cleaner".freeze, "feed2imap-dumpconfig".freeze, "feed2imap-opmlimport".freeze] s.files = ["COPYING".freeze, "ChangeLog".freeze, "README".freeze, "Rakefile".freeze, "bin/feed2imap".freeze, "bin/feed2imap-cleaner".freeze, "bin/feed2imap-dumpconfig".freeze, "bin/feed2imap-opmlimport".freeze, "data/doc/feed2imap/examples/feed2imaprc".freeze, "data/man/man1/feed2imap-cleaner.1".freeze, "data/man/man1/feed2imap-dumpconfig.1".freeze, "data/man/man1/feed2imap-opmlimport.1".freeze, "data/man/man1/feed2imap.1".freeze, "data/man/man5/feed2imaprc.5".freeze, "lib/feed2imap.rb".freeze, "lib/feed2imap/cache.rb".freeze, "lib/feed2imap/config.rb".freeze, "lib/feed2imap/feed2imap.rb".freeze, "lib/feed2imap/html2text-parser.rb".freeze, "lib/feed2imap/httpfetcher.rb".freeze, "lib/feed2imap/imap.rb".freeze, "lib/feed2imap/itemtomail.rb".freeze, "lib/feed2imap/maildir.rb".freeze, "lib/feed2imap/rexml_settings.rb".freeze, "lib/feed2imap/sgml-parser.rb".freeze, "lib/feed2imap/version.rb".freeze, "test/maildir/cur/1376317520.15784_1.debian:2,S".freeze, "test/maildir/cur/1376317520.15789_1.debian:2,S".freeze, "test/maildir/cur/1376319137.17850_1.debian:2,".freeze, "test/maildir/cur/1376320022.18396_5.debian:2,FS".freeze, "test/maildir/new/1376320099.18396_7.debian".freeze, "test/tc_cache.rb".freeze, "test/tc_config.rb".freeze, "test/tc_httpfetcher.rb".freeze, "test/tc_itemtomail.rb".freeze, "test/tc_maildir.rb".freeze] s.rubygems_version = "3.3.15".freeze s.summary = "RSS/Atom feed aggregator".freeze if s.respond_to? :specification_version then s.specification_version = 4 end if s.respond_to? 
require 'zlib'
require 'stringio' # StringIO wraps gzip-encoded response bodies
require 'time'     # Time#httpdate for the If-Modified-Since header
require 'net/http'
# get openssl if available
begin
  require 'net/https'
rescue LoadError
  # Without OpenSSL only plain HTTP feeds can be fetched.
end
require 'uri'

# max number of redirections
MAXREDIR = 5
HTTPDEBUG = false

# Class used to retrieve the feed over HTTP
class HTTPFetcher
  # Default read/open timeout in seconds; should be enough for everybody...
  DEFAULT_TIMEOUT = 30

  # Read/open timeout (in seconds) applied to every request.
  attr_writer :timeout

  # The default has to be set per instance: an assignment in the class body
  # only creates a class-level instance variable that instances never see,
  # which left the timeout at nil unless the caller set it explicitly.
  def initialize
    @timeout = DEFAULT_TIMEOUT
  end

  # Fetch +uri+, following up to +recursion+ redirections.
  #
  # baseuri   - URI originally requested (used in error messages and to
  #             re-apply credentials when a redirection drops them).
  # uri       - URI to request now.
  # lastcheck - Time of the previous successful check; Time.at(0) disables
  #             the If-Modified-Since header.
  # recursion - number of redirections still allowed.
  #
  # Returns the (gunzipped) response body, or nil when the server answers
  # 304 Not Modified. Raises RuntimeError on timeout, on too many
  # redirections and on any other HTTP status.
  def fetcher(baseuri, uri, lastcheck, recursion)
    http = build_connection(uri)
    req = build_request(baseuri, uri, lastcheck)

    begin
      response = http.request(req)
    rescue Timeout::Error
      raise "Timeout while fetching #{baseuri.to_s}"
    end

    case response
    when Net::HTTPSuccess
      if response['Content-Encoding'] == 'gzip'
        Zlib::GzipReader.new(StringIO.new(response.body)).read
      else
        response.body
      end
    when Net::HTTPNotModified
      # 304 is a subclass of HTTPRedirection, so it must be tested first.
      puts "HTTPNotModified on #{uri}" if HTTPDEBUG
      nil
    when Net::HTTPRedirection
      if recursion > 0
        redir = URI::join(uri.to_s, response['location'])
        fetcher(baseuri, redir, lastcheck, recursion - 1)
      else
        raise "Too many redirections while fetching #{baseuri.to_s}"
      end
    else
      raise "#{response.code}: #{response.message} while fetching #{baseuri.to_s}"
    end
  end

  # Fetch +url+ (a String), following at most MAXREDIR redirections.
  # See #fetcher for the return value and the errors raised.
  def fetch(url, lastcheck)
    uri = URI::parse(url)
    fetcher(uri, uri, lastcheck, MAXREDIR)
  end

  private

  # Build the Net::HTTP object for +uri+, honouring the http_proxy
  # environment variable and the configured timeout.
  def build_connection(uri)
    proxy_host = proxy_port = proxy_user = proxy_pass = nil
    if ENV['http_proxy']
      proxy_uri = URI.parse(ENV['http_proxy'])
      proxy_host = proxy_uri.host
      proxy_port = proxy_uri.port
      proxy_user, proxy_pass = split_userinfo(proxy_uri.userinfo) if proxy_uri.userinfo
    end

    http = Net::HTTP::Proxy(proxy_host, proxy_port, proxy_user, proxy_pass).new(uri.host, uri.port)
    http.read_timeout = @timeout
    http.open_timeout = @timeout
    if uri.scheme == 'https'
      http.use_ssl = true
      # NOTE(review): certificate verification is disabled for every HTTPS
      # feed. Kept as-is to preserve behaviour, but this allows
      # man-in-the-middle attacks and should probably become configurable.
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    end
    http
  end

  # Build the GET request, including conditional-GET and basic-auth headers.
  def build_request(baseuri, uri, lastcheck)
    useragent =
      if defined?(Feed2Imap)
        "Feed2Imap v#{Feed2Imap.version} http://home.gna.org/feed2imap/"
      else
        'Feed2Imap http://home.gna.org/feed2imap/'
      end
    headers = {
      'User-Agent' => useragent,
      'Accept-Encoding' => 'gzip',
    }
    headers['If-Modified-Since'] = lastcheck.httpdate if lastcheck != Time::at(0)

    req = Net::HTTP::Get::new(uri.request_uri, headers)
    if uri.userinfo
      req.basic_auth(*split_userinfo(uri.userinfo))
    elsif uri.host == baseuri.host && baseuri.userinfo
      # workaround. eg. wikini redirects and loses auth info.
      req.basic_auth(*split_userinfo(baseuri.userinfo))
    end
    req
  end

  # Split "login:password" on the first colon only, so that passwords
  # containing ':' are not truncated. Always returns two elements.
  def split_userinfo(userinfo)
    login, password = userinfo.split(':', 2)
    [login, password]
  end
end
F2I_WARNFETCHTIME = 10

require 'feed2imap/version'
require 'feed2imap/config'
require 'feed2imap/cache'
require 'feed2imap/httpfetcher'
require 'logger'
require 'thread'
require 'time' # Time#xmlschema, used to name dump files
require 'feedparser'
require 'feed2imap/rexml_settings'
require 'feed2imap/itemtomail'
require 'open3'

# Main driver: creating an instance performs one complete run (read the
# configuration, fetch every feed, upload new/updated items, save the cache).
class Feed2Imap
  # Maximum number of fetchers running at the same time.
  MAX_FETCHERS = 16

  def Feed2Imap.version
    return Feed2Imap::VERSION
  end

  # Perform a complete feed2imap run.
  #
  # verbose      - :debug, true (info level) or anything else (warnings only).
  # cacherebuild - when true, items are added to the cache but not uploaded.
  # configfile   - path of the configuration file.
  #
  # Exits the process with status 1 on unrecoverable errors (missing or
  # unreadable configuration, cache already locked, IMAP failures).
  def initialize(verbose, cacherebuild, configfile)
    setup_logger(verbose)
    @logger.info("Feed2Imap V.#{Feed2Imap::VERSION} started")
    load_config(configfile)
    init_cache
    connect_accounts
    fetch_feeds
    upload_feeds(cacherebuild)
    save_cache
    disconnect_accounts
  end

  private

  # Create the logger and map the verbosity setting to a log level.
  def setup_logger(verbose)
    @logger = Logger::new(STDOUT)
    if verbose == :debug
      @logger.level = Logger::DEBUG
      require 'pp' # only needed to pretty-print the configuration
    elsif verbose == true
      @logger.level = Logger::INFO
    else
      @logger.level = Logger::WARN
    end
  end

  # Read the configuration file into @config, exiting if that is impossible.
  def load_config(configfile)
    @logger.info('Reading configuration file ...')
    if not File::exist?(configfile)
      @logger.fatal("Configuration file #{configfile} not found.")
      exit(1)
    end
    if (File::stat(configfile).mode & 044) != 0
      @logger.warn("Configuration file is readable by other users. It " +
                   "probably contains your password.")
    end
    begin
      File::open(configfile) { |f| @config = F2IConfig::new(f) }
    rescue => e
      @logger.fatal("Error while reading configuration file, exiting: #{e}")
      exit(1)
    end
    if @logger.level == Logger::DEBUG
      @logger.debug("Configuration read:")
      pp(@config)
    end
  end

  # Take the cache lock and load the existing cache, if any.
  def init_cache
    @logger.info('Initializing cache ...')
    @cache = ItemCache::new(@config.updateddebug)
    # Mode 'w' creates the lock file when it is missing. The handle is kept
    # in an instance variable on purpose: flock is released as soon as the
    # File object is closed or garbage-collected.
    @lockfile = File::new(@config.cache + '.lock', 'w')
    if @lockfile.flock(File::LOCK_EX | File::LOCK_NB) == false
      @logger.fatal("Another instance of feed2imap is already locking the cache file")
      exit(1)
    end
    if not File::exist?(@config.cache)
      @logger.warn("Cache file #{@config.cache} not found, using a new one")
    else
      File::open(@config.cache) { |f| @cache.load(f) }
    end
  end

  # Connect every IMAP account referenced by the configuration.
  def connect_accounts
    @logger.info('Connecting to IMAP accounts ...')
    @config.imap_accounts.each_value do |ac|
      begin
        ac.connect
      rescue => e
        @logger.fatal("Error while connecting to #{ac}, exiting: #{e}")
        exit(1)
      end
    end
  end

  # Fetch (and filter) all feeds, one thread per feed, with at most
  # MAX_FETCHERS fetches in flight.
  def fetch_feeds
    @logger.info("Fetching and filtering feeds ...")
    @mutex = Mutex::new # protects the cache, feed bodies and external commands
    @spare_fetchers = MAX_FETCHERS
    @fetchers_mutex = Mutex::new
    @fetchers_cond = ConditionVariable::new
    threads = @config.feeds.map do |f|
      Thread::new(f) { |feed| fetch_feed(feed) }
    end
    threads.each { |t| t.join }
  end

  # Thread body: fetch one feed if it is due, store its body, dump it if
  # requested, and record failures in the cache.
  def fetch_feed(feed)
    lastcheck = nil
    due = @mutex.synchronize do
      lastcheck = @cache.get_last_check(feed.name)
      feed.needfetch(lastcheck)
    end
    if due
      body = with_fetcher_slot { retrieve_body(feed, lastcheck) }
      @mutex.synchronize do
        feed.body = body
        @cache.set_last_check(feed.name, Time::now)
      end
    else
      @logger.debug("Feed #{feed.name} doesn't need to be checked again for now.")
    end
    dump_feed(feed, @config.dumpdir) # dump if requested globally
    dump_feed(feed, feed.dumpdir)    # dump this feed if requested
  rescue Timeout::Error => e
    record_fetch_failure(feed, "Timeout::Error while fetching #{feed.url}: #{e}")
  rescue => e
    record_fetch_failure(feed, "Error while fetching #{feed.url}: #{e}")
  end

  # Run the block while holding one of the MAX_FETCHERS slots. The slot is
  # returned in an ensure clause so that a failing fetch cannot leak it
  # (leaked slots would eventually block every remaining thread forever).
  def with_fetcher_slot
    @fetchers_mutex.synchronize do
      @fetchers_cond.wait(@fetchers_mutex) while @spare_fetchers <= 0
      @spare_fetchers -= 1
    end
    begin
      yield
    ensure
      @fetchers_mutex.synchronize do
        @spare_fetchers += 1
        @fetchers_cond.signal
      end
    end
  end

  # Obtain the raw feed, over HTTP or from the execurl command, then pipe it
  # through the optional filter command. Returns the body, or nil when
  # nothing was fetched (e.g. HTTP 304).
  def retrieve_body(feed, lastcheck)
    fetch_start = Time::now
    s = nil
    if feed.url
      fetcher = HTTPFetcher::new
      fetcher.timeout = @config.timeout
      s = fetcher.fetch(feed.url, lastcheck)
    elsif feed.execurl
      # avoid running more than one command at the same time.
      # We need it because the called command might not be
      # thread-safe, and we need to get the right exitcode
      @mutex.synchronize do
        s = %x{#{feed.execurl}}
        if $? && $?.exitstatus != 0
          @logger.warn("Command for #{feed.name} exited with status #{$?.exitstatus} !")
        end
      end
    else
      @logger.warn("No way to fetch feed #{feed.name} !")
    end

    s = run_filter(feed, s) if feed.filter && !s.nil?

    if Time::now - fetch_start > F2I_WARNFETCHTIME
      @logger.info("Fetching feed #{feed.name} took #{(Time::now - fetch_start).to_i}s")
    end
    s
  end

  # Pipe +input+ through the feed's filter command and return its stdout.
  # Open3.capture3 feeds stdin and drains stdout/stderr concurrently, closes
  # all pipes and returns the real exit status ($? is not set by popen3).
  def run_filter(feed, input)
    # Same payload `puts` would have written: newline-terminated.
    input += "\n" unless input.end_with?("\n")
    # only one external command at a time, see #retrieve_body
    @mutex.synchronize do
      output, err, status = Open3::capture3(feed.filter, stdin_data: input)
      err.chomp.each_line { |line| @logger.warn(line.chomp) }
      if status.exitstatus != 0
        @logger.warn("Filter command for #{feed.name} exited with status #{status.exitstatus}. Output might be corrupted !")
      end
      output
    end
  end

  # Write the fetched body to a timestamped file in +dir+ (no-op when +dir+
  # is nil or the feed has no body).
  def dump_feed(feed, dir)
    return unless dir
    @mutex.synchronize do
      if feed.body
        fname = dir + '/' + feed.name + '-' + Time::now.xmlschema
        File::open(fname, 'w') { |file| file.puts feed.body }
      end
    end
  end

  # Count a fetch failure in the cache and log it at the proper level.
  def record_fetch_failure(feed, message)
    @mutex.synchronize do
      n = @cache.fetch_failed(feed.name)
      log_failure("#{message} (failed #{n} times)", n)
    end
  end

  # Failures are only reported loudly once they exceed max-failures.
  def log_failure(message, count)
    if count > @config.max_failures
      @logger.fatal(message)
    else
      @logger.info(message)
    end
  end

  # Parse every fetched feed, upload new/updated items and commit the cache.
  def upload_feeds(cacherebuild)
    @logger.info("Parsing and uploading ...")
    @config.feeds.each do |f|
      if f.body.nil? # means 304
        @logger.debug("Feed #{f.name} did not change.")
        next
      end
      begin
        feed = FeedParser::Feed::new(f.body.force_encoding('UTF-8'), f.url)
      rescue StandardError, ScriptError, SystemStackError => e
        # Deliberately broad (malformed feeds fail in many ways) but no
        # longer swallows Interrupt or SystemExit.
        n = @cache.parse_failed(f.name)
        log_failure("Error while parsing #{f.name}: #{e} (failed #{n} times)", n)
        next
      end
      begin
        newitems, updateditems = @cache.get_new_items(f.name, feed.items, f.always_new, f.ignore_hash)
      rescue => e
        @logger.fatal("Exception caught when selecting new items for #{f.name}: #{e}")
        puts e.backtrace
        next
      end
      if newitems.length > 0 || updateditems.length > 0 || @logger.level == Logger::DEBUG
        @logger.info("#{f.name}: #{newitems.length} new items, #{updateditems.length} updated items.")
      end
      begin
        upload_items(f, newitems, updateditems) unless cacherebuild
      rescue => e
        @logger.fatal("Exception caught while uploading mail to #{f.folder}: #{e}")
        puts e.backtrace
        @logger.fatal("We can't recover from IMAP errors, so we are exiting.")
        exit(1)
      end
      begin
        @cache.commit_cache(f.name)
      rescue => e
        @logger.fatal("Exception caught while updating cache for #{f.name}: #{e}")
        next
      end
    end
  end

  # Convert the items of feed +f+ to mails and store them in its folder.
  def upload_items(f, newitems, updateditems)
    fn = f.name.gsub(/[^0-9A-Za-z]/,'')
    updateditems.each do |i|
      id = "<#{fn}-#{i.cacheditem.index}@#{@config.hostname}>"
      email = item_to_mail(@config, i, id, true, f.name, f.include_images, f.wrapto)
      f.imapaccount.updatemail(f.folder, email, id, i.date || Time::new, f.reupload_if_updated)
    end
    # reverse is needed to upload older items first (fixes gna#8986)
    newitems.reverse_each do |i|
      id = "<#{fn}-#{i.cacheditem.index}@#{@config.hostname}>"
      email = item_to_mail(@config, i, id, false, f.name, f.include_images, f.wrapto)
      f.imapaccount.putmail(f.folder, email, i.date || Time::new)
    end
  end

  # Write the cache to "<cache>.new" and move it into place. The rename is
  # skipped when writing failed, so a truncated file never replaces a good
  # cache.
  def save_cache
    @logger.info("Finished. Saving cache ...")
    newcache = "#{@config.cache}.new"
    begin
      File::open(newcache, 'w') { |f| @cache.save(f) }
    rescue => e
      @logger.fatal("Exception caught while writing new cache to #{newcache}: #{e}")
      return
    end
    begin
      File::rename(newcache, @config.cache)
    rescue => e
      @logger.fatal("Exception caught while renaming #{newcache} to #{@config.cache}: #{e}")
    end
  end

  # Close every IMAP connection; errors here are harmless.
  def disconnect_accounts
    @logger.info("Closing IMAP connections ...")
    @config.imap_accounts.each_value do |ac|
      begin
        ac.disconnect
      rescue => e
        # servers tend to cause an exception to be raised here, hence the INFO level.
        @logger.info("Exception caught while closing connection to #{ac.to_s}: #{e}")
      end
    end
  end
end
require 'feed2imap/sgml-parser'

# this class provides a simple SGML parser that removes HTML tags
#
# The converted text accumulates in #savedata. Links are replaced by
# numbered references ("[1]") whose targets are listed when the parser is
# closed.
class HTML2TextParser < SGMLParser
  # Text produced so far.
  attr_reader :savedata

  # Plain-text marker written for both the opening and the closing tag.
  INLINE_MARKERS = { 'b' => '*', 'u' => '_', 'i' => '/' }.freeze

  def initialize(verbose = false)
    @savedata = +''
    @pre = false   # true while inside <pre>, where newlines are kept
    @href = nil    # target of the <a> element currently open, if any
    @links = []    # every link target seen, in document order
    super(verbose)
  end

  # Append character data, dropping newlines unless inside <pre>.
  # Works on a copy so the caller's string is never modified.
  def handle_data(data)
    data = data.delete("\n") unless @pre
    @savedata << data
  end

  # Translate an opening tag into its plain-text equivalent.
  # +attrs+ is expected to be a list of [name, value] pairs.
  def unknown_starttag(tag, attrs)
    case tag
    when 'p'
      @savedata << "\n\n"
    when 'br'
      @savedata << "\n"
    when 'b', 'u', 'i'
      @savedata << INLINE_MARKERS[tag]
    when 'pre'
      @savedata << "\n\n"
      @pre = true
    when 'a'
      # find href in args (the last one wins)
      @href = nil
      attrs.each { |a| @href = a[1] if a[0] == 'href' }
      # strip the quotes surrounding the attribute value, if any
      @links << @href.gsub(/^("|'|)(.*)("|')$/,'\2') if @href
    end
  end

  # Finish parsing and append the numbered list of link targets.
  def close
    super
    return if @links.empty?
    @savedata << "\n\n"
    @links.each_with_index do |link, i|
      @savedata << "[#{i+1}] #{link}\n"
    end
  end

  # Translate a closing tag into its plain-text equivalent.
  def unknown_endtag(tag)
    case tag
    when 'b', 'u', 'i'
      @savedata << INLINE_MARKERS[tag]
    when 'pre'
      @savedata << "\n\n"
      @pre = false
    when 'a'
      if @href
        # reference number of the link that was just closed
        @savedata << "[#{@links.length}]"
        @href = nil
      end
    end
  end
end
require 'cgi'
require 'yaml'
require 'uri'
require 'feed2imap/imap'
require 'feed2imap/maildir'
require 'etc'
require 'socket'
require 'set'

# Default cache file
DEFCACHE = ENV['HOME'] + '/.feed2imap.cache'

# Hostname and login name of the current user
HOSTNAME = Socket.gethostname
LOGNAME = Etc.getlogin

# Feed2imap configuration
class F2IConfig
  attr_reader :imap_accounts, :cache, :feeds, :dumpdir, :updateddebug, :max_failures, :include_images, :default_email, :hostname, :reupload_if_updated, :parts, :timeout

  # Load the configuration from the IO stream
  #
  # io - IO (or String) containing the YAML configuration.
  #
  # Raises ArgumentError when the document is not a YAML mapping; an empty
  # document is accepted and yields a configuration without feeds.
  # TODO should do some sanity check on the data read.
  def initialize(io)
    # safe_load returns nil/false for an empty document.
    @conf = YAML::safe_load(io, aliases: true) || {}
    raise ArgumentError, 'configuration must be a YAML mapping' unless @conf.is_a?(Hash)

    @cache = @conf['cache'] || DEFCACHE
    @dumpdir = @conf['dumpdir'] || nil
    @conf['feeds'] ||= []
    @feeds = []
    @max_failures = (@conf['max-failures'] || 10).to_i
    @updateddebug = @conf.has_key?('debug-updated') ? @conf['debug-updated'] : false

    # 'parts' may be a single string or a list; nil/empty means the default.
    configured_parts = @conf['parts']
    if configured_parts.nil? || (configured_parts.respond_to?(:empty?) && configured_parts.empty?)
      configured_parts = %w(text html)
    end
    @parts = Set.new(Array(configured_parts))

    @include_images = @conf.has_key?('include-images') ? @conf['include-images'] : true
    # images can only be attached to the HTML part
    @parts << 'html' if @include_images
    @reupload_if_updated = @conf.has_key?('reupload-if-updated') ? @conf['reupload-if-updated'] : true
    @timeout = @conf['timeout'].nil? ? 30 : @conf['timeout'].to_i
    @default_email = (@conf['default-email'] || "#{LOGNAME}@#{HOSTNAME}")
    ImapAccount.no_ssl_verify = (@conf.has_key?('disable-ssl-verification') && @conf['disable-ssl-verification'] == true)
    @hostname = HOSTNAME # FIXME: should this be configurable as well?
    @imap_accounts = ImapAccounts::new
    maildir_account = MaildirAccount::new

    @conf['feeds'].each do |f|
      f['name'] = f['name'].to_s
      # any value for 'disable' turns the feed off
      next unless f['disable'].nil?
      # the target may be given as a list of fragments to concatenate
      uri = URI::parse(Array(f['target']).join(''))
      path = CGI::unescape(uri.path)
      if uri.scheme == 'maildir'
        @feeds.push(ConfigFeed::new(f, maildir_account, path, self))
      else
        # remove leading slash from IMAP mailbox names
        path = path[1..-1] if path[0,1] == '/'
        @feeds.push(ConfigFeed::new(f, @imap_accounts.add_account(uri), path, self))
      end
    end
  end

  # Human-readable summary of the configuration (accounts and feeds).
  def to_s
    s = +"Your Feed2Imap config :\n"
    s << "=======================\n"
    s << "Cache file: #{@cache}\n\n"
    s << "Imap accounts I'll have to connect to :\n"
    s << "---------------------------------------\n"
    @imap_accounts.each_value { |i| s << i.to_s << "\n" }
    s << "\nFeeds :\n"
    s << "-------\n"
    @feeds.each_with_index do |f, idx|
      s << "#{idx + 1}. #{f.name}\n"
      s << " URL: #{f.url}\n"
      s << " IMAP Account: #{f.imapaccount}\n"
      s << " Folder: #{f.folder}\n"
      s << " Not wrapped.\n" if not f.wrapto
      s << "\n"
    end
    s
  end
end

# A configured feed. simple data container.
# A configured feed. Simple data container built from one entry of the
# 'feeds' list, with per-feed options falling back to the global ones.
class ConfigFeed
  attr_reader :name, :url, :imapaccount, :folder, :always_new, :execurl,
              :filter, :ignore_hash, :dumpdir, :wrapto, :include_images,
              :reupload_if_updated
  attr_accessor :body

  # f           - Hash of the feed's options as read from the configuration
  # imapaccount - account object the feed delivers to
  # folder      - mailbox name; stored in IMAP modified UTF-7
  # f2iconfig   - global configuration; supplies the defaults for
  #               include_images and reupload_if_updated
  def initialize(f, imapaccount, folder, f2iconfig)
    @name = f['name']
    raw_url = f['url']
    # Drop a leading "feed:" pseudo-scheme without modifying the string held
    # by the caller's hash (which may be frozen). A feed may have no url.
    @url = raw_url.is_a?(String) ? raw_url.delete_prefix('feed:') : raw_url
    @imapaccount = imapaccount
    @folder = encode_utf7(folder)
    @freq = f['min-frequency']
    @freq = @freq.to_i if @freq
    @always_new = f.fetch('always-new', false)
    @execurl = f['execurl']
    @filter = f['filter']
    @ignore_hash = f.fetch('ignore-hash', false)
    @dumpdir = f['dumpdir'] || nil
    @wrapto = f['wrapto'].nil? ? 72 : f['wrapto'].to_i
    @include_images = f.fetch('include-images', f2iconfig.include_images)
    @reupload_if_updated = f.fetch('reupload-if-updated', f2iconfig.reupload_if_updated)
  end

  # True when the feed is due: always if no min-frequency is set, otherwise
  # once min-frequency hours have elapsed since +lastcheck+ (a Time).
  def needfetch(lastcheck)
    return true if @freq.nil?

    (lastcheck + @freq * 3600) < Time.now
  end

  # Encodes +s+ as IMAP modified UTF-7 for use as a mailbox name.
  def encode_utf7(s)
    Net::IMAP.encode_utf7(s)
  end
end
# Delivers messages into local Maildir folders (tmp/, new/, cur/), as the
# counterpart of an IMAP account for maildir:// targets.
class MaildirAccount
  MYHOSTNAME = Socket.gethostname

  # Per-process sequence number used to build unique message file names.
  @@seq_num = 0

  attr_reader :uri

  # Stores +mail+ as a new message in +folder+. Returns true when the
  # message was written, false when no unique file name could be obtained.
  def putmail(folder, mail, date = Time.now)
    store_message(folder_dir(folder), date, nil) { |f| f.puts(mail) }
  end

  # Replaces the message(s) whose Message-ID matches +idx+ with +mail+,
  # keeping the Maildir info (flags) of the first match. When no such
  # message exists, +mail+ is stored only if +reupload_if_updated+ is set.
  def updatemail(folder, mail, idx, date = Time.now, reupload_if_updated = true)
    dir = folder_dir(folder)
    guarantee_maildir(dir)
    mail_files = find_mails(dir, idx)
    info = nil
    if mail_files.empty?
      # mail not present, and we don't want to re-upload it
      return unless reupload_if_updated
    else
      # get the info from the first result and delete everything
      info = maildir_file_info(mail_files.first)
      mail_files.each { |f| File.delete(File.join(dir, f)) }
    end
    store_message(dir, date, info) { |f| f.puts(mail) }
  end

  def to_s
    uri.to_s
  end

  # Removes messages from cur/ that are seen, not flagged and older than
  # three days. With +dryrun+ the candidates are only listed. Returns the
  # number of messages removed (or that would be removed).
  def cleanup(folder, dryrun = false)
    dir = folder_dir(folder)
    puts "-- Considering #{dir}:"
    guarantee_maildir(dir)

    del_count = 0
    recent_time = Time.now - (3 * 24 * 60 * 60) # 3 days
    Dir[File.join(dir, 'cur', '*')].each do |fn|
      flags = maildir_file_info_flags(fn)
      mtime = File.mtime(fn)
      # don't consider not-seen, flagged, or recent messages
      next if !flags.include?('S') || flags.include?('F') || mtime > recent_time

      mail = File.open(fn) { |f| RMail::Parser.read(f) }
      subject = mail.header['Subject']
      if dryrun
        puts "To remove: #{subject} #{mtime}"
      else
        puts "Removing: #{subject} #{mtime}"
        File.delete(fn)
      end
      del_count += 1
    end
    puts "-- Deleted #{del_count} messages"
    del_count
  end

  private

  # Folder names are paths relative to the filesystem root.
  def folder_dir(folder)
    File.join('/', folder)
  end

  # Writes one message using the Maildir delivery protocol: create a unique
  # file in tmp/ (retrying for up to ~30 s on name clashes), let the caller
  # fill it through the yielded File, then link it into new/. When +info+
  # is given the message is moved on to cur/ with that info suffix.
  # Returns true if the message was stored.
  def store_message(dir, date, info)
    guarantee_maildir(dir)

    stored = false
    Dir.chdir(dir) do
      file = new_fn = tmp_path = new_path = nil
      timer = 30
      while timer >= 0
        new_fn = new_maildir_basefn(date)
        tmp_path = File.join(dir, 'tmp', new_fn)
        new_path = File.join(dir, 'new', new_fn)
        begin
          # EXCL guarantees we never write into somebody else's file.
          file = File.open(tmp_path, File::WRONLY | File::EXCL | File::CREAT)
          break
        rescue Errno::EEXIST
          sleep 2
          timer -= 2
        end
      end

      if file
        begin
          yield file # provide a writable interface for the caller
          file.fsync
          File.link(tmp_path, new_path)
          stored = true
        ensure
          # Always release the descriptor; it used to be left open.
          file.close unless file.closed?
          File.unlink(tmp_path) if File.exist?(tmp_path)
        end
      end

      File.rename(new_path, File.join(dir, 'cur', "#{new_fn}:#{info}")) if stored && info
    end

    stored
  end

  # Returns the paths (relative to +dir+) of all messages in cur/ and new/
  # whose Message-ID header equals +idx+, with or without angle brackets.
  def find_mails(dir, idx)
    %w[cur new].flat_map do |sub|
      subdir = File.join(dir, sub)
      raise "#{subdir} not a directory" unless File.directory?(subdir)

      matching = Dir[File.join(subdir, '*')].select do |fn|
        message_id = File.open(fn) { |f| RMail::Parser.read(f).header['Message-ID'] }
        message_id && (message_id == idx || message_id == "<#{idx}>")
      end
      matching.map { |fn| File.join(sub, File.basename(fn)) }
    end
  end

  # Ensure maildir-folderness
  def guarantee_maildir(dir)
    %w[new cur tmp].each { |sub| FileUtils.mkdir_p(File.join(dir, sub)) }
  end

  # The info part of a message file name (everything after the last ':'
  # of the base name, e.g. "2,S"), or nil when there is none.
  def maildir_file_info(file)
    basename = File.basename(file)
    colon = basename.rindex(':')
    colon && basename[colon + 1..-1]
  end

  # Re-written and no longer shamelessly taken from
  # http://gitorious.org/sup/mainline/blobs/master/lib/sup/maildir.rb
  def new_maildir_basefn(date)
    fn = "#{date.to_i}.#{@@seq_num}.#{MYHOSTNAME}"
    @@seq_num += 1
    fn
  end

  # The flag letters of a message file ('' when it has none). Only the info
  # suffix of the base name is inspected, so commas in directory names or in
  # the unique part of the name (e.g. ",S=1234") are not mistaken for flags.
  def maildir_file_info_flags(fn)
    info = maildir_file_info(fn)
    return '' unless info

    _version, flags = info.split(',', 2)
    flags || ''
  end
end