#!/usr/bin/env python
#
# RssDB
#
# Small SQLite-backed store for RSS feeds and their entries, usable both as a
# module (class RssDB) and as a command-line script (see main()).
#
import sys
import sqlite3
import bz2
import binascii
import pickle
import logging
from time import strftime

# available at http://feedparser.org or http://code.google.com/p/feedparser/
try:
    import feedparser
except ImportError:
    # Only update_feed_entries() needs feedparser; keep every other command
    # (add/list/statistics/remove) usable when it is not installed.
    feedparser = None

__version = '0.1'


class RssDB(object):
    ''' RSS database: feeds and their (compressed, pickled) entries '''

    def __init__(self, database='rss.sqlite', thread_limit=3):
        '''
        Open (and, if needed, create) the database and set up logging.

        database     - path of the SQLite file (':memory:' also works)
        thread_limit - threads limit; accepted for backward compatibility,
                       currently not used by this class
        '''
        self.__db_conn = sqlite3.connect(database)
        # Row objects allow access by column name (info['url']) and by index.
        self.__db_conn.row_factory = sqlite3.Row
        self.__db_cursor = self.__db_conn.cursor()

        # create the tables on first use
        self.__db_cursor.execute(
            'CREATE TABLE IF NOT EXISTS feeds '
            '(id INTEGER PRIMARY KEY AUTOINCREMENT, url VARCHAR(1000) UNIQUE);')
        self.__db_cursor.execute(
            'CREATE TABLE IF NOT EXISTS entries (feed_id, link UNIQUE, data);')

        # logging: the logger is named after the database (text before the
        # first dot)
        self.__logger = logging.getLogger(database.split('.')[0])
        self.__logger.setLevel(logging.DEBUG)
        # Loggers are process-wide singletons per name: attach the console
        # handler only once, otherwise every additional RssDB instance for
        # the same database would duplicate each log line.
        if not self.__logger.handlers:
            console = logging.StreamHandler()
            console.setLevel(logging.DEBUG)
            console.setFormatter(logging.Formatter(
                '%(asctime)s;%(name)s;%(levelname)s: %(message)s'))
            self.__logger.addHandler(console)

    def log(self, level, message):
        '''
        Log message at the given level.

        level   - one of 'debug', 'info', 'warn', 'error', 'critical'
        message - text to log

        An unknown level prints a warning to stdout and drops the message.
        '''
        dispatch = {
            'debug': self.__logger.debug,
            'info': self.__logger.info,
            # Logger.warn is a deprecated alias of Logger.warning
            'warn': self.__logger.warning,
            'error': self.__logger.error,
            'critical': self.__logger.critical,
        }
        emit = dispatch.get(level)
        if emit is None:
            print("Warning! Unkown logging level")
        else:
            emit(message)

    def sync(self):
        ''' Commit pending changes to the database '''
        self.__db_conn.commit()

    def close(self):
        ''' Close the database connection; the instance is unusable afterwards '''
        self.__db_conn.close()

    def compress_data(self, data):
        ''' Return data bz2-compressed and hex-encoded '''
        return binascii.hexlify(bz2.compress(data))

    def feeds(self):
        ''' Return the list of feeds in the database as (id, url) rows '''
        return self.__db_cursor.execute('SELECT id, url FROM feeds').fetchall()

    def add_feed(self, feed):
        '''
        Add a feed URL to the database.

        return:
            True  - if feed was added
            False - if feed wasn't added (the insert violated a constraint,
                    e.g. the URL is already stored)
        '''
        try:
            # Bound parameter instead of string formatting: URLs containing
            # quotes are stored verbatim and cannot alter the statement.
            self.__db_cursor.execute(
                'INSERT INTO feeds (url) VALUES (?);', (feed,))
        except sqlite3.IntegrityError:
            return False
        self.sync()
        return True

    def update_feed_entries(self):
        '''
        Fetch every feed in the database and store its new entries.

        Raises ImportError when the feedparser package is not available.
        '''
        if feedparser is None:
            raise ImportError('feedparser is required to update feed entries')
        for info in self.feeds():
            self.log('info', (info['url']))
            entries = feedparser.parse(info['url']).entries
            self.store_feed_items(info['id'], entries)

    def store_feed_items(self, id, items):
        '''
        Store the items of one feed that are not in the database yet.

        id    - feed id the items belong to
        items - iterable of entries; each must expose a ``link`` attribute
                and be convertible with dict(). Entries without a link are
                skipped because the link is what identifies an entry.
        '''
        new_items = 0
        for entry in items:
            link = getattr(entry, 'link', None)
            if not link:
                self.log('warn', 'skipping entry without link')
                continue
            # Look the link up first so that already known entries are not
            # pickled and compressed again on every update.
            known = self.__db_cursor.execute(
                'SELECT 1 FROM entries WHERE link=? LIMIT 1', (link,)
            ).fetchone()
            if known is not None:
                continue
            payload = self.compress_data(pickle.dumps(dict(entry)))
            self.__db_cursor.execute(
                'INSERT INTO entries (feed_id, link, data) VALUES (?,?,?)',
                (id, link, payload))
            new_items += 1
        self.sync()
        if new_items > 0:
            self.log('info', 'new items: %d' % new_items)

    def remove_entries(self):
        ''' Remove all entries (feeds are kept) and compact the database '''
        self.__db_cursor.execute("DELETE FROM entries;")
        # VACUUM cannot run inside a transaction, and the DELETE above opens
        # one implicitly -- commit before vacuuming.
        self.sync()
        self.__db_cursor.execute("VACUUM;")
        self.sync()

    def statistics(self):
        ''' Print total feed/entry counts and the entry count per feed '''
        cursor = self.__db_cursor
        total_feeds = cursor.execute(
            'SELECT count(*) FROM feeds;').fetchone()[0]
        total_entries = cursor.execute(
            'SELECT count(*) FROM entries;').fetchone()[0]
        print("Total feeds: %d" % total_feeds)
        print("Total entries: %d" % total_entries)
        print("Feeds:")
        for info in self.feeds():
            count = cursor.execute(
                'SELECT count(*) FROM entries WHERE feed_id=?;',
                (info['id'],)).fetchone()[0]
            print("entries: % 5d, feed: %s" % (count, info['url']))


def main():
    ''' Command-line entry point: parse the options and run one action '''
    from optparse import OptionParser

    parser = OptionParser(version="%prog, v." + __version)
    parser.add_option("-d", "--database", dest="database",
                      help="RSS database, by default: rss.sqlite")
    parser.add_option("-a", "--add_feed", dest="add_feed",
                      help="add feed link to database")
    parser.add_option("-l", "--feed_list", action='store_true',
                      dest="feed_list", default=False,
                      help="printout feed list in database")
    parser.add_option("-u", "--update_feed_entries", action='store_true',
                      dest="update_feed_entries", default=False,
                      help="update feed entries")
    parser.add_option("-s", "--statistics", action='store_true',
                      dest="statistics", default=False,
                      help="show statistics")
    parser.add_option("-r", "--remove_entries", action='store_true',
                      dest="remove_entries", default=False,
                      help="remove entries")
    (options, args) = parser.parse_args()

    # definition of database
    if options.database:
        rss_db = RssDB(database=options.database)
    else:
        rss_db = RssDB()

    try:
        # Only the first matching action runs, in this order of precedence.
        if options.add_feed:
            # action: add new feed
            if rss_db.add_feed(options.add_feed):
                rss_db.log('info', 'Feed: %s was added' % options.add_feed)
            else:
                rss_db.log('warn', 'Feed: %s wasn\'t added' % options.add_feed)
        elif options.feed_list:
            # action: print out list of feeds
            for feed in rss_db.feeds():
                print("%03d:%s" % (feed[0], feed[1]))
        elif options.update_feed_entries:
            # action: update for new feed entries
            try:
                rss_db.update_feed_entries()
            except KeyboardInterrupt:
                print("Interrupted by user")
        elif options.statistics:
            # printout statistics
            print("Statistics")
            rss_db.statistics()
        elif options.remove_entries:
            # remove entries
            print("Remove feed entries")
            rss_db.remove_entries()
        else:
            print("No arguments. Please use -h for help")
    finally:
        rss_db.close()


if __name__ == "__main__":
    sys.exit(main())