"""
Created on 05.02.2011

@author: christian
"""
from datetime import datetime, timedelta
import HTMLParser
import urllib2

from django.conf import settings
from django.contrib.sites.models import Site
from django.utils import timezone
import django.db.models
import feedparser

from utils.html5 import models

# A feed is re-fetched once its last successful update is older than this.
_REFRESH_INTERVAL = timedelta(hours=12)


def _entry_summary(feed_entry):
    """Return the best available (still HTML-escaped) summary of an entry.

    Tries ``summary``, then ``description``, then the first non-empty part
    of ``content``; returns an empty unicode string when none is present.
    """
    for key in ("summary", "description"):
        text = feed_entry.get(key)
        if text:
            return text
    content = feed_entry.get("content")
    if isinstance(content, (list, tuple)):
        # feedparser exposes ``content`` as a list of dicts carrying the
        # text under "value"; it cannot be unescaped as a plain string.
        for part in content:
            value = part.get("value")
            if value:
                return value
        return u""
    return content or u""


def _entry_date(feed_entry, parsed_feed):
    """Return an aware datetime for an entry.

    Looks at the entry's own ``published_parsed`` / ``updated_parsed``
    first, then at the same keys on the feed level, and falls back to the
    current time when no parsed date is available.
    """
    feed_meta = parsed_feed.get("feed") or {}
    for source in (feed_entry, feed_meta):
        for key in ("published_parsed", "updated_parsed"):
            parsed = source.get(key)
            # The key may be present but None when the date was unparsable.
            if parsed:
                # feedparser normalises parsed dates to UTC time tuples, so
                # they must be tagged as UTC rather than the local timezone.
                return timezone.make_aware(
                    datetime(*parsed[:6]), timezone.utc)
    return timezone.now()


class FeedManager(django.db.models.Manager):
    """Manager for ``Feed`` that refreshes stale feeds on access."""

    def active(self):
        """Return the functional feeds of the current site.

        Every returned feed that was never updated, or whose last update is
        older than ``_REFRESH_INTERVAL``, is re-parsed first.
        """
        feeds = self.filter(is_functional=True, site=settings.SITE_ID)
        newly_broken = False
        for feed in feeds:
            last_update = feed.last_update
            is_stale = (
                last_update is None
                or timezone.now() - last_update > _REFRESH_INTERVAL)
            if is_stale:
                feed.parse()
                if not feed.is_functional:
                    newly_broken = True
        if newly_broken:
            # The loop above cached the result set, which still contains the
            # feeds that parse() just flagged as non-functional. Hand out a
            # fresh clone so the filter is evaluated against the saved state.
            return feeds.all()
        return feeds


class FeedItemManager(django.db.models.Manager):
    """Manager for ``FeedItem`` with a per-site shortcut query."""

    def recent_items(self, max_items=10, site=None):
        """Return at most ``max_items`` items of feeds belonging to ``site``.

        ``site`` defaults to ``settings.SITE_ID``. The order is the model's
        default ordering.
        """
        site = site or settings.SITE_ID
        return self.select_related().filter(feed__site=site)[:max_items]


class Feed(django.db.models.Model):
    """A syndication feed whose entries are stored as ``FeedItem`` rows."""

    title = models.CharField(max_length=500)
    site = models.ForeignKey(Site)
    feed_url = models.URLField(unique=True, max_length=255)
    public_url = models.URLField(max_length=255)
    last_update = models.DateTimeField(blank=True, null=True)
    is_functional = models.BooleanField(default=True)

    objects = FeedManager()

    def __unicode__(self):
        return self.title

    def parse(self):
        """Fetch ``feed_url`` and create a ``FeedItem`` for each new entry.

        If fetching failed with a ``urllib2.URLError`` the feed is flagged
        as non-functional and saved without touching its items. Otherwise
        entries lacking a title or a link are skipped, unknown guids are
        stored, and ``last_update`` is set to the current time.

        Returns the result of ``self.save()``.
        """
        parsed_feed = feedparser.parse(self.feed_url)
        if parsed_feed.bozo and isinstance(
                parsed_feed.bozo_exception, urllib2.URLError):
            self.is_functional = False
            return self.save()

        html_parser = HTMLParser.HTMLParser()
        for feed_entry in parsed_feed.entries:
            # Use .get() so a single malformed entry cannot abort the
            # refresh of the whole feed with an AttributeError.
            title = html_parser.unescape(feed_entry.get("title") or u"")
            if not title:
                continue
            link = feed_entry.get("link")
            if not link:
                continue
            guid = feed_entry.get("id", link)
            summary = html_parser.unescape(_entry_summary(feed_entry))
            date_modified = _entry_date(feed_entry, parsed_feed)
            # get_or_create() already persists a newly created row and
            # leaves an existing one untouched, so no extra save() is needed.
            self.feed_items.get_or_create(
                guid=guid,
                defaults={
                    'title': title,
                    'link': link,
                    'summary': summary,
                    'date_modified': date_modified,
                })
        self.last_update = timezone.now()
        return self.save()

    class Meta:
        ordering = ("title",)


class FeedItem(django.db.models.Model):
    """A single entry of a ``Feed``, identified by its guid."""

    feed = models.ForeignKey(Feed, related_name='feed_items')
    title = models.CharField(max_length=500)
    link = models.URLField(max_length=500)
    guid = models.CharField(max_length=255, unique=True, db_index=True)
    summary = models.TextField(blank=True)
    date_modified = models.DateTimeField()

    objects = FeedItemManager()

    class Meta:
        ordering = ("-date_modified",)

    def __unicode__(self):
        return self.title

    def get_absolute_url(self):
        """Return the entry's own link; items have no local detail page."""
        return self.link