Files
kasu/content/management/commands/importarticles.py
2014-11-26 13:13:07 +01:00

74 lines
2.9 KiB
Python

# -*- coding: utf-8 -*-
from content.models import Article, Category
from django.contrib.auth.models import User
from django.core.management.base import BaseCommand
from django.template.defaultfilters import slugify
from django.utils.datetime_safe import datetime
import re
import xlrd #@UnresolvedImport
class Command(BaseCommand):
    """Import legacy news entries from a spreadsheet into ``Article`` objects.

    The workbook path is taken from the first positional command argument.
    Each data row whose third column is ``'Archiv'`` or ``'News'`` is parsed
    as an HTML snippet of the form ``<h1>[date] title</h1>content`` and
    stored via ``Article.objects.get_or_create``.

    While parsing, the intermediate results are kept on the instance as
    ``date_created``, ``headline`` and ``content``; ``create_article`` reads
    them from there.
    """

    # NOTE(review): the user-facing texts talk about a "CSV" file, but the
    # file is opened with xlrd.open_workbook(), i.e. it is read as an Excel
    # workbook. The strings are left untouched here.
    help = "Importiert die alten Daten aus einer CSV Datei" #@ReservedAssignment

    # Heading with a leading ``<span class="small">DD.MM.YYYY</span>`` date.
    # NOTE(review): the ``title`` group is greedy and the patterns are used
    # with re.DOTALL, so if the text contains several ``</h1>`` the title
    # extends to the last one - confirm this is acceptable for the data.
    date_header_regex = r"""<h1><span class=\"small\">(?P<date>[\d\.]*)[\ -]*</span>[\ -]*(?P<title>.*)</h1>(?P<content>.*)"""
    # Heading without a date.
    header_regex = r"""<h1>[\ -]*(?P<title>.*)</h1>(?P<content>.*)"""

    # Format of the dates embedded in the headings.
    date_format = '%d.%m.%Y'
    # Placeholder date for entries without a usable date of their own.
    fallback_date = '01.01.1982'

    def __init__(self, *args, **kwargs):
        """Initialise the base command and look up the fixed author/category.

        Arguments are passed through to ``BaseCommand.__init__`` unchanged so
        that the base class can set up its own state.

        Raises whatever ``User.objects.get`` / ``Category.objects.get`` raise
        when the user ``xeniac`` or the category ``allgemeines`` is missing.
        """
        super(Command, self).__init__(*args, **kwargs)
        self.author = User.objects.get(username="xeniac")
        self.category = Category.objects.get(slug='allgemeines')

    def create_article(self):
        """Create (or fetch) the article described by the parsed attributes.

        Uses ``self.headline``, ``self.content`` and ``self.date_created`` as
        set by one of the ``parse_*`` methods. The article is looked up by
        slug and creation date, so running the import again reuses existing
        entries instead of duplicating them.
        """
        # The slug is derived from the first 50 characters of the headline.
        self.slug = slugify(self.headline[:50])
        article, created = Article.objects.get_or_create(
            slug=self.slug,
            date_created=self.date_created,
            defaults={
                'author': self.author,
                'headline_de': self.headline,
                'content_de': self.content,
                'category': self.category,
            },
        )
        if created:
            print("Created: %s - %s" % (self.date_created, self.headline))
        # clean() and save() run for existing articles as well, exactly as
        # before.
        article.clean()
        article.save()

    def _store_match(self, match_obj, date_text):
        """Copy a successful regex match into the parsing attributes.

        ``date_text`` is parsed with ``date_format``; if it is empty or not a
        valid date, ``fallback_date`` is used instead of aborting the import.
        """
        try:
            self.date_created = datetime.strptime(date_text, self.date_format)
        except ValueError:
            # The date group also matches '' or e.g. a two-digit year.
            self.date_created = datetime.strptime(self.fallback_date,
                                                  self.date_format)
        self.headline = match_obj.group('title').strip()
        self.content = match_obj.group('content').strip()

    def parse_with_date(self, original):
        """Parse ``original`` as a heading that carries a date.

        Returns True and sets ``date_created``, ``headline`` and ``content``
        when ``date_header_regex`` matches, False otherwise. An unparsable
        date no longer raises ``ValueError``; the fallback date is used.
        """
        match_obj = re.search(self.date_header_regex, original,
                              re.IGNORECASE | re.DOTALL)
        if not match_obj:
            return False
        self._store_match(match_obj, match_obj.group('date'))
        return True

    def parse_without_date(self, original):
        """Parse ``original`` as a heading without a date.

        Returns True and sets ``headline`` and ``content`` when
        ``header_regex`` matches (``date_created`` becomes the fallback
        date), False otherwise.
        """
        match_obj = re.search(self.header_regex, original,
                              re.IGNORECASE | re.DOTALL)
        if not match_obj:
            return False
        self._store_match(match_obj, self.fallback_date)
        return True

    def handle(self, *args, **options):
        """Run the import.

        ``args[0]`` is the path of the workbook. Returns False after printing
        a message when the path is missing or the file cannot be opened;
        otherwise returns None after processing all rows of the first sheet.
        """
        if not args:
            print("Bitte den Pfad zur CSV Datei angeben!")
            return False
        path = args[0]
        try:
            # Only the open call is guarded, so an IndexError from anywhere
            # else is not misreported as a missing argument.
            xls_file = xlrd.open_workbook(path)
        except IOError:
            print("Datei '%s' wurde nicht gefunden! " % path)
            return False

        table = xls_file.sheet_by_index(0)
        # Row 0 is skipped - presumably a header row; verify against the file.
        for row in range(1, table.nrows):
            if table.cell_value(row, 2) not in ('Archiv', 'News'):
                continue
            original = table.cell_value(row, 3)
            if self.parse_with_date(original) or self.parse_without_date(original):
                self.create_article()
            else:
                print("Fehler bei String!")
                print(table.cell_value(row, 3))