Files
kasu/content/management/commands/importarticles.py
Christian Berg 86a0db050d Diverse Code Cleanups
* Code wurde PEP-8-gerecht formatiert
* Kleine Fehler, die der PyCharm Inspector beanstandet hat, wurden korrigiert
2014-11-26 16:04:52 +01:00

79 lines
3.2 KiB
Python

# -*- coding: utf-8 -*-
import re
from django.contrib.auth import get_user_model
from django.core.management.base import BaseCommand
from django.template.defaultfilters import slugify
from django.utils.datetime_safe import datetime
from content.models import Article, Category
import xlrd
# noinspection PyPep8
class Command(BaseCommand):
    """Import legacy articles from the first sheet of a workbook.

    Every data row whose third column is 'Archiv' or 'News' is parsed: the
    fourth column holds an HTML fragment starting with an ``<h1>`` headline,
    optionally preceded by a date inside ``<span class="small">``. Each
    successfully parsed row is stored as an ``Article``.
    """

    # NOTE(review): the user-facing texts speak of a "CSV Datei", but the file
    # is opened with xlrd.open_workbook(); the strings are left untouched here.
    help = "Importiert die alten Daten aus einer CSV Datei"  # @ReservedAssignment

    # Headline with a leading date: <h1><span class="small">DATE</span>TITLE</h1>BODY
    date_header_regex = r"""<h1><span class=\"small\">(?P<date>[\d\.]*)[\ -]*</span>[\ -]*(?P<title>.*)</h1>(?P<content>.*)"""
    # Headline without a date: <h1>TITLE</h1>BODY
    header_regex = r"""<h1>[\ -]*(?P<title>.*)</h1>(?P<content>.*)"""

    # Format of the dates embedded in the headlines (day.month.year).
    _DATE_FORMAT = '%d.%m.%Y'
    # Placeholder creation date for articles whose headline carries no date.
    _FALLBACK_DATE = '01.01.1982'
    # Both patterns are matched case-insensitively and across line breaks.
    _REGEX_FLAGS = re.IGNORECASE | re.DOTALL

    def __init__(self, *args, **kwargs):
        """Load the author and category that every imported article gets.

        Any arguments are forwarded unchanged to ``BaseCommand.__init__`` so
        the command can still be constructed when the base class is given
        arguments.

        The two ORM lookups run at construction time; if the user "xeniac" or
        the category 'allgemeines' is missing, the lookup's exception
        propagates to the caller.
        """
        self.author = get_user_model().objects.get(username="xeniac")
        self.category = Category.objects.get(slug='allgemeines')
        super(Command, self).__init__(*args, **kwargs)

    def create_article(self):
        """Store the most recently parsed article.

        Reads ``self.headline``, ``self.content`` and ``self.date_created``
        (set by one of the ``parse_*`` methods). The slug is derived from the
        first 50 characters of the headline; an article with the same slug and
        creation date is reused instead of being created a second time.
        """
        self.slug = slugify(self.headline[:50])
        article, created = Article.objects.get_or_create(
            slug=self.slug,
            date_created=self.date_created,
            defaults={
                'author': self.author,
                'headline_de': self.headline,
                'content_de': self.content,
                'category': self.category,
            })
        if created:
            print("Created: %s - %s" % (self.date_created, self.headline))
        # Run the model's clean() hook and persist the result, for new and
        # already existing articles alike.
        article.clean()
        article.save()

    def _store_match(self, match_obj, date_string):
        """Copy date, headline and content of a regex match onto ``self``.

        The date is converted first, so a ``ValueError`` from ``strptime``
        leaves the previously stored article data untouched.
        """
        self.date_created = datetime.strptime(date_string, self._DATE_FORMAT)
        self.headline = match_obj.group('title').strip()
        self.content = match_obj.group('content').strip()

    def parse_with_date(self, original):
        """Parse a fragment whose headline starts with a date.

        Returns True and sets ``date_created``, ``headline`` and ``content``
        when ``original`` matches ``date_header_regex``; returns False when it
        does not match.

        Raises ValueError when the pattern matches but the captured date is
        not in day.month.year form (the pattern also accepts an empty date).
        """
        match_obj = re.search(self.date_header_regex, original, self._REGEX_FLAGS)
        if not match_obj:
            return False
        self._store_match(match_obj, match_obj.group('date'))
        return True

    def parse_without_date(self, original):
        """Parse a fragment whose headline carries no date.

        Returns True and sets ``headline`` and ``content`` when ``original``
        matches ``header_regex``; ``date_created`` is set to the placeholder
        date 01.01.1982. Returns False when the fragment does not match.
        """
        match_obj = re.search(self.header_regex, original, self._REGEX_FLAGS)
        if not match_obj:
            return False
        self._store_match(match_obj, self._FALLBACK_DATE)
        return True

    def handle(self, *args, **options):
        """Import all 'Archiv' and 'News' rows of the given workbook.

        ``args[0]`` is the path of the workbook. Returns False after printing
        a message when no path was given or the file could not be opened.
        Rows that cannot be parsed are printed and skipped; they do not stop
        the import.
        """
        # Keep the two failure modes apart: only a missing argument may be
        # reported as "no path given".
        try:
            path = args[0]
        except IndexError:
            print("Bitte den Pfad zur CSV Datei angeben!")
            return False
        try:
            xls_file = xlrd.open_workbook(path)
        except IOError:
            print("Datei '%s' wurde nicht gefunden! " % path)
            return False

        table = xls_file.sheet_by_index(0)
        # Row 0 is skipped (iteration starts at 1).
        for row in range(1, table.nrows):
            if table.cell_value(row, 2) not in ('Archiv', 'News'):
                continue
            original = table.cell_value(row, 3)
            try:
                parsed = (self.parse_with_date(original)
                          or self.parse_without_date(original))
            except ValueError:
                # The headline matched but its date could not be converted;
                # report this row like any other unparsable one instead of
                # aborting the whole import.
                parsed = False
            if parsed:
                self.create_article()
            else:
                print("Fehler bei String!")
                print(table.cell_value(row, 3))