# -*- coding: utf-8 -*-
"""Management command that imports legacy news articles from an Excel sheet."""
import re

import xlrd
from django.contrib.auth import get_user_model
from django.core.management.base import BaseCommand
from django.template.defaultfilters import slugify
from django.utils.datetime_safe import datetime

from content.models import Article, Category


# noinspection PyPep8
class Command(BaseCommand):
    """Create ``Article`` objects from the rows of an old Excel workbook.

    The workbook path is taken from the first positional command argument.
    Every imported article is assigned to the user ``xeniac`` and to the
    category with the slug ``allgemeines``.
    """

    help = "Importiert die alten Daten aus einer CSV Datei"  # @ReservedAssignment

    # Headline that starts with a date, e.g. "<h1>01.02.2003 - Title</h1>...".
    # NOTE(review): the named groups and the leading "<h1>" were reconstructed
    # from the group names read in parse_with_date() and from header_regex;
    # a literal token may also have been lost between the two "[\ -]*" runs
    # -- confirm against the legacy data.
    date_header_regex = r"""<h1>(?P<date>[\d\.]*)[\ -]*[\ -]*(?P<title>.*)</h1>(?P<content>.*)"""
    # Headline without a date: "<h1>Title</h1>...".
    header_regex = r"""<h1>[\ -]*(?P<title>.*)</h1>(?P<content>.*)"""

    # Format of the dates in the headlines and of the fallback date below.
    _DATE_FORMAT = '%d.%m.%Y'
    # Creation date given to articles whose headline carries no usable date.
    _FALLBACK_DATE = '01.01.1982'

    def __init__(self, *args, **kwargs):
        """Look up the author and category shared by all imported articles.

        Any arguments are passed through unchanged to ``BaseCommand``.

        Raises whatever the two ``objects.get()`` lookups raise when the user
        or the category does not exist.
        """
        self.author = get_user_model().objects.get(username="xeniac")
        self.category = Category.objects.get(slug='allgemeines')
        super(Command, self).__init__(*args, **kwargs)

    def create_article(self):
        """Store the most recently parsed headline/content as an ``Article``.

        Reads ``self.headline``, ``self.content`` and ``self.date_created``
        (set by one of the ``parse_*`` methods) and sets ``self.slug``.
        An article is looked up by slug and creation date, so running the
        import twice does not create duplicates.
        """
        # Only the first 50 characters of the headline go into the slug.
        self.slug = slugify(self.headline[:50])
        article, created = Article.objects.get_or_create(
            slug=self.slug,
            date_created=self.date_created,
            defaults={
                'author': self.author,
                'headline_de': self.headline,
                'content_de': self.content,
                'category': self.category,
            },
        )
        # NOTE(review): the nesting of clean()/save() could not be recovered
        # from the collapsed source; they are applied to new articles only.
        if created:
            print("Created: %s - %s" % (self.date_created, self.headline))
            article.clean()
            article.save()

    def _search(self, pattern, original):
        """Return the case-insensitive, dot-matches-newline match or None."""
        return re.search(pattern, original, re.IGNORECASE | re.DOTALL)

    def _store_match(self, match_obj, date_created):
        """Copy the parsed parts of a headline match onto the command."""
        self.date_created = date_created
        self.headline = match_obj.group('title').strip()
        self.content = match_obj.group('content').strip()

    def parse_with_date(self, original):
        """Parse *original* as "<h1>date - title</h1>content".

        On success ``self.date_created``, ``self.headline`` and
        ``self.content`` are set and ``True`` is returned.  ``False`` is
        returned, with no attribute changed, when the text does not match
        or when the matched date is not a valid ``dd.mm.yyyy`` date.
        """
        match_obj = self._search(self.date_header_regex, original)
        if not match_obj:
            return False
        try:
            date_created = datetime.strptime(match_obj.group('date'),
                                             self._DATE_FORMAT)
        except ValueError:
            # The date group may be empty or malformed; report "no match" so
            # the caller can fall back to parse_without_date().
            return False
        self._store_match(match_obj, date_created)
        return True

    def parse_without_date(self, original):
        """Parse *original* as "<h1>title</h1>content".

        The creation date is set to the fixed fallback date 01.01.1982.
        Returns ``True`` when the text matched, ``False`` otherwise.
        """
        match_obj = self._search(self.header_regex, original)
        if not match_obj:
            return False
        fallback = datetime.strptime(self._FALLBACK_DATE, self._DATE_FORMAT)
        self._store_match(match_obj, fallback)
        return True

    def handle(self, *args, **options):
        """Import every 'Archiv' or 'News' row of the given workbook.

        ``args[0]`` is the path of the workbook.  Only the first sheet is
        read, starting at the second row; column index 2 selects the rows to
        import and column index 3 holds the text that is parsed.  Rows whose
        text cannot be parsed are printed and skipped.

        Returns ``False`` when the path is missing or the file cannot be
        opened, otherwise ``None``.
        """
        try:
            path = args[0]
        except IndexError:
            print("Bitte den Pfad zur CSV Datei angeben!")
            return False

        try:
            xls_file = xlrd.open_workbook(path)
        except IOError:
            print("Datei '%s' wurde nicht gefunden! " % path)
            return False

        table = xls_file.sheet_by_index(0)
        # Row 0 is skipped (presumably the column headers -- verify).
        for row in range(1, table.nrows):
            if table.cell_value(row, 2) not in ('Archiv', 'News'):
                continue
            original = table.cell_value(row, 3)
            if self.parse_with_date(original) or self.parse_without_date(original):
                self.create_article()
            else:
                print("Fehler bei String!")
                print(original)