neue Verzeichnissstruktur

2014-11-26 13:13:07 +01:00
parent daa35f5913
commit f34281089d
3372 changed files with 168 additions and 2544 deletions
--- a/utils/icalendar/parser.py
+++ b/utils/icalendar/parser.py
@@ -0,0 +1,522 @@
+# -*- coding: utf-8 -*-
+
+"""
+This module parses and generates contentlines as defined in RFC 2445
+(iCalendar), but will probably work for other MIME types with similar syntax.
+E.g. RFC 2426 (vCard)
+
+It is stupid in the sense that it treats the content purely as strings. No type
+conversion is attempted.
+
+Copyright, 2005: Max M <maxm@mxm.dk>
+License: GPL (Just contact me if and why you would like it changed)
+"""
+
+# from python
+from types import TupleType, ListType
+SequenceTypes = [TupleType, ListType]
+import re
+# from this package
+from .caselessdict import CaselessDict
+
+
+#################################################################
+# Property parameter stuff
+
+def paramVal(val):
+    """
+    Returns the textual form of a parameter value.
+
+    A list or tuple (or an instance of a subclass of either) is rendered as
+    a separator-joined list by q_join(); any other value is handed to
+    dQuote(), which adds double quotes when the value needs them.
+    """
+    # isinstance() also accepts list/tuple subclasses; the former exact
+    # type() comparison sent those down the single-value path.
+    if isinstance(val, (list, tuple)):
+        return q_join(val)
+    return dQuote(val)
+
+# Could be improved
+# Characters allowed in a token (property or parameter name): word
+# characters and '-'. Raw string, because '\w' is not a valid string escape.
+NAME = re.compile(r'[\w-]+')
+# Characters rejected in an unquoted parameter value: control characters
+# other than TAB (\x09), DEL, and the delimiters '"', ',', ':' and ';'.
+UNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7F",:;]')
+# Characters rejected inside a quoted parameter value: control characters
+# other than TAB (\x09), DEL, and the double quote itself.
+QUNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7F"]')
+# A fold: one or more line breaks (LF or CRLF) followed by exactly one
+# space or TAB.
+FOLD = re.compile('([\r]?\n)+[ \t]{1}')
+
+def validate_token(name):
+    """
+    Checks that name is a token, i.e. consists solely of NAME characters.
+
+    Returns None when name is valid. Raises ValueError (carrying name) when
+    it is not, which includes the empty string.
+    """
+    match = NAME.match(name)
+    # The match has to span the whole string, not merely a prefix of it.
+    if match is not None and match.group() == name:
+        return
+    raise ValueError(name)
+
+def validate_param_value(value, quoted=True):
+    """
+    Checks a parameter value for characters that are not allowed in it.
+
+    value is tested against QUNSAFE_CHAR when quoted is true (the value was
+    taken from inside double quotes) and against the stricter UNSAFE_CHAR
+    otherwise. Returns None when the value is clean and raises ValueError
+    (carrying value) as soon as one offending character is found.
+    """
+    if quoted:
+        validator = QUNSAFE_CHAR
+    else:
+        validator = UNSAFE_CHAR
+    if validator.search(value):
+        raise ValueError(value)
+
+# Any one of these characters forces a parameter value to be double quoted.
+# The pattern deliberately does not require a following character, so a
+# separator at the very end of the value is detected as well.
+QUOTABLE = re.compile('[,;:]')
+def dQuote(val):
+    """
+    Parameter values containing [,;:] must be double quoted
+    >>> dQuote('Max')
+    'Max'
+    >>> dQuote('Rasmussen, Max')
+    '"Rasmussen, Max"'
+    >>> dQuote('name:value')
+    '"name:value"'
+
+    This also holds when the separator is the last character
+    >>> dQuote('value:')
+    '"value:"'
+
+    Returns val unchanged when it contains none of these characters.
+    """
+    if QUOTABLE.search(val):
+        return '"%s"' % val
+    return val
+
+# parsing helper
+def q_split(st, sep=','):
+    """
+    Splits st on every sep found outside of double (q)uotes. The quote
+    characters stay part of the returned pieces. An empty st gives [].
+    >>> q_split('Max,Moller,"Rasmussen, Max"')
+    ['Max', 'Moller', '"Rasmussen, Max"']
+    """
+    pieces = []
+    piece_start = 0
+    quoted = False
+    for pos, char in enumerate(st):
+        if char == '"':
+            quoted = not quoted
+        # Deliberately a separate test (not elif): the quote state is
+        # updated before the separator check.
+        if char == sep and not quoted:
+            pieces.append(st[piece_start:pos])
+            piece_start = pos + 1
+    # Whatever follows the last separator is the final piece; an empty
+    # input has no pieces at all.
+    if len(st) > 0:
+        pieces.append(st[piece_start:])
+    return pieces
+
+def q_join(lst, sep=','):
+    """
+    Runs every item of lst through dQuote() and joins the results with sep
+    >>> s = ['Max', 'Moller', 'Rasmussen, Max']
+    >>> q_join(s)
+    'Max,Moller,"Rasmussen, Max"'
+    """
+    quoted_items = map(dQuote, lst)
+    return sep.join(quoted_items)
+
+class Parameters(CaselessDict):
+    """
+    Parser and generator of Property parameter strings. It knows nothing of
+    datatypes. Its main concern is textual structure.
+
+
+    Simple parameter:value pair
+    >>> p = Parameters(parameter1='Value1')
+    >>> str(p)
+    'PARAMETER1=Value1'
+
+
+    keys are converted to upper
+    >>> p.keys()
+    ['PARAMETER1']
+
+
+    Parameters are case insensitive
+    >>> p['parameter1']
+    'Value1'
+    >>> p['PARAMETER1']
+    'Value1'
+
+
+    Parameter with list of values must be separated by comma
+    >>> p = Parameters({'parameter1':['Value1', 'Value2']})
+    >>> str(p)
+    'PARAMETER1=Value1,Value2'
+
+
+    Multiple parameters must be separated by a semicolon
+    >>> p = Parameters({'RSVP':'TRUE', 'ROLE':'REQ-PARTICIPANT'})
+    >>> str(p)
+    'ROLE=REQ-PARTICIPANT;RSVP=TRUE'
+
+
+    Parameter values containing ',;:' must be double quoted
+    >>> p = Parameters({'ALTREP':'http://www.wiz.org'})
+    >>> str(p)
+    'ALTREP="http://www.wiz.org"'
+
+
+    list items must be quoted separately
+    >>> p = Parameters({'MEMBER':['MAILTO:projectA@host.com', 'MAILTO:projectB@host.com', ]})
+    >>> str(p)
+    'MEMBER="MAILTO:projectA@host.com","MAILTO:projectB@host.com"'
+
+    Now the whole shebang
+    >>> p = Parameters({'parameter1':'Value1', 'parameter2':['Value2', 'Value3'],\
+                          'ALTREP':['http://www.wiz.org', 'value4']})
+    >>> str(p)
+    'ALTREP="http://www.wiz.org",value4;PARAMETER1=Value1;PARAMETER2=Value2,Value3'
+
+    We can also parse parameter strings
+    >>> Parameters.from_string('PARAMETER1=Value 1;param2=Value 2')
+    Parameters({'PARAMETER1': 'Value 1', 'PARAM2': 'Value 2'})
+
+    Including empty strings
+    >>> Parameters.from_string('param=')
+    Parameters({'PARAM': ''})
+
+    We can also parse parameter strings
+    >>> Parameters.from_string('MEMBER="MAILTO:projectA@host.com","MAILTO:projectB@host.com"')
+    Parameters({'MEMBER': ['MAILTO:projectA@host.com', 'MAILTO:projectB@host.com']})
+
+    We can also parse parameter strings
+    >>> Parameters.from_string('ALTREP="http://www.wiz.org",value4;PARAMETER1=Value1;PARAMETER2=Value2,Value3')
+    Parameters({'PARAMETER1': 'Value1', 'ALTREP': ['http://www.wiz.org', 'value4'], 'PARAMETER2': ['Value2', 'Value3']})
+    """
+
+
+    def params(self):
+        """
+        in rfc2445 keys are called parameters, so this is to be consistent
+        with the naming conventions
+        """
+        return self.keys()
+
+### Later, when I get more time... need to finish this off now. The last major thing missing.
+###    def _encode(self, name, value, cond=1):
+###        # internal, for conditional conversion of values.
+###        if cond:
+###            klass = types_factory.for_property(name)
+###            return klass(value)
+###        return value
+###
+###    def add(self, name, value, encode=0):
+###        "Add a parameter value and optionally encode it."
+###        if encode:
+###            value = self._encode(name, value, encode)
+###        self[name] = value
+###
+###    def decoded(self, name):
+###        "returns a decoded value, or list of same"
+
+    def __repr__(self):
+        "Returns 'Parameters(...)' wrapped around the plain dict repr"
+        return 'Parameters(' + dict.__repr__(self) + ')'
+
+
+    def __str__(self):
+        """
+        Serializes the parameters as 'KEY=value' pairs joined by ';'. Keys
+        are upper-cased and values are rendered by paramVal().
+        """
+        # Sorted so that the output is deterministic (makes doctests work).
+        return ';'.join(['%s=%s' % (key.upper(), paramVal(value))
+                         for key, value in sorted(self.items())])
+
+
+    @staticmethod
+    def from_string(st, strict=False):
+        """
+        Parses the parameter format from ical text format.
+
+        st is a string of 'key=value' pairs separated by ';', where a value
+        may itself be a ','-separated list and each item may be double
+        quoted. When strict is true, unquoted values are upper-cased.
+
+        Returns a Parameters instance in which a key maps to a string when
+        it has a single (or an empty) value and to a list of strings when
+        it has several. An empty st gives an empty instance.
+
+        Raises ValueError('Not a valid parameter string') when st cannot be
+        parsed or fails validation.
+        """
+        try:
+            # parse into strings
+            result = Parameters()
+            for param in q_split(st, ';'):
+                key, val = q_split(param, '=')
+                validate_token(key)
+                vals = []
+                for v in q_split(val, ','):
+                    if v.startswith('"') and v.endswith('"'):
+                        v = v.strip('"')
+                        validate_param_value(v, quoted=True)
+                    else:
+                        validate_param_value(v, quoted=False)
+                        # Property parameter values that are not in quoted
+                        # strings are case insensitive.
+                        if strict:
+                            v = v.upper()
+                    vals.append(v)
+                if not vals:
+                    # q_split('') gives [], so an empty value ends up here.
+                    result[key] = val
+                elif len(vals) == 1:
+                    result[key] = vals[0]
+                else:
+                    result[key] = vals
+            return result
+        except Exception:
+            # Exception rather than a bare except, so that KeyboardInterrupt
+            # and SystemExit are not turned into a ValueError.
+            raise ValueError('Not a valid parameter string')
+
+
+#########################################
+# parsing and generation of content lines
+
+class Contentline(str):
+    """
+    A content line is basically a string that can be folded and parsed into
+    parts.
+
+    >>> c = Contentline('Si meliora dies, ut vina, poemata reddit')
+    >>> str(c)
+    'Si meliora dies, ut vina, poemata reddit'
+
+    A long line gets folded
+    >>> c = Contentline(''.join(['123456789 ']*10))
+    >>> str(c)
+    '123456789 123456789 123456789 123456789 123456789 123456789 123456789 1234\\r\\n 56789 123456789 123456789 '
+
+    A folded line gets unfolded
+    >>> c = Contentline.from_string(str(c))
+    >>> c
+    '123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 '
+
+    We do not fold within a UTF-8 character:
+    >>> c = Contentline('This line has a UTF-8 character where it should be folded. Make sure it g\xc3\xabts folded before that character.')
+    >>> '\xc3\xab' in str(c)
+    True
+
+    Don't fail if we fold a line that is exactly X times 74 characters long:
+    >>> c = str(Contentline(''.join(['x']*148)))
+
+    It can parse itself into parts. Which is a tuple of (name, params, vals)
+
+    >>> c = Contentline('dtstart:20050101T120000')
+    >>> c.parts()
+    ('dtstart', Parameters({}), '20050101T120000')
+
+    >>> c = Contentline('dtstart;value=datetime:20050101T120000')
+    >>> c.parts()
+    ('dtstart', Parameters({'VALUE': 'datetime'}), '20050101T120000')
+
+    >>> c = Contentline('ATTENDEE;CN=Max Rasmussen;ROLE=REQ-PARTICIPANT:MAILTO:maxm@example.com')
+    >>> c.parts()
+    ('ATTENDEE', Parameters({'ROLE': 'REQ-PARTICIPANT', 'CN': 'Max Rasmussen'}), 'MAILTO:maxm@example.com')
+    >>> str(c)
+    'ATTENDEE;CN=Max Rasmussen;ROLE=REQ-PARTICIPANT:MAILTO:maxm@example.com'
+
+    and back again
+    >>> parts = ('ATTENDEE', Parameters({'ROLE': 'REQ-PARTICIPANT', 'CN': 'Max Rasmussen'}), 'MAILTO:maxm@example.com')
+    >>> Contentline.from_parts(parts)
+    'ATTENDEE;CN=Max Rasmussen;ROLE=REQ-PARTICIPANT:MAILTO:maxm@example.com'
+
+    and again
+    >>> parts = ('ATTENDEE', Parameters(), 'MAILTO:maxm@example.com')
+    >>> Contentline.from_parts(parts)
+    'ATTENDEE:MAILTO:maxm@example.com'
+
+    A value can also be any of the types defined in PropertyValues
+    >>> from icalendar.prop import vText
+    >>> parts = ('ATTENDEE', Parameters(), vText('MAILTO:test@example.com'))
+    >>> Contentline.from_parts(parts)
+    'ATTENDEE:MAILTO:test@example.com'
+
+    A value can also be unicode
+    >>> from icalendar.prop import vText
+    >>> parts = ('SUMMARY', Parameters(), vText(u'INternational char æ ø å'))
+    >>> Contentline.from_parts(parts)
+    'SUMMARY:INternational char \\xc3\\xa6 \\xc3\\xb8 \\xc3\\xa5'
+
+    Traversing could look like this.
+    >>> name, params, vals = c.parts()
+    >>> name
+    'ATTENDEE'
+    >>> vals
+    'MAILTO:maxm@example.com'
+    >>> for key, val in params.items():
+    ...     (key, val)
+    ('ROLE', 'REQ-PARTICIPANT')
+    ('CN', 'Max Rasmussen')
+
+    And the traditional failure
+    >>> c = Contentline('ATTENDEE;maxm@example.com')
+    >>> c.parts()
+    Traceback (most recent call last):
+        ...
+    ValueError: Content line could not be parsed into parts
+
+    Another failure:
+    >>> c = Contentline(':maxm@example.com')
+    >>> c.parts()
+    Traceback (most recent call last):
+        ...
+    ValueError: Content line could not be parsed into parts
+
+    >>> c = Contentline('key;param=:value')
+    >>> c.parts()
+    ('key', Parameters({'PARAM': ''}), 'value')
+
+    >>> c = Contentline('key;param="pvalue":value')
+    >>> c.parts()
+    ('key', Parameters({'PARAM': 'pvalue'}), 'value')
+
+    Should bomb on missing param:
+    >>> c = Contentline.from_string("k;:no param")
+    >>> c.parts()
+    Traceback (most recent call last):
+        ...
+    ValueError: Content line could not be parsed into parts
+
+    >>> c = Contentline('key;param=pvalue:value', strict=False)
+    >>> c.parts()
+    ('key', Parameters({'PARAM': 'pvalue'}), 'value')
+
+    If strict is set to True, uppercase param values that are not
+    double-quoted, this is because the spec says non-quoted params are
+    case-insensitive.
+
+    >>> c = Contentline('key;param=pvalue:value', strict=True)
+    >>> c.parts()
+    ('key', Parameters({'PARAM': 'PVALUE'}), 'value')
+
+    >>> c = Contentline('key;param="pValue":value', strict=True)
+    >>> c.parts()
+    ('key', Parameters({'PARAM': 'pValue'}), 'value')
+
+    """
+
+    def __new__(cls, st, strict=False):
+        """
+        Creates the content line from the string st. strict is stored on
+        the instance and later handed to Parameters.from_string() by parts().
+        """
+        self = str.__new__(cls, st)
+        self.strict = strict
+        return self
+
+    @staticmethod
+    def from_parts(parts):
+        """
+        Turns a tuple of parts into a content line.
+
+        parts is a (name, params, values) triple; each element is converted
+        with str(). The ';params' section is left out when the params
+        string is empty. Raises ValueError when the line cannot be built.
+        """
+        (name, params, values) = [str(p) for p in parts]
+        try:
+            if params:
+                return Contentline('%s;%s:%s' % (name, params, values))
+            return Contentline('%s:%s' % (name, values))
+        except Exception:
+            raise ValueError(
+                'Property: %s Wrong values "%s" or "%s"' % (repr(name),
+                                                            repr(params),
+                                                            repr(values)))
+
+    def parts(self):
+        """
+        Splits the content line up into (name, parameters, values) parts.
+
+        name is the text before the first unquoted ':' or ';', parameters
+        is a Parameters instance parsed from the text between the name and
+        the first unquoted ':', and values is everything after that ':'.
+
+        Raises ValueError('Content line could not be parsed into parts')
+        for any line that does not have this shape.
+        """
+        try:
+            name_split = None
+            value_split = None
+            inquotes = False
+            for i, ch in enumerate(self):
+                if not inquotes:
+                    # Compare against None: index 0 is a real position.
+                    if ch in ':;' and name_split is None:
+                        name_split = i
+                    if ch == ':' and value_split is None:
+                        value_split = i
+                if ch == '"':
+                    inquotes = not inquotes
+            if value_split is None:
+                # Without an unquoted ':' there is no value part at all.
+                raise ValueError('Invalid content line')
+            name = self[:name_split]
+            if not name:
+                raise ValueError('Key name is required')
+            validate_token(name)
+            if name_split + 1 == value_split:
+                # 'name;:value' - a ';' that is not followed by a parameter
+                raise ValueError('Invalid content line')
+            params = Parameters.from_string(self[name_split + 1:value_split],
+                                            strict=self.strict)
+            values = self[value_split + 1:]
+            return (name, params, values)
+        except Exception:
+            # All failures above are reported under one common message.
+            raise ValueError('Content line could not be parsed into parts')
+
+    @staticmethod
+    def from_string(st, strict=False):
+        """
+        Unfolds the content lines in an iCalendar into long content lines.
+
+        Every match of FOLD in st is removed and the result is returned as
+        a Contentline carrying the given strict flag. Raises ValueError
+        when st cannot be processed.
+        """
+        try:
+            # a fold is a line break followed by either a space or a tab
+            return Contentline(FOLD.sub('', st), strict=strict)
+        except Exception:
+            raise ValueError('Expected StringType with content line')
+
+    def __str__(self):
+        """
+        Returns the folded form of the line: segments of at most 74
+        characters joined by '\\r\\n ' (CRLF plus one space). A line of 74
+        characters or fewer is returned as is.
+        """
+        l_line = len(self)
+        new_lines = []
+        start = 0
+        end = 74
+        while True:
+            if end >= l_line:
+                end = l_line
+            else:
+                # Check that we don't fold in the middle of a UTF-8 character:
+                # http://lists.osafoundation.org/pipermail/ietf-calsify/2006-August/001126.html
+                # Values 128..191 are UTF-8 continuation bytes; back up to
+                # the nearest position that is not one.
+                fold = end
+                while fold > start and 128 <= ord(self[fold]) < 192:
+                    fold -= 1
+                # If the whole window consists of continuation bytes there
+                # is no clean position. Keep the hard fold at `end` then;
+                # backing up to `start` would yield empty segments forever.
+                if fold > start:
+                    end = fold
+
+            new_lines.append(self[start:end])
+            if end == l_line:
+                # Done
+                break
+            start = end
+            end = start + 74
+        return '\r\n '.join(new_lines)
+
+
+
+class Contentlines(list):
+    """
+    I assume that iCalendar files generally are a few kilobytes in size. Then
+    this should be efficient. For huge files, an iterator should probably be
+    used instead.
+
+    >>> c = Contentlines([Contentline('BEGIN:VEVENT\\r\\n')])
+    >>> str(c)
+    'BEGIN:VEVENT\\r\\n'
+
+    Let's try appending a 100 character wide string to it
+    >>> c.append(Contentline(''.join(['123456789 ']*10)+'\\r\\n'))
+    >>> str(c)
+    'BEGIN:VEVENT\\r\\n\\r\\n123456789 123456789 123456789 123456789 123456789 123456789 123456789 1234\\r\\n 56789 123456789 123456789 \\r\\n'
+
+    Notice that there is an extra empty string in the end of the content lines.
+    That is so they can be easily joined with: '\\r\\n'.join(contentlines).
+    >>> Contentlines.from_string('A short line\\r\\n')
+    ['A short line', '']
+    >>> Contentlines.from_string('A faked\\r\\n  long line\\r\\n')
+    ['A faked long line', '']
+    >>> Contentlines.from_string('A faked\\r\\n  long line\\r\\nAnd another lin\\r\\n\\te that is folded\\r\\n')
+    ['A faked long line', 'And another line that is folded', '']
+    """
+
+    def __str__(self):
+        "Joins str() of every item with '\\r\\n'."
+        return '\r\n'.join(map(str, self))
+
+    @staticmethod
+    def from_string(st):
+        """
+        Parses a string into content lines.
+
+        st is unfolded (every FOLD match is removed) and split into lines;
+        empty lines are dropped and each remaining line becomes a
+        Contentline. A trailing '' is appended to the result. Raises
+        ValueError when st cannot be processed.
+        """
+        try:
+            # a fold is a line break followed by either a space or a tab
+            unfolded = FOLD.sub('', st)
+            lines = [Contentline(line) for line in unfolded.splitlines() if line]
+            lines.append('') # we need a '\r\n' in the end of every content line
+            return Contentlines(lines)
+        except Exception:
+            # Exception rather than a bare except, so that KeyboardInterrupt
+            # and SystemExit are not turned into a ValueError.
+            raise ValueError('Expected StringType with content lines')
+
+
+# ran this:
+#    sample = open('./samples/test.ics', 'rb').read() # binary file in windows!
+#    lines = Contentlines.from_string(sample)
+#    for line in lines[:-1]:
+#        print line.parts()
+
+# got this:
+#('BEGIN', Parameters({}), 'VCALENDAR')
+#('METHOD', Parameters({}), 'Request')
+#('PRODID', Parameters({}), '-//My product//mxm.dk/')
+#('VERSION', Parameters({}), '2.0')
+#('BEGIN', Parameters({}), 'VEVENT')
+#('DESCRIPTION', Parameters({}), 'This is a very long description that ...')
+#('PARTICIPANT', Parameters({'CN': 'Max M'}), 'MAILTO:maxm@mxm.dk')
+#('DTEND', Parameters({}), '20050107T160000')
+#('DTSTART', Parameters({}), '20050107T120000')
+#('SUMMARY', Parameters({}), 'A second event')
+#('END', Parameters({}), 'VEVENT')
+#('BEGIN', Parameters({}), 'VEVENT')
+#('DTEND', Parameters({}), '20050108T235900')
+#('DTSTART', Parameters({}), '20050108T230000')
+#('SUMMARY', Parameters({}), 'A single event')
+#('UID', Parameters({}), '42')
+#('END', Parameters({}), 'VEVENT')
+#('END', Parameters({}), 'VCALENDAR')