neue Verzeichnissstruktur
This commit is contained in:
522
utils/icalendar/parser.py
Normal file
522
utils/icalendar/parser.py
Normal file
@@ -0,0 +1,522 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This module parses and generates contentlines as defined in RFC 2445
|
||||
(iCalendar), but will probably work for other MIME types with similar syntax.
|
||||
Eg. RFC 2426 (vCard)
|
||||
|
||||
It is stupid in the sense that it treats the content purely as strings. No type
|
||||
conversion is attempted.
|
||||
|
||||
Copyright, 2005: Max M <maxm@mxm.dk>
|
||||
License: GPL (Just contact med if and why you would like it changed)
|
||||
"""
|
||||
|
||||
# from python
|
||||
from types import TupleType, ListType
|
||||
SequenceTypes = [TupleType, ListType]
|
||||
import re
|
||||
# from this package
|
||||
from .caselessdict import CaselessDict
|
||||
|
||||
|
||||
#################################################################
|
||||
# Property parameter stuff
|
||||
|
||||
def paramVal(val):
|
||||
"Returns a parameter value"
|
||||
if type(val) in SequenceTypes:
|
||||
return q_join(val)
|
||||
return dQuote(val)
|
||||
|
||||
# Could be improved
|
||||
NAME = re.compile('[\w-]+')
|
||||
UNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7F",:;]')
|
||||
QUNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7F"]')
|
||||
FOLD = re.compile('([\r]?\n)+[ \t]{1}')
|
||||
|
||||
def validate_token(name):
|
||||
match = NAME.findall(name)
|
||||
if len(match) == 1 and name == match[0]:
|
||||
return
|
||||
raise ValueError, name
|
||||
|
||||
def validate_param_value(value, quoted=True):
|
||||
validator = UNSAFE_CHAR
|
||||
if quoted:
|
||||
validator = QUNSAFE_CHAR
|
||||
if validator.findall(value):
|
||||
raise ValueError, value
|
||||
|
||||
QUOTABLE = re.compile('[,;:].')
|
||||
def dQuote(val):
|
||||
"""
|
||||
Parameter values containing [,;:] must be double quoted
|
||||
>>> dQuote('Max')
|
||||
'Max'
|
||||
>>> dQuote('Rasmussen, Max')
|
||||
'"Rasmussen, Max"'
|
||||
>>> dQuote('name:value')
|
||||
'"name:value"'
|
||||
"""
|
||||
if QUOTABLE.search(val):
|
||||
return '"%s"' % val
|
||||
return val
|
||||
|
||||
# parsing helper
|
||||
def q_split(st, sep=','):
|
||||
"""
|
||||
Splits a string on char, taking double (q)uotes into considderation
|
||||
>>> q_split('Max,Moller,"Rasmussen, Max"')
|
||||
['Max', 'Moller', '"Rasmussen, Max"']
|
||||
"""
|
||||
result = []
|
||||
cursor = 0
|
||||
length = len(st)
|
||||
inquote = 0
|
||||
for i in range(length):
|
||||
ch = st[i]
|
||||
if ch == '"':
|
||||
inquote = not inquote
|
||||
if not inquote and ch == sep:
|
||||
result.append(st[cursor:i])
|
||||
cursor = i + 1
|
||||
if i + 1 == length:
|
||||
result.append(st[cursor:])
|
||||
return result
|
||||
|
||||
def q_join(lst, sep=','):
|
||||
"""
|
||||
Joins a list on sep, quoting strings with QUOTABLE chars
|
||||
>>> s = ['Max', 'Moller', 'Rasmussen, Max']
|
||||
>>> q_join(s)
|
||||
'Max,Moller,"Rasmussen, Max"'
|
||||
"""
|
||||
return sep.join([dQuote(itm) for itm in lst])
|
||||
|
||||
class Parameters(CaselessDict):
|
||||
"""
|
||||
Parser and generator of Property parameter strings. It knows nothing of
|
||||
datatypes. It's main concern is textual structure.
|
||||
|
||||
|
||||
Simple parameter:value pair
|
||||
>>> p = Parameters(parameter1='Value1')
|
||||
>>> str(p)
|
||||
'PARAMETER1=Value1'
|
||||
|
||||
|
||||
keys are converted to upper
|
||||
>>> p.keys()
|
||||
['PARAMETER1']
|
||||
|
||||
|
||||
Parameters are case insensitive
|
||||
>>> p['parameter1']
|
||||
'Value1'
|
||||
>>> p['PARAMETER1']
|
||||
'Value1'
|
||||
|
||||
|
||||
Parameter with list of values must be seperated by comma
|
||||
>>> p = Parameters({'parameter1':['Value1', 'Value2']})
|
||||
>>> str(p)
|
||||
'PARAMETER1=Value1,Value2'
|
||||
|
||||
|
||||
Multiple parameters must be seperated by a semicolon
|
||||
>>> p = Parameters({'RSVP':'TRUE', 'ROLE':'REQ-PARTICIPANT'})
|
||||
>>> str(p)
|
||||
'ROLE=REQ-PARTICIPANT;RSVP=TRUE'
|
||||
|
||||
|
||||
Parameter values containing ',;:' must be double quoted
|
||||
>>> p = Parameters({'ALTREP':'http://www.wiz.org'})
|
||||
>>> str(p)
|
||||
'ALTREP="http://www.wiz.org"'
|
||||
|
||||
|
||||
list items must be quoted seperately
|
||||
>>> p = Parameters({'MEMBER':['MAILTO:projectA@host.com', 'MAILTO:projectB@host.com', ]})
|
||||
>>> str(p)
|
||||
'MEMBER="MAILTO:projectA@host.com","MAILTO:projectB@host.com"'
|
||||
|
||||
Now the whole sheebang
|
||||
>>> p = Parameters({'parameter1':'Value1', 'parameter2':['Value2', 'Value3'],\
|
||||
'ALTREP':['http://www.wiz.org', 'value4']})
|
||||
>>> str(p)
|
||||
'ALTREP="http://www.wiz.org",value4;PARAMETER1=Value1;PARAMETER2=Value2,Value3'
|
||||
|
||||
We can also parse parameter strings
|
||||
>>> Parameters.from_string('PARAMETER1=Value 1;param2=Value 2')
|
||||
Parameters({'PARAMETER1': 'Value 1', 'PARAM2': 'Value 2'})
|
||||
|
||||
Including empty strings
|
||||
>>> Parameters.from_string('param=')
|
||||
Parameters({'PARAM': ''})
|
||||
|
||||
We can also parse parameter strings
|
||||
>>> Parameters.from_string('MEMBER="MAILTO:projectA@host.com","MAILTO:projectB@host.com"')
|
||||
Parameters({'MEMBER': ['MAILTO:projectA@host.com', 'MAILTO:projectB@host.com']})
|
||||
|
||||
We can also parse parameter strings
|
||||
>>> Parameters.from_string('ALTREP="http://www.wiz.org",value4;PARAMETER1=Value1;PARAMETER2=Value2,Value3')
|
||||
Parameters({'PARAMETER1': 'Value1', 'ALTREP': ['http://www.wiz.org', 'value4'], 'PARAMETER2': ['Value2', 'Value3']})
|
||||
"""
|
||||
|
||||
|
||||
def params(self):
|
||||
"""
|
||||
in rfc2445 keys are called parameters, so this is to be consitent with
|
||||
the naming conventions
|
||||
"""
|
||||
return self.keys()
|
||||
|
||||
### Later, when I get more time... need to finish this off now. The last majot thing missing.
|
||||
### def _encode(self, name, value, cond=1):
|
||||
### # internal, for conditional convertion of values.
|
||||
### if cond:
|
||||
### klass = types_factory.for_property(name)
|
||||
### return klass(value)
|
||||
### return value
|
||||
###
|
||||
### def add(self, name, value, encode=0):
|
||||
### "Add a parameter value and optionally encode it."
|
||||
### if encode:
|
||||
### value = self._encode(name, value, encode)
|
||||
### self[name] = value
|
||||
###
|
||||
### def decoded(self, name):
|
||||
### "returns a decoded value, or list of same"
|
||||
|
||||
def __repr__(self):
|
||||
return 'Parameters(' + dict.__repr__(self) + ')'
|
||||
|
||||
|
||||
def __str__(self):
|
||||
result = []
|
||||
items = self.items()
|
||||
items.sort() # To make doctests work
|
||||
for key, value in items:
|
||||
value = paramVal(value)
|
||||
result.append('%s=%s' % (key.upper(), value))
|
||||
return ';'.join(result)
|
||||
|
||||
|
||||
def from_string(st, strict=False):
|
||||
"Parses the parameter format from ical text format"
|
||||
try:
|
||||
# parse into strings
|
||||
result = Parameters()
|
||||
for param in q_split(st, ';'):
|
||||
key, val = q_split(param, '=')
|
||||
validate_token(key)
|
||||
param_values = [v for v in q_split(val, ',')]
|
||||
# Property parameter values that are not in quoted
|
||||
# strings are case insensitive.
|
||||
vals = []
|
||||
for v in param_values:
|
||||
if v.startswith('"') and v.endswith('"'):
|
||||
v = v.strip('"')
|
||||
validate_param_value(v, quoted=True)
|
||||
vals.append(v)
|
||||
else:
|
||||
validate_param_value(v, quoted=False)
|
||||
if strict:
|
||||
vals.append(v.upper())
|
||||
else:
|
||||
vals.append(v)
|
||||
if not vals:
|
||||
result[key] = val
|
||||
else:
|
||||
if len(vals) == 1:
|
||||
result[key] = vals[0]
|
||||
else:
|
||||
result[key] = vals
|
||||
return result
|
||||
except:
|
||||
raise ValueError, 'Not a valid parameter string'
|
||||
from_string = staticmethod(from_string)
|
||||
|
||||
|
||||
#########################################
|
||||
# parsing and generation of content lines
|
||||
|
||||
class Contentline(str):
|
||||
"""
|
||||
A content line is basically a string that can be folded and parsed into
|
||||
parts.
|
||||
|
||||
>>> c = Contentline('Si meliora dies, ut vina, poemata reddit')
|
||||
>>> str(c)
|
||||
'Si meliora dies, ut vina, poemata reddit'
|
||||
|
||||
A long line gets folded
|
||||
>>> c = Contentline(''.join(['123456789 ']*10))
|
||||
>>> str(c)
|
||||
'123456789 123456789 123456789 123456789 123456789 123456789 123456789 1234\\r\\n 56789 123456789 123456789 '
|
||||
|
||||
A folded line gets unfolded
|
||||
>>> c = Contentline.from_string(str(c))
|
||||
>>> c
|
||||
'123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 '
|
||||
|
||||
We do not fold within a UTF-8 character:
|
||||
>>> c = Contentline('This line has a UTF-8 character where it should be folded. Make sure it g\xc3\xabts folded before that character.')
|
||||
>>> '\xc3\xab' in str(c)
|
||||
True
|
||||
|
||||
Don't fail if we fold a line that is exactly X times 74 characters long:
|
||||
>>> c = str(Contentline(''.join(['x']*148)))
|
||||
|
||||
It can parse itself into parts. Which is a tuple of (name, params, vals)
|
||||
|
||||
>>> c = Contentline('dtstart:20050101T120000')
|
||||
>>> c.parts()
|
||||
('dtstart', Parameters({}), '20050101T120000')
|
||||
|
||||
>>> c = Contentline('dtstart;value=datetime:20050101T120000')
|
||||
>>> c.parts()
|
||||
('dtstart', Parameters({'VALUE': 'datetime'}), '20050101T120000')
|
||||
|
||||
>>> c = Contentline('ATTENDEE;CN=Max Rasmussen;ROLE=REQ-PARTICIPANT:MAILTO:maxm@example.com')
|
||||
>>> c.parts()
|
||||
('ATTENDEE', Parameters({'ROLE': 'REQ-PARTICIPANT', 'CN': 'Max Rasmussen'}), 'MAILTO:maxm@example.com')
|
||||
>>> str(c)
|
||||
'ATTENDEE;CN=Max Rasmussen;ROLE=REQ-PARTICIPANT:MAILTO:maxm@example.com'
|
||||
|
||||
and back again
|
||||
>>> parts = ('ATTENDEE', Parameters({'ROLE': 'REQ-PARTICIPANT', 'CN': 'Max Rasmussen'}), 'MAILTO:maxm@example.com')
|
||||
>>> Contentline.from_parts(parts)
|
||||
'ATTENDEE;CN=Max Rasmussen;ROLE=REQ-PARTICIPANT:MAILTO:maxm@example.com'
|
||||
|
||||
and again
|
||||
>>> parts = ('ATTENDEE', Parameters(), 'MAILTO:maxm@example.com')
|
||||
>>> Contentline.from_parts(parts)
|
||||
'ATTENDEE:MAILTO:maxm@example.com'
|
||||
|
||||
A value can also be any of the types defined in PropertyValues
|
||||
>>> from icalendar.prop import vText
|
||||
>>> parts = ('ATTENDEE', Parameters(), vText('MAILTO:test@example.com'))
|
||||
>>> Contentline.from_parts(parts)
|
||||
'ATTENDEE:MAILTO:test@example.com'
|
||||
|
||||
A value can also be unicode
|
||||
>>> from icalendar.prop import vText
|
||||
>>> parts = ('SUMMARY', Parameters(), vText(u'INternational char <20> <20> <20>'))
|
||||
>>> Contentline.from_parts(parts)
|
||||
'SUMMARY:INternational char \\xc3\\xa6 \\xc3\\xb8 \\xc3\\xa5'
|
||||
|
||||
Traversing could look like this.
|
||||
>>> name, params, vals = c.parts()
|
||||
>>> name
|
||||
'ATTENDEE'
|
||||
>>> vals
|
||||
'MAILTO:maxm@example.com'
|
||||
>>> for key, val in params.items():
|
||||
... (key, val)
|
||||
('ROLE', 'REQ-PARTICIPANT')
|
||||
('CN', 'Max Rasmussen')
|
||||
|
||||
And the traditional failure
|
||||
>>> c = Contentline('ATTENDEE;maxm@example.com')
|
||||
>>> c.parts()
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: Content line could not be parsed into parts
|
||||
|
||||
Another failure:
|
||||
>>> c = Contentline(':maxm@example.com')
|
||||
>>> c.parts()
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: Content line could not be parsed into parts
|
||||
|
||||
>>> c = Contentline('key;param=:value')
|
||||
>>> c.parts()
|
||||
('key', Parameters({'PARAM': ''}), 'value')
|
||||
|
||||
>>> c = Contentline('key;param="pvalue":value')
|
||||
>>> c.parts()
|
||||
('key', Parameters({'PARAM': 'pvalue'}), 'value')
|
||||
|
||||
Should bomb on missing param:
|
||||
>>> c = Contentline.from_string("k;:no param")
|
||||
>>> c.parts()
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: Content line could not be parsed into parts
|
||||
|
||||
>>> c = Contentline('key;param=pvalue:value', strict=False)
|
||||
>>> c.parts()
|
||||
('key', Parameters({'PARAM': 'pvalue'}), 'value')
|
||||
|
||||
If strict is set to True, uppercase param values that are not
|
||||
double-quoted, this is because the spec says non-quoted params are
|
||||
case-insensitive.
|
||||
|
||||
>>> c = Contentline('key;param=pvalue:value', strict=True)
|
||||
>>> c.parts()
|
||||
('key', Parameters({'PARAM': 'PVALUE'}), 'value')
|
||||
|
||||
>>> c = Contentline('key;param="pValue":value', strict=True)
|
||||
>>> c.parts()
|
||||
('key', Parameters({'PARAM': 'pValue'}), 'value')
|
||||
|
||||
"""
|
||||
|
||||
def __new__(cls, st, strict=False):
|
||||
self = str.__new__(cls, st)
|
||||
setattr(self, 'strict', strict)
|
||||
return self
|
||||
|
||||
def from_parts(parts):
|
||||
"Turns a tuple of parts into a content line"
|
||||
(name, params, values) = [str(p) for p in parts]
|
||||
try:
|
||||
if params:
|
||||
return Contentline('%s;%s:%s' % (name, params, values))
|
||||
return Contentline('%s:%s' % (name, values))
|
||||
except:
|
||||
raise ValueError(
|
||||
'Property: %s Wrong values "%s" or "%s"' % (repr(name),
|
||||
repr(params),
|
||||
repr(values)))
|
||||
from_parts = staticmethod(from_parts)
|
||||
|
||||
def parts(self):
|
||||
""" Splits the content line up into (name, parameters, values) parts
|
||||
"""
|
||||
try:
|
||||
name_split = None
|
||||
value_split = None
|
||||
inquotes = 0
|
||||
for i in range(len(self)):
|
||||
ch = self[i]
|
||||
if not inquotes:
|
||||
if ch in ':;' and not name_split:
|
||||
name_split = i
|
||||
if ch == ':' and not value_split:
|
||||
value_split = i
|
||||
if ch == '"':
|
||||
inquotes = not inquotes
|
||||
name = self[:name_split]
|
||||
if not name:
|
||||
raise ValueError, 'Key name is required'
|
||||
validate_token(name)
|
||||
if name_split + 1 == value_split:
|
||||
raise ValueError, 'Invalid content line'
|
||||
params = Parameters.from_string(self[name_split + 1:value_split],
|
||||
strict=self.strict)
|
||||
values = self[value_split + 1:]
|
||||
return (name, params, values)
|
||||
except:
|
||||
raise ValueError, 'Content line could not be parsed into parts'
|
||||
|
||||
def from_string(st, strict=False):
|
||||
"Unfolds the content lines in an iCalendar into long content lines"
|
||||
try:
|
||||
# a fold is carriage return followed by either a space or a tab
|
||||
return Contentline(FOLD.sub('', st), strict=strict)
|
||||
except:
|
||||
raise ValueError, 'Expected StringType with content line'
|
||||
from_string = staticmethod(from_string)
|
||||
|
||||
def __str__(self):
|
||||
"Long content lines are folded so they are less than 75 characters wide"
|
||||
l_line = len(self)
|
||||
new_lines = []
|
||||
start = 0
|
||||
end = 74
|
||||
while True:
|
||||
if end >= l_line:
|
||||
end = l_line
|
||||
else:
|
||||
# Check that we don't fold in the middle of a UTF-8 character:
|
||||
# http://lists.osafoundation.org/pipermail/ietf-calsify/2006-August/001126.html
|
||||
while True:
|
||||
char_value = ord(self[end])
|
||||
if char_value < 128 or char_value >= 192:
|
||||
# This is not in the middle of a UTF-8 character, so we
|
||||
# can fold here:
|
||||
break
|
||||
else:
|
||||
end -= 1
|
||||
|
||||
new_lines.append(self[start:end])
|
||||
if end == l_line:
|
||||
# Done
|
||||
break
|
||||
start = end
|
||||
end = start + 74
|
||||
return '\r\n '.join(new_lines)
|
||||
|
||||
|
||||
|
||||
class Contentlines(list):
|
||||
"""
|
||||
I assume that iCalendar files generally are a few kilobytes in size. Then
|
||||
this should be efficient. for Huge files, an iterator should probably be
|
||||
used instead.
|
||||
|
||||
>>> c = Contentlines([Contentline('BEGIN:VEVENT\\r\\n')])
|
||||
>>> str(c)
|
||||
'BEGIN:VEVENT\\r\\n'
|
||||
|
||||
Lets try appending it with a 100 charater wide string
|
||||
>>> c.append(Contentline(''.join(['123456789 ']*10)+'\\r\\n'))
|
||||
>>> str(c)
|
||||
'BEGIN:VEVENT\\r\\n\\r\\n123456789 123456789 123456789 123456789 123456789 123456789 123456789 1234\\r\\n 56789 123456789 123456789 \\r\\n'
|
||||
|
||||
Notice that there is an extra empty string in the end of the content lines.
|
||||
That is so they can be easily joined with: '\r\n'.join(contentlines)).
|
||||
>>> Contentlines.from_string('A short line\\r\\n')
|
||||
['A short line', '']
|
||||
>>> Contentlines.from_string('A faked\\r\\n long line\\r\\n')
|
||||
['A faked long line', '']
|
||||
>>> Contentlines.from_string('A faked\\r\\n long line\\r\\nAnd another lin\\r\\n\\te that is folded\\r\\n')
|
||||
['A faked long line', 'And another line that is folded', '']
|
||||
"""
|
||||
|
||||
def __str__(self):
|
||||
"Simply join self."
|
||||
return '\r\n'.join(map(str, self))
|
||||
|
||||
def from_string(st):
|
||||
"Parses a string into content lines"
|
||||
try:
|
||||
# a fold is carriage return followed by either a space or a tab
|
||||
unfolded = FOLD.sub('', st)
|
||||
lines = [Contentline(line) for line in unfolded.splitlines() if line]
|
||||
lines.append('') # we need a '\r\n' in the end of every content line
|
||||
return Contentlines(lines)
|
||||
except:
|
||||
raise ValueError, 'Expected StringType with content lines'
|
||||
from_string = staticmethod(from_string)
|
||||
|
||||
|
||||
# ran this:
|
||||
# sample = open('./samples/test.ics', 'rb').read() # binary file in windows!
|
||||
# lines = Contentlines.from_string(sample)
|
||||
# for line in lines[:-1]:
|
||||
# print line.parts()
|
||||
|
||||
# got this:
|
||||
#('BEGIN', Parameters({}), 'VCALENDAR')
|
||||
#('METHOD', Parameters({}), 'Request')
|
||||
#('PRODID', Parameters({}), '-//My product//mxm.dk/')
|
||||
#('VERSION', Parameters({}), '2.0')
|
||||
#('BEGIN', Parameters({}), 'VEVENT')
|
||||
#('DESCRIPTION', Parameters({}), 'This is a very long description that ...')
|
||||
#('PARTICIPANT', Parameters({'CN': 'Max M'}), 'MAILTO:maxm@mxm.dk')
|
||||
#('DTEND', Parameters({}), '20050107T160000')
|
||||
#('DTSTART', Parameters({}), '20050107T120000')
|
||||
#('SUMMARY', Parameters({}), 'A second event')
|
||||
#('END', Parameters({}), 'VEVENT')
|
||||
#('BEGIN', Parameters({}), 'VEVENT')
|
||||
#('DTEND', Parameters({}), '20050108T235900')
|
||||
#('DTSTART', Parameters({}), '20050108T230000')
|
||||
#('SUMMARY', Parameters({}), 'A single event')
|
||||
#('UID', Parameters({}), '42')
|
||||
#('END', Parameters({}), 'VEVENT')
|
||||
#('END', Parameters({}), 'VCALENDAR')
|
||||
Reference in New Issue
Block a user