On Thu, 7 Oct 1999, Ben Leslie wrote:
Hi The!
On Tue, 05 Oct 1999, The Dragon De Monsyne wrote:
On Tue, 28 Sep 1999, Michel Pelletier wrote:
Greetings,
I finally got sick of paging through endless archive messages, so I implemented an experimental searchable list archive:
[snip]
Hmmm! Whaddayaknow! This is exactly what I've been working on! I've been planning out a product called MessageBase to do this. I'm sketching out the Message class right now. I'm planning on it having full MIME support. (One of the things I have gotten done so far is an improved version of Python's mimetools module that is actually compliant with the MIME RFCs.)
Umm, any chance you could send me a copy of these? I have been working on the NotMail product and have got it viewing MIME messages slightly better; however, an improved mimetools would certainly make my life easier and my code neater.
Sure! there was one more improvement I wanted to make, which is why it took a few days to reply, here's the module. -The Dragon De Monsyne # Various tools used by MIME-reading or MIME-writing programs. import os import rfc822 import string import tempfile from UserDict import UserDict # A derived class of rfc822.Message that knows about MIME headers and # contains some hooks for decoding encoded and multipart messages. class Message(rfc822.Message): def __init__(self, fp, seekable = 1): rfc822.Message.__init__(self, fp, seekable) self.encodingheader = \ self.getheader('content-transfer-encoding') self.typeheader = \ self.getheader('content-type') self.dispheader = \ self.getheader('content-disposition') self.parsetype() self.parseplist() def parsetype(self): str = self.typeheader if str == None: str = 'text/plain' if ';' in str: i = string.index(str, ';') self.plisttext = str[i:] str = str[:i] else: self.plisttext = '' fields = string.splitfields(str, '/') for i in range(len(fields)): fields[i] = string.lower(string.strip(fields[i])) self.type = string.joinfields(fields, '/') self.maintype = fields[0] self.subtype = string.joinfields(fields[1:], '/') if self.dispheader: if ';' in self.dispheader: i = string.index(self.dispheader, ';') self.dplisttext = self.dispheader[i:] self.disposition = self.dispheader[:i] else: self.dplisttext = '' self.disposition = self.dispheader else: self.dplisttext = '' self.disposition = None def parseplist(self): self.plist=[] self.type_params=parseplist(self.plisttext) for each in self.type_params.items(): self.plist.append("%s=%s" % each) self.disp_params = parseplist(self.dplisttext) def getplist(self): return self.plist def gettype_params(self): return self.type_params def getdisp_params(self): return self.disp_params def gettype_param(self, name): name = string.lower(name) if self.type_params.has_key(name): return self.type_params[name] else: return None def getdisp_param(self, name): name = string.lower(name) if 
self.disp_params.has_key(name): return self.type_params[name] else: return None def getdisp_paramnames(self): return self.disp_params.keys() def gettype_paramnames(self): return self.type_params.keys() # for backwards compatibillity -ddm getparam = gettype_param getparamnames = gettype_paramnames def getencoding(self): if self.encodingheader == None: return '7bit' return string.lower(self.encodingheader) def getdisposition(self): return self.disposition def gettype(self): return self.type def getmaintype(self): return self.maintype def getsubtype(self): return self.subtype class I18nDict(UserDict): """Dictionary-like class that keeps track of i18n data (specifically language and character set) for each of it's entries. adds four methods: d.get_charset(key) d.get_lang(key) d.set_charset(key,value) d.set_lang(key,value) for getting and setting i18n data for each key. the get_* methods return None if there is no i18n data for that key Attempting to set i18n data for a nonexistant key throws a KeyError. 
""" def __init__(self, dict=None): self.charset_data={} self.lang_data={} UserDict.__init__(self) if dict is not None: self.update(dict) def __delitem__(self, key): del self.data[key] try: del self.charset_data[key] except KeyError: pass try: del self.lang_data[key] except KeyError: pass def update(self, dict): if isinstance(dict, I18nDict): self.data.update(dict.data) for k in dict.keys(): self.set_lang(k,dict.get_lang(k)) self.set_charset(k,dict.get_charset(k)) else: UserDict.update(self,dict) def clear(self): self.data.clear() self.charset_data.clear() self.lang_data.clear() def copy(self): if self.__class__ is I18nDict: return I18nDict(self.data) else: import copy return copy.copy(self) def set_lang(self,key,val=None): if self.has_key(key): if val is not None: self.lang_data[key]=val else: try: del self.lang_data[key] except KeyError: pass else: raise KeyError,key def set_charset(self,key,val=None): if self.has_key(key): if val is not None: self.charset_data[key]=val else: try: del self.charset_data[key] except KeyError: pass else: raise KeyError,key def get_lang(self,key,failobj=None): return self.lang_data.get(key,failobj) def get_charset(self,key,failobj=None): return self.charset_data.get(key,failobj) # Utility functions # ----------------- #parses a MIME parameter list def parseplist(pstr): """ Parse a MIME parameter list. Should comply to rfc2045, rfc2231. -- The Dragon De Monsyne <dragondm@integral.org> Will decode rfc2231 % escapes and param 'paramname*<number>' folding. Also breaks off lang and charset info, if it is specified using MIME 'charset'lang'value extended notation. Rfc 822 comments should be properly ignored. Unquoted whitespace is also ignored. this parser tries to be pathalogically tolerant. NOTES: (1) This function actually returns a dictionary-like class instance (an instance of mimetools.I18nDict). Any lang or charset info present for an item can be obtained using that class's get_lang and get_charset methods. 
Otherwise the object returned acts like a normal dictionary. (2) This function returns a dictionary(-like object) whose keys are param names, (mapped to lower case) and whose values are the matching param values. This is different to the mimetools Message class's parseplist() method which creates a list of "param=value" strings. IMHO, that format is unneccesary, and simply makes more work for the programmer, as: a) MIME parameter lists are explicitly unordered (so sayeth the rfc) and the only other reason to use a list, multiple parameters with the same name, while not explicitly disallowed, would implicitly violate rfc2231 (to explain that: rfc2231 states that parameters may be broken-up (i.e. by user-agents) across multiple parameters by using a special syntax (paramname*sequence-number), and that this syntax must be transparent to MIME. It also states that some systems (e.g IMAP servers) must automatically decode such encoding for certain parameters. Since MIME parameter lists are unordered, multiple parameters with the same name would break this syntax, if two such parameters were 'folded', there would be no way of decoding what part went to which.) """ comment=0 params=I18nDict() thischr='' lastchr='' p=[] v=[] isValue=0 pstrlist=list(pstr) pstrlist.reverse() while pstrlist: lastchr=thischr thischr=pstrlist.pop() if thischr == ';': ppstr=string.lower(string.strip(string.join(p,''))) vpstr=string.join(v,'') if ppstr: params[ppstr]=vpstr p=[] v=[] isValue=0 elif thischr =='(': comment=comment+1 while pstrlist and comment: lastchr=thischr thischr = pstrlist.pop() if thischr == '(' and lastchr <>'\\': comment=comment+1 elif thischr == ')' and lastchr <>'\\': comment=comment-1 elif thischr =='=' : if len(p) >0: if p[-1] == '*': # rfc 2231 describes different semantics # for extended param values. Amongst other things # quoted strings aren't allowed, but % escapes are. 
-ddm #print "2: %s %s" % (str(p),p[-1]) isValue=2 else: #print "1: %s %s" % (str(p),p[-1]) isValue=1 elif thischr=='"' and isValue==1 : while pstrlist: lastchr = thischr thischr=pstrlist.pop() if thischr == '"' and lastchr<>'\\': break if thischr == '\\' and lastchr <>'\\': pass else: v.append(thischr) elif thischr=='%' and isValue==2 : xd1='' xd2='' try: xd1=pstrlist.pop() xd2=pstrlist.pop() try: v.append(chr(string.atoi(xd1+xd2,16))) except ValueError: #some bogus % escape ?? # mebbe someone fergot to quote something # pass it as it. -ddm v.append('%') v.append(xd1) v.append(xd2) lastchr=xd2 except IndexError: #blagh. Something got truncated, or bad % escape #pass as-is -ddm v.append('%') lastchr='%' if xd1: v.append(xd1) lastchr=xd1 elif thischr in string.whitespace: pass else: if isValue: v.append(thischr) else: p.append(thischr) ppstr=string.lower(string.strip(string.join(p,''))) vpstr=string.join(v,'') if ppstr: params[ppstr]=vpstr fparams={} #undo rfc2231 param folding. -ddm for each in params.keys(): if each[-1]=='*': paramname=each[:-1] else: paramname=each if '*' in paramname: pl=string.split(paramname,'*',1) if len(pl) <>2: break try: paramnum=string.atoi(pl[1],10) except ValueError: pass else: if paramnum==0: if each[-1]=='*': if not fparams.has_key(pl[0]+"*"): if fparams.has_key(pl[0]): fparams[pl[0]+"*"]=fparams[pl[0]] del fparams[pl[0]] else: fparams[pl[0]+"*"]={} fparams[pl[0]+'*'][0]=params[each] else: if not fparams.has_key(pl[0]): fparams[pl[0]]={} fparams[pl[0]][0]=params[each] else: if fparams.has_key(pl[0]+'*'): fparams[pl[0]+'*'][paramnum]=params[each] else: if not fparams.has_key(pl[0]): fparams[pl[0]]={} fparams[pl[0]][paramnum]=params[each] del params[each] for each in fparams.keys(): l=fparams[each].keys() l.sort() val="" for i in l: val=val+fparams[each][i] params[each]=val for each in params.keys(): if each[-1]== '*': k=string.strip(each[:-1]) v=params[each] del params[each] if not k: continue vl = string.split(v,"'",2) if len(vl)==3: 
params[k]=vl[2] charset=string.strip(vl[0]) lang=string.strip(vl[1]) if not charset: charset=None if not lang: lang=None params.set_charset(k,charset) params.set_lang(k,lang) else: # Something bogus here, punt -ddm params[k]=v return params def test_parseplist(): pptest = ['; this=that ;theotherthing="some thing here; really"; foo=bar', 'this=that; foo=bar', """;title*0*=us-ascii'en'This%20is%20even%20more%20 ;title*1*=%2A%2A%2Afun%2A%2A%2A%20 ;title*2="isn't it!" """, 'this=that (A comment; Really) ; foo=bar', 'this="that (hi!)"; (A comment; Really) ; foo=bar'] for each in pptest: print "-----text-----" print each print "-----parsed---" print parseplist(each) print "--------------" # Return a random string usable as a multipart boundary. # The method used is so that it is *very* unlikely that the same # string of characters will every occur again in the Universe, # so the caller needn't check the data it is packing for the # occurrence of the boundary. # # The boundary contains dots so you have to quote it in the header. _prefix = None def choose_boundary(): global _prefix import time import random if _prefix == None: import socket import os hostid = socket.gethostbyname(socket.gethostname()) try: uid = `os.getuid()` except: uid = '1' try: pid = `os.getpid()` except: pid = '1' _prefix = hostid + '.' + uid + '.' + pid timestamp = '%.3f' % time.time() seed = `random.randint(0, 32767)` return _prefix + '.' + timestamp + '.' 
+ seed # Subroutines for decoding some common content-transfer-types def decode(input, output, encoding): if encoding == 'base64': import base64 return base64.decode(input, output) if encoding == 'quoted-printable': import quopri return quopri.decode(input, output) if encoding in ('uuencode', 'x-uuencode', 'uue', 'x-uue'): import uu return uu.decode(input, output) if decodetab.has_key(encoding): pipethrough(input, decodetab[encoding], output) else: raise ValueError, \ 'unknown Content-Transfer-Encoding: %s' % encoding def encode(input, output, encoding): if encoding == 'base64': import base64 return base64.encode(input, output) if encoding == 'quoted-printable': import quopri return quopri.encode(input, output, 0) if encoding in ('uuencode', 'x-uuencode', 'uue', 'x-uue'): import uu return uu.encode(input, output) if encodetab.has_key(encoding): pipethrough(input, encodetab[encoding], output) else: raise ValueError, \ 'unknown Content-Transfer-Encoding: %s' % encoding # The following is no longer used for standard encodings # XXX This requires that uudecode and mmencode are in $PATH uudecode_pipe = '''( TEMP=/tmp/@uu.$$ sed "s%^begin [0-7][0-7]* .*%begin 600 $TEMP%" | uudecode cat $TEMP rm $TEMP )''' decodetab = { 'uuencode': uudecode_pipe, 'x-uuencode': uudecode_pipe, 'uue': uudecode_pipe, 'x-uue': uudecode_pipe, 'quoted-printable': 'mmencode -u -q', 'base64': 'mmencode -u -b', } encodetab = { 'x-uuencode': 'uuencode tempfile', 'uuencode': 'uuencode tempfile', 'x-uue': 'uuencode tempfile', 'uue': 'uuencode tempfile', 'quoted-printable': 'mmencode -q', 'base64': 'mmencode -b', } def pipeto(input, command): pipe = os.popen(command, 'w') copyliteral(input, pipe) pipe.close() def pipethrough(input, command, output): tempname = tempfile.mktemp() try: temp = open(tempname, 'w') except IOError: print '*** Cannot create temp file', `tempname` return copyliteral(input, temp) temp.close() pipe = os.popen(command + ' <' + tempname, 'r') copybinary(pipe, output) pipe.close() 
os.unlink(tempname) def copyliteral(input, output): while 1: line = input.readline() if not line: break output.write(line) def copybinary(input, output): BUFSIZE = 8192 while 1: line = input.read(BUFSIZE) if not line: break output.write(line)