[Zope3-checkins] CVS: Zope3/src/zodb/tests - emailbench.py:1.1
Jeremy Hylton
jeremy@zope.com
Mon, 21 Apr 2003 14:00:43 -0400
Update of /cvs-repository/Zope3/src/zodb/tests
In directory cvs.zope.org:/tmp/cvs-serv25669
Added Files:
emailbench.py
Log Message:
For lack of anything better, a simple benchmark of indexing email headers.
=== Added File Zope3/src/zodb/tests/emailbench.py ===
"""A simple benchmark of creating and indexing email messages.
The benchmark loads and indexes a set of email messages from a Unix
mailbox. The sender, subject, and message-id are used as index keys.
The benchmark commits every %(COMMIT_INTERVAL)d messages and reads at
most %(MAX)d messages from the mailbox. At the end of the test run,
it packs the storage. (The periodic commits made during the run, together
with the index updates, will generate many data records that can be packed.)
It prints a one-line summary of its activities.
500 100 11.81 2253868 2084414
The columns are:
- number of messages read
- commit interval
- elapsed CPU time
- size of file before pack
- size of file after pack
"""
import email
import mailbox
import os
import stat
import sys
import time
from persistence import Persistent
from persistence.dict import PersistentDict
from transaction import get_transaction
from zodb.db import DB
from zodb.storage.file import FileStorage
from zodb.btrees.OOBTree import OOBTree, OOSet
# Number of messages added between intermediate transaction commits.
COMMIT_INTERVAL = 200
# Upper bound on the number of messages loaded from the mailbox.
MAX = 5000
class Mailbox(Persistent):
    """Persistent collection of messages with three lookup indexes.

    messages maps a message id to the message itself; senders and
    subjects each map a header value to an OOSet of the messages that
    carry that value.  size counts the calls made to add().
    """

    def __init__(self):
        self.messages = OOBTree()
        self.subjects = OOBTree()
        self.senders = OOBTree()
        self.size = 0

    def __len__(self):
        """Return the number of messages added so far."""
        return self.size

    def add(self, msg):
        """Store msg under its msgid and index it by sender and subject.

        A message whose sender (or subject) is empty or None is simply
        left out of the corresponding index.
        """
        self.messages[msg.msgid] = msg
        indexes = (
            (self.senders, msg.sender),
            (self.subjects, msg.subject),
        )
        for index, key in indexes:
            if not key:
                continue
            bucket = index.get(key)
            if bucket is None:
                # First message seen for this key: start a new set.
                bucket = OOSet()
                index[key] = bucket
            bucket.insert(msg)
        self.size = self.size + 1
class Message(Persistent):
    """Persistent record of a single email message.

    Holds the sender, the list of recipient header values, the subject,
    a message id, the full header mapping and the payload.
    """

    def __init__(self, sender, recipients, subject, msgid, headers, payload):
        """Initialize the message from already-extracted fields.

        When msgid is None a substitute id is built from the current
        time and the id() of this object.
        """
        if msgid is None:
            msgid = "%s %s" % (time.ctime(), id(self))
        self.sender = sender
        self.recipients = recipients
        self.subject = subject
        self.msgid = msgid
        self.headers = headers
        self.payload = payload

    def fromEmail(cls, msg):
        """Alternate constructor: build an instance from a parsed email.

        msg must offer get(), get_payload() and be usable as the
        argument to a dict-style update().  The raw values of the
        "to", "cc" and "bcc" headers that are present become the
        recipients list, in that order.
        """
        found = [msg.get(name) for name in ("to", "cc", "bcc")]
        recipients = [value for value in found if value is not None]

        headers = PersistentDict()
        headers.update(msg)

        sender = msg.get("from")
        subject = msg.get("subject")
        msgid = msg.get("message-id")
        payload = msg.get_payload()
        return cls(sender, recipients, subject, msgid, headers, payload)
    fromEmail = classmethod(fromEmail)
def main(path):
    """Load messages from the Unix mailbox at path into a fresh storage.

    A new FileStorage named "emailbench.fs" is created, a Mailbox is
    stored under the "mailbox" key of the database root, and each
    message read from the mailbox file is converted to a Message and
    added to it.  The transaction is committed every COMMIT_INTERVAL
    messages, and loading stops once MAX messages have been added or
    the mailbox is exhausted.  Afterwards the storage is packed and the
    database closed.

    Returns the number of messages added.
    """
    f = open(path, "rb")
    try:
        fs = FileStorage("emailbench.fs", create=True)
        db = DB(fs)
        cn = db.open()
        root = cn.root()
        mbox = root["mailbox"] = Mailbox()
        get_transaction().commit()
        factory = mailbox.UnixMailbox(f, email.message_from_file)
        while 1:
            raw = factory.next()
            if raw is None:
                # The mailbox reader signals end of input with None.
                break
            msg = Message.fromEmail(raw)
            mbox.add(msg)
            if len(mbox) % COMMIT_INTERVAL == 0:
                get_transaction().commit()
            if len(mbox) >= MAX:
                break
    finally:
        # The input file is only needed while messages are being read;
        # close it even if loading fails part-way through.
        f.close()
    n = len(mbox)
    # Commit whatever was added since the last periodic commit.
    get_transaction().commit()
    fs.pack(time.time())
    db.close()
    return n
def size(path):
    """Return the size in bytes of the file at path, as reported by os.stat."""
    info = os.stat(path)
    return info.st_size
if __name__ == "__main__":
path = sys.argv[1]
t0 = time.clock()
n = main(path)
t1 = time.clock()
print n, COMMIT_INTERVAL, t1 - t0, size("emailbench.fs.old"), \
size("emailbench.fs")