[Zodb-checkins] SVN: ZODB/trunk/src/ Added fsIndex save method and fsIndex load class method for saving and loading index data
Jim Fulton
jim at zope.com
Tue Feb 2 11:50:46 EST 2010
Log message for revision 108720:
Added fsIndex save method and fsIndex load class method for saving and
loading index data. This leverages the new fsBucket toString and
fromString methods and provides much faster FileStorage index saving and loading
and smaller index files. On my machine, saves are 5 times faster and
loads are 20 times faster (after a save, when data are in disk
cache). Indexes are roughly 30% smaller.
The index format has changed. Old indexes can be read just fine, but
new indexes won't be readable by older versions of ZODB.
Changed:
U ZODB/trunk/src/CHANGES.txt
U ZODB/trunk/src/ZODB/FileStorage/FileStorage.py
U ZODB/trunk/src/ZODB/fsIndex.py
U ZODB/trunk/src/ZODB/tests/testFileStorage.py
U ZODB/trunk/src/ZODB/tests/testfsIndex.py
-=-
Modified: ZODB/trunk/src/CHANGES.txt
===================================================================
--- ZODB/trunk/src/CHANGES.txt 2010-02-02 16:50:44 UTC (rev 108719)
+++ ZODB/trunk/src/CHANGES.txt 2010-02-02 16:50:46 UTC (rev 108720)
@@ -2,12 +2,16 @@
Change History
================
-3.10.0a1 (2009-12-??)
+3.10.0a1 (2010-02-??)
=====================
New Features
------------
+- FileStorage indexes use a new format. They are saved and loaded much
+ faster and take less space. Old indexes can still be read, but new
+ indexes won't be readable by older versions of ZODB.
+
- The API for undoing multiple transactions has changed. To undo
multiple transactions in a single transaction, pass a list of
transaction identifiers to a database's undoMultiple method. Calling a
Modified: ZODB/trunk/src/ZODB/FileStorage/FileStorage.py
===================================================================
--- ZODB/trunk/src/ZODB/FileStorage/FileStorage.py 2010-02-02 16:50:44 UTC (rev 108719)
+++ ZODB/trunk/src/ZODB/FileStorage/FileStorage.py 2010-02-02 16:50:46 UTC (rev 108720)
@@ -246,24 +246,8 @@
index_name = self.__name__ + '.index'
tmp_name = index_name + '.index_tmp'
- f=open(tmp_name,'wb')
- p=Pickler(f,1)
+ self._index.save(self._pos, tmp_name)
- # Pickle the index buckets first to avoid deep recursion:
- buckets = []
- bucket = self._index._data._firstbucket
- while bucket is not None:
- buckets.append(bucket)
- bucket = bucket._next
- buckets.reverse()
-
- info=BTrees.OOBTree.Bucket(dict(
- _buckets=buckets, index=self._index, pos=self._pos))
-
- p.dump(info)
- f.flush()
- f.close()
-
try:
try:
os.remove(index_name)
@@ -357,19 +341,15 @@
file_name=self.__name__
index_name=file_name+'.index'
- try:
- f = open(index_name, 'rb')
- except:
+ if os.path.exists(index_name):
+ try:
+ info = fsIndex.load(index_name)
+ except:
+ logger.exception('loading index')
+ return None
+ else:
return None
- p=Unpickler(f)
-
- try:
- info=p.load()
- except:
- exc, err = sys.exc_info()[:2]
- logger.warning("Failed to load database index: %s: %s", exc, err)
- return None
index = info.get('index')
pos = info.get('pos')
if index is None or pos is None:
Modified: ZODB/trunk/src/ZODB/fsIndex.py
===================================================================
--- ZODB/trunk/src/ZODB/fsIndex.py 2010-02-02 16:50:44 UTC (rev 108719)
+++ ZODB/trunk/src/ZODB/fsIndex.py 2010-02-02 16:50:46 UTC (rev 108720)
@@ -39,6 +39,7 @@
# bytes back before using u64 to convert the data back to (long)
# integers.
+import cPickle
import struct
from BTrees._fsBTree import fsBucket
@@ -62,12 +63,62 @@
class fsIndex(object):
- def __init__(self):
+ def __init__(self, data=None):
self._data = OOBTree()
+ if data:
+ self.update(data)
+ def __getstate__(self):
+ return dict(
+ state_version = 1,
+ _data = [(k, v.toString())
+ for (k, v) in self._data.iteritems()
+ ]
+ )
+
+ def __setstate__(self, state):
+ version = state.pop('state_version', 0)
+ getattr(self, '_setstate_%s' % version)(state)
+
+ def _setstate_0(self, state):
+ self.__dict__.clear()
+ self.__dict__.update(state)
+
+ def _setstate_1(self, state):
+ self._data = OOBTree([
+ (k, fsBucket().fromString(v))
+ for (k, v) in state['_data']
+ ])
+
def __getitem__(self, key):
return str2num(self._data[key[:6]][key[6:]])
+ def save(self, pos, fname):
+ with open(fname, 'wb') as f:
+ pickler = cPickle.Pickler(f, 1)
+ pickler.fast = True
+ pickler.dump(pos)
+ for k, v in self._data.iteritems():
+ pickler.dump((k, v.toString()))
+ pickler.dump(None)
+
+ @classmethod
+ def load(class_, fname):
+ with open(fname, 'rb') as f:
+ unpickler = cPickle.Unpickler(f)
+ pos = unpickler.load()
+ if not isinstance(pos, (int, long)):
+ return pos # Old format
+ index = class_()
+ data = index._data
+ while 1:
+ v = unpickler.load()
+ if not v:
+ break
+ k, v = v
+ data[k] = fsBucket().fromString(v)
+ return dict(pos=pos, index=index)
+
def get(self, key, default=None):
tree = self._data.get(key[:6], default)
if tree is default:
Modified: ZODB/trunk/src/ZODB/tests/testFileStorage.py
===================================================================
--- ZODB/trunk/src/ZODB/tests/testFileStorage.py 2010-02-02 16:50:44 UTC (rev 108719)
+++ ZODB/trunk/src/ZODB/tests/testFileStorage.py 2010-02-02 16:50:46 UTC (rev 108720)
@@ -11,6 +11,7 @@
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
+import cPickle
import os, unittest
import transaction
import ZODB.FileStorage
@@ -19,6 +20,7 @@
import zope.testing.setupstack
from ZODB import POSException
from ZODB import DB
+from ZODB.fsIndex import fsIndex
from ZODB.tests import StorageTestBase, BasicStorage, TransactionalUndoStorage
from ZODB.tests import PackableStorage, Synchronization, ConflictResolution
@@ -69,7 +71,6 @@
self.fail("expect long user field to raise error")
def check_use_fsIndex(self):
- from ZODB.fsIndex import fsIndex
self.assertEqual(self._storage._index.__class__, fsIndex)
@@ -78,21 +79,13 @@
def convert_index_to_dict(self):
# Convert the index in the current .index file to a Python dict.
# Return the index originally found.
- import cPickle as pickle
-
- f = open('FileStorageTests.fs.index', 'r+b')
- p = pickle.Unpickler(f)
- data = p.load()
+ data = fsIndex.load('FileStorageTests.fs.index')
index = data['index']
newindex = dict(index)
data['index'] = newindex
- f.seek(0)
- f.truncate()
- p = pickle.Pickler(f, 1)
- p.dump(data)
- f.close()
+ cPickle.dump(data, open('FileStorageTests.fs.index', 'wb'), 1)
return index
def check_conversion_to_fsIndex(self, read_only=False):
Modified: ZODB/trunk/src/ZODB/tests/testfsIndex.py
===================================================================
--- ZODB/trunk/src/ZODB/tests/testfsIndex.py 2010-02-02 16:50:44 UTC (rev 108719)
+++ ZODB/trunk/src/ZODB/tests/testfsIndex.py 2010-02-02 16:50:46 UTC (rev 108720)
@@ -11,11 +11,13 @@
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
+import doctest
+import random
import unittest
-import random
from ZODB.fsIndex import fsIndex
from ZODB.utils import p64, z64
+from ZODB.tests.util import setUp, tearDown
class Test(unittest.TestCase):
@@ -30,7 +32,7 @@
index = self.index
self.assert_(p64(1000) in index)
self.assert_(p64(100*1000) in index)
-
+
del self.index[p64(1000)]
del self.index[p64(100*1000)]
@@ -186,9 +188,44 @@
self.assertEqual(index.minKey(b), c)
self.assertRaises(ValueError, index.minKey, d)
+def fsIndex_save_and_load():
+ """
+fsIndex objects now have save methods for saving them to disk in a new
+format. The fsIndex class has a load class method that can load data.
+
+Let's start by creating an fsIndex. We'll take care to space out the
+object ids so that we get multiple buckets:
+
+ >>> index = fsIndex(dict((p64(i), i) for i in xrange(0, 1<<28, 1<<15)))
+ >>> len(index._data)
+ 4096
+
+Now, we'll save the data to disk and then load it:
+
+ >>> index.save(42, 'index')
+
+Note that we pass a file position, which gets saved with the index data.
+
+ >>> info = fsIndex.load('index')
+ >>> info['pos']
+ 42
+ >>> info['index'].__getstate__() == index.__getstate__()
+ True
+
+If we save the data in the old format, we can still read it:
+
+ >>> import cPickle
+ >>> cPickle.dump(dict(pos=42, index=index), open('old', 'wb'), 1)
+ >>> info = fsIndex.load('old')
+ >>> info['pos']
+ 42
+ >>> info['index'].__getstate__() == index.__getstate__()
+ True
+
+ """
+
def test_suite():
- loader=unittest.TestLoader()
- return loader.loadTestsFromTestCase(Test)
-
-if __name__=='__main__':
- unittest.TextTestRunner().run(test_suite())
+ suite = unittest.TestSuite()
+ suite.addTest(unittest.makeSuite(Test))
+ suite.addTest(doctest.DocTestSuite(setUp=setUp, tearDown=tearDown))
+ return suite
More information about the Zodb-checkins
mailing list