[Zodb-checkins] SVN: ZODB/trunk/src/ZODB/ Add a record iteration protocol to FileStorage. You can use the record iterator to iterate over all current revisions

Sun Mar 20 16:49:14 EST 2005

Log message for revision 29598:
  Add a record iteration protocol to FileStorage.  You can use the record iterator to iterate over all current revisions
  of data pickles in the storage.
  
  In order to support calling via ZEO, we don't implement this as an actual iterator.  An example of using the record iterator
  protocol is as follows:
  
  storage = FileStorage('anexisting.fs')
  next_oid = None
  while 1:
      oid, tid, data, next_oid = storage.record_iternext(next_oid)
      # do something with oid, tid and data
      if next_oid is None:
          break
  
  The behavior of the iteration protocol is now to iterate over all current records in the database in ascending oid order, 
  although this is not a promise to do so in the future.
  
  

Changed:
  U   ZODB/trunk/src/ZODB/FileStorage/FileStorage.py
  U   ZODB/trunk/src/ZODB/fsIndex.py
  U   ZODB/trunk/src/ZODB/tests/testFileStorage.py
  U   ZODB/trunk/src/ZODB/tests/testfsIndex.py

-=-
Modified: ZODB/trunk/src/ZODB/FileStorage/FileStorage.py
===================================================================

--- ZODB/trunk/src/ZODB/FileStorage/FileStorage.py	2005-03-20 21:40:14 UTC (rev 29597)
+++ ZODB/trunk/src/ZODB/FileStorage/FileStorage.py	2005-03-20 21:49:13 UTC (rev 29598)
@@ -371,8 +371,13 @@
             return None
         pos = long(pos)
 
-        if isinstance(index, DictType):
-            # Convert to fsIndex.
+        if (
+            isinstance(index, DictType) or
+            (isinstance(index, fsIndex) and isinstance(index._data, DictType))
+             ):
+            # Convert dictionary indexes to fsIndexes *or* convert fsIndexes
+            # which have a DictType `_data` attribute to a new fsIndex (newer
+            # fsIndexes have an OOBTree as `_data`)
             newindex = fsIndex()
             newindex.update(index)
             index = newindex
@@ -1397,7 +1402,20 @@
                 if e.errno != errno.ENOENT:
                     raise
 
+    def record_iternext(self, next=None):
+        index = self._index
+        oid = index.minKey(next)
+        
+        try:
+            next_oid = index.minKey(self.new_oid(oid))
+        except ValueError: # "empty tree" error
+            next_oid = None
 
+        data, tid = self.load(oid, None) # ignore versions
+        return oid, tid, data, next_oid
+        
+
+
 def shift_transactions_forward(index, vindex, tindex, file, pos, opos):
     """Copy transactions forward in the data file
 

Modified: ZODB/trunk/src/ZODB/fsIndex.py
===================================================================
--- ZODB/trunk/src/ZODB/fsIndex.py	2005-03-20 21:40:14 UTC (rev 29597)
+++ ZODB/trunk/src/ZODB/fsIndex.py	2005-03-20 21:49:13 UTC (rev 29598)
@@ -39,6 +39,7 @@
 import struct
 
 from BTrees._fsBTree import fsBucket
+from BTrees.OOBTree import OOBTree
 
 # convert between numbers and six-byte strings
 
@@ -48,10 +49,18 @@
 def str2num(s):
     return struct.unpack(">Q", "\000\000" + s)[0]
 
+def prefix_plus_one(s):
+    num = str2num(s)
+    return num2str(num + 1)
+
+def prefix_minus_one(s):
+    num = str2num(s)
+    return num2str(num - 1)
+
 class fsIndex(object):
 
     def __init__(self):
-        self._data = {}
+        self._data = OOBTree()
 
     def __getitem__(self, key):
         return str2num(self._data[key[:6]][key[6:]])
@@ -126,31 +135,61 @@
     def values(self):
         return list(self.itervalues())
 
-    def maxKey(self):
-        # This is less general than the BTree method of the same name:  we
-        # only care about the largest key in the entire tree.  By
-        # construction, that's the largest oid in use in the associated
-        # FileStorage.
+    # Comment below applies for the following minKey and maxKey methods
+    #
+    # Obscure:  what if `tree` is actually empty?  We're relying here on
+    # that this class doesn't implement __delitem__:  once a key gets
+    # into an fsIndex, the only way it can go away is by invoking
+    # clear().  Therefore nothing in _data.values() is ever empty.
+    #
+    # Note that because `tree` is an fsBTree, its minKey()/maxKey() methods are
+    # very efficient.
 
-        keys = self._data.keys()
-        if not keys:
-            # This is the same exception a BTree maxKey() raises when the
-            # tree is empty.
-            raise ValueError("empty tree")
+    def minKey(self, key=None):
+        if key is None:
+            smallest_prefix = self._data.minKey()
+        else:
+            smallest_prefix = self._data.minKey(key[:6])
+            
+        tree = self._data[smallest_prefix]
 
-        # We expect that keys is small, since each fsBTree in _data.values()
-        # can hold as many as 2**16 = 64K entries.  So this max() should go
-        # fast too.  Regardless, there's no faster way to find the largest
-        # prefix.
-        biggest_prefix = max(keys)
+        assert tree
+
+        if key is None:
+            smallest_suffix = tree.minKey()
+        else:
+            try:
+                smallest_suffix = tree.minKey(key[6:])
+            except ValueError: # 'empty tree' (no suffix >= arg)
+                next_prefix = prefix_plus_one(smallest_prefix)
+                smallest_prefix = self._data.minKey(next_prefix)
+                tree = self._data[smallest_prefix]
+                assert tree
+                smallest_suffix = tree.minKey()
+
+        return smallest_prefix + smallest_suffix
+
+    def maxKey(self, key=None):
+        if key is None:
+            biggest_prefix = self._data.maxKey()
+        else:
+            biggest_prefix = self._data.maxKey(key[:6])
+
         tree = self._data[biggest_prefix]
 
-        # Obscure:  what if tree is actually empty?  We're relying here on
-        # that this class doesn't implement __delitem__:  once a key gets
-        # into an fsIndex, the only way it can go away is by invoking
-        # clear().  Therefore nothing in _data.values() is ever empty.
-        #
-        # Note that because `tree` is an fsBTree, its maxKey() method is very
-        # efficient.
         assert tree
-        return biggest_prefix + tree.maxKey()
+
+        if key is None:
+            biggest_suffix = tree.maxKey()
+        else:
+            try:
+                biggest_suffix = tree.maxKey(key[6:])
+            except ValueError: # 'empty tree' (no suffix <= arg)
+                next_prefix = prefix_minus_one(biggest_prefix)
+                biggest_prefix = self._data.maxKey(next_prefix)
+                tree = self._data[biggest_prefix]
+                assert tree
+                biggest_suffix = tree.maxKey()
+
+        return biggest_prefix + biggest_suffix
+

Modified: ZODB/trunk/src/ZODB/tests/testFileStorage.py
===================================================================
--- ZODB/trunk/src/ZODB/tests/testFileStorage.py	2005-03-20 21:40:14 UTC (rev 29597)
+++ ZODB/trunk/src/ZODB/tests/testFileStorage.py	2005-03-20 21:49:13 UTC (rev 29598)
@@ -137,6 +137,41 @@
         # Python dict.
         self.check_conversion_to_fsIndex(read_only=True)
 
+    def check_conversion_from_dict_to_btree_data_in_fsIndex(self):
+        # To support efficient range searches on its keys as part of
+        # implementing a record iteration protocol in FileStorage, we
+        # converted the fsIndex class from using a dictionary as its
+        # self._data attribute to using an OOBTree in its stead.
+
+        from ZODB.fsIndex import fsIndex
+        from BTrees.OOBTree import OOBTree
+
+        # Create some data, and remember the index.
+        for i in range(10):
+            self._dostore()
+        data_dict = dict(self._storage._index._data)
+
+        # Replace the OOBTree with a dictionary and commit it.
+        self._storage._index._data = data_dict
+        get_transaction().commit()
+
+        # Save the index.
+        self._storage.close()
+
+        # Verify it's converted to fsIndex in memory upon open.
+        self.open()
+        self.assert_(isinstance(self._storage._index, fsIndex))
+        self.assert_(isinstance(self._storage._index._data, OOBTree))
+
+        # Verify it has the right content.
+        new_data_dict = dict(self._storage._index._data)
+        self.assertEqual(len(data_dict), len(new_data_dict))
+
+        for k in data_dict:
+            old_tree = data_dict[k]
+            new_tree = new_data_dict[k]
+            self.assertEqual(list(old_tree.items()), list(new_tree.items()))
+
     def check_save_after_load_with_no_index(self):
         for i in range(10):
             self._dostore()
@@ -288,6 +323,35 @@
         else:
             self.fail("expected CorruptedError")
 
+    def check_record_iternext(self):
+        from ZODB.DB import DB
+
+        db = DB(self._storage)
+        conn = db.open()
+        conn.root()['abc'] = MinPO('abc')
+        conn.root()['xyz'] = MinPO('xyz')
+        get_transaction().commit()
+
+        # Ensure it's all on disk.
+        db.close()
+        self._storage.close()
+
+        self.open()
+
+        key = None
+        for x in ('\000', '\001', '\002'):
+            oid, tid, data, next_oid = self._storage.record_iternext(key)
+            self.assertEqual(oid, ('\000' * 7) + x)
+            key = next_oid
+            expected_data, expected_tid = self._storage.load(oid, '')
+            self.assertEqual(expected_data, data)
+            self.assertEqual(expected_tid, tid)
+            if x == '\002':
+                self.assertEqual(next_oid, None)
+            else:
+                self.assertNotEqual(next_oid, None)
+
+
 class FileStorageRecoveryTest(
     StorageTestBase.StorageTestBase,
     RecoveryStorage.RecoveryStorage,

Modified: ZODB/trunk/src/ZODB/tests/testfsIndex.py
===================================================================
--- ZODB/trunk/src/ZODB/tests/testfsIndex.py	2005-03-20 21:40:14 UTC (rev 29597)
+++ ZODB/trunk/src/ZODB/tests/testfsIndex.py	2005-03-20 21:49:13 UTC (rev 29598)
@@ -15,7 +15,7 @@
 import random
 
 from ZODB.fsIndex import fsIndex
-from ZODB.utils import p64
+from ZODB.utils import p64, z64
 
 
 class Test(unittest.TestCase):
@@ -130,6 +130,44 @@
             index_max = index.maxKey()
             self.assertEqual(index_max, correct_max)
 
+        index.clear()
+        a = '\000\000\000\000\000\001\000\000'
+        b = '\000\000\000\000\000\002\000\000'
+        c = '\000\000\000\000\000\003\000\000'
+        d = '\000\000\000\000\000\004\000\000'
+        index[a] = 1
+        index[c] = 2
+        self.assertEqual(index.maxKey(b), a)
+        self.assertEqual(index.maxKey(d), c)
+        self.assertRaises(ValueError, index.maxKey, z64)
+
+    def testMinKey(self):
+        index = self.index
+        index.clear()
+
+        # An empty index should complain.
+        self.assertRaises(ValueError, index.minKey)
+
+        # Now build up a tree with random values, and check maxKey at each
+        # step.
+        correct_min = "\xff" * 8   # bigger than anything we'll add
+        for i in range(1000):
+            key = p64(random.randrange(100000000))
+            index[key] = i
+            correct_min = min(correct_min, key)
+            index_min = index.minKey()
+            self.assertEqual(index_min, correct_min)
+
+        index.clear()
+        a = '\000\000\000\000\000\001\000\000'
+        b = '\000\000\000\000\000\002\000\000'
+        c = '\000\000\000\000\000\003\000\000'
+        d = '\000\000\000\000\000\004\000\000'
+        index[a] = 1
+        index[c] = 2
+        self.assertEqual(index.minKey(b), c)
+        self.assertRaises(ValueError, index.minKey, d)
+
 def test_suite():
     loader=unittest.TestLoader()
     return loader.loadTestsFromTestCase(Test)