[Zope-Checkins] SVN: Zope/branches/2.9/lib/python/Shared/DC/ZRDB/ - actively remove entries from the cache when they are found to be stale. This removes the possible causes of both the KeyError bug that Brian patched in Dec 2000 and the KeyError bug reported in #2212

Chris Withers chris at simplistix.co.uk
Fri Nov 17 12:23:23 EST 2006


Log message for revision 71160:
  - actively remove entries from the cache when they are found to be stale. This removes the possible causes of both the KeyError bug that Brian patched in Dec 2000 and the KeyError bug reported in #2212
  - remove arbitary "divide by 2" on when we start checking to empty the cache and tcache, it looks like this may have once made sense when the tcache could grow without the cache growing, but I still can't really see how it made sense :-S
  - fix of-by-one errors on cache size

Changed:
  U   Zope/branches/2.9/lib/python/Shared/DC/ZRDB/DA.py
  U   Zope/branches/2.9/lib/python/Shared/DC/ZRDB/tests/test_caching.py

-=-
Modified: Zope/branches/2.9/lib/python/Shared/DC/ZRDB/DA.py
===================================================================
--- Zope/branches/2.9/lib/python/Shared/DC/ZRDB/DA.py	2006-11-17 16:56:34 UTC (rev 71159)
+++ Zope/branches/2.9/lib/python/Shared/DC/ZRDB/DA.py	2006-11-17 17:23:22 UTC (rev 71160)
@@ -361,20 +361,25 @@
         max_cache=self.max_cache_
         now=time()
         t=now-self.cache_time_
-        if len(cache) > max_cache / 2:
+        if len(cache) >= max_cache:
             keys=tcache.keys()
             keys.reverse()
-            while keys and (len(keys) > max_cache or keys[-1] < t):
+            while keys and (len(keys) >= max_cache or keys[-1] < t):
                 key=keys[-1]
                 q=tcache[key]
                 del tcache[key]
-                if cache[q][0] == key:
-                    del cache[q]
+                del cache[q]
                 del keys[-1]
 
         if cache.has_key(cache_key):
             k, r = cache[cache_key]
-            if k > t: return r
+            if k > t:
+                # yay! a cached result returned!
+                return r
+            else:
+                # delete stale cache entries
+                del cache[cache_key]
+                del tcache[k]
 
         # call the pure query
         result=DB__.query(query,max_rows)

Modified: Zope/branches/2.9/lib/python/Shared/DC/ZRDB/tests/test_caching.py
===================================================================
--- Zope/branches/2.9/lib/python/Shared/DC/ZRDB/tests/test_caching.py	2006-11-17 16:56:34 UTC (rev 71159)
+++ Zope/branches/2.9/lib/python/Shared/DC/ZRDB/tests/test_caching.py	2006-11-17 17:23:22 UTC (rev 71160)
@@ -145,38 +145,24 @@
             {1.1: ('query1',1,'conn_id'),
              3.2: ('query2',1,'conn_id'),}
             )
-        # three
+        # three - now we drop our first cache entry
         self._do_query('query3',4.3)
+        # XXX oops - because dicts have an arbitary ordering, we dumped the wrong key!
         self._check_cache(
             {('query1',1,'conn_id'): (1.1,'result for query1'),
-             ('query2',1,'conn_id'): (3.2,'result for query2'),
              ('query3',1,'conn_id'): (4.3,'result for query3'),},
             {1.1: ('query1',1,'conn_id'),
-             3.2: ('query2',1,'conn_id'),
              4.3: ('query3',1,'conn_id'),}
             )
-        # four - now we drop our first cache entry, this is an off-by-one error
+        # four - now we drop our second cache entry
         self._do_query('query4',8.4)
         # XXX oops - because dicts have an arbitary ordering, we dumped the wrong key!
         self._check_cache(
-            {('query2',1,'conn_id'): (3.2,'result for query2'),
-             ('query1',1,'conn_id'): (1.1,'result for query1'),
+            {('query1',1,'conn_id'): (1.1,'result for query1'),
              ('query4',1,'conn_id'): (8.4,'result for query4'),},
             {1.1: ('query1',1,'conn_id'),
-             3.2: ('query2',1,'conn_id'),
              8.4: ('query4',1,'conn_id'),}
             )
-        # five - now we drop another cache entry
-        self._do_query('query5',9.5)
-        # XXX oops - because dicts have an arbitary ordering, we dumped the wrong key!
-        self._check_cache(
-            {('query1',1,'conn_id'): (1.1,'result for query1'),
-             ('query2',1,'conn_id'): (3.2,'result for query2'),
-             ('query5',1,'conn_id'): (9.5,'result for query5'),},
-            {1.1: ('query1',1,'conn_id'),
-             3.2: ('query2',1,'conn_id'),
-             9.5: ('query5',1,'conn_id'),}
-            )
         
     def test_different_queries_same_second(self):
         # This tests different queries being fired into the cache
@@ -198,37 +184,23 @@
             {1.0: ('query1',1,'conn_id'),
              1.1: ('query2',1,'conn_id'),}
             )
-        # three
+        # three - now we drop our first cache entry
         self._do_query('query3',1.2)
         self._check_cache(
-            {('query1',1,'conn_id'): (1.0,'result for query1'),
-             ('query2',1,'conn_id'): (1.1,'result for query2'),
+            {('query2',1,'conn_id'): (1.1,'result for query2'),
              ('query3',1,'conn_id'): (1.2,'result for query3'),},
-            {1.0: ('query1',1,'conn_id'),
-             1.1: ('query2',1,'conn_id'),
+            {1.1: ('query2',1,'conn_id'),
              1.2: ('query3',1,'conn_id'),}
             )
-        # four - now we drop our first cache entry, this is an off-by-one error
+        # four - now we drop another cache entry
         self._do_query('query4',1.3)
+        # XXX oops - because dicts have an arbitary ordering, we dumped the wrong key!
         self._check_cache(
             {('query2',1,'conn_id'): (1.1,'result for query2'),
-             ('query3',1,'conn_id'): (1.2,'result for query3'),
              ('query4',1,'conn_id'): (1.3,'result for query4'),},
             {1.1: ('query2',1,'conn_id'),
-             1.2: ('query3',1,'conn_id'),
              1.3: ('query4',1,'conn_id'),}
             )
-        # five - now we drop another cache entry
-        self._do_query('query5',1.4)
-        self._check_cache(
-            # XXX - dicts have unordered keys, so we drop records early here
-            {('query3',1,'conn_id'): (1.2,'result for query3'),
-             ('query2',1,'conn_id'): (1.1,'result for query2'),
-             ('query5',1,'conn_id'): (1.4,'result for query5'),},
-            {1.2: ('query3',1,'conn_id'),
-             1.1: ('query2',1,'conn_id'),
-             1.4: ('query5',1,'conn_id'),}
-            )
 
     def test_time_tcache_expires(self):
         # the first query gets cached
@@ -237,15 +209,15 @@
             {('query1',1,'conn_id'): (1,'result for query1')},
             {1: ('query1',1,'conn_id')}
             )
-        # the 2nd gets cached, the cache is still smaller than max_cache / 2
-        self._do_query('query2',12)
+        # the 2nd gets cached, the cache is still smaller than max_cache
+        self._do_query('query2',2)
         self._check_cache(
             {('query1',1,'conn_id'): (1,'result for query1'),
-             ('query2',1,'conn_id'): (12,'result for query2')},
+             ('query2',1,'conn_id'): (2,'result for query2')},
             {1: ('query1',1,'conn_id'),
-             12:('query2',1,'conn_id')}
+             2:('query2',1,'conn_id')}
             )
-        # the 2rd trips the max_cache/2 trigger, so both our old queries get
+        # the 3rd trips the max_cache trigger, so both our old queries get
         # dumped
         self._do_query('query',23)
         self._check_cache(
@@ -261,69 +233,12 @@
             {1: ('query1',1,'conn_id')}
             )
         # do the same query much later, so new one gets cached
-        # XXX memory leak as old tcache entry is left lying around
         self._do_query('query1',12)
         self._check_cache(
             {('query1',1,'conn_id'): (12,'result for query1')},
-            {1: ('query1',1,'conn_id'), # XXX why is 1 still here?
-             12: ('query1',1,'conn_id')}
+            {12: ('query1',1,'conn_id')}
             )
-    def test_cachetime_doesnt_match_tcache_time(self):
-        # get some old entries for one query in
-        # (the time are carefully picked to give out-of-order dict keys)
-        self._do_query('query1',1)
-        self._do_query('query1',19)
-        self._do_query('query1',44)
-        self._check_cache(
-            {('query1',1,'conn_id'): (44,'result for query1')},
-            {1: ('query1',1,'conn_id'),
-             19: ('query1',1,'conn_id'),
-             44: ('query1',1,'conn_id')}
-            )
-        # now do another query
-        self._do_query('query2',44.1)
-        # XXX whoops, because {4:True,19:True,44:True,44.1:True}.keys()==[1,19,44,44.1]
-        # the cache/tcache clearing code deletes the cache entry and
-        # then tries to do it again later with an older tcache entry.
-        # brian's patch in Dec 2000 works around this.
-        self._do_query('query3',44.2)
-        self._check_cache(
-            {('query1',1,'conn_id'): (44,'result for query1'),
-             ('query2',1,'conn_id'): (44.1,'result for query2'),
-             ('query3',1,'conn_id'): (44.2,'result for query3')},
-            {44: ('query1',1,'conn_id'),
-             44.1: ('query2',1,'conn_id'),
-             44.2: ('query3',1,'conn_id'),
-             }
-            )
-        
-    def test_cachefull_but_not_old(self):
-        # get some old entries for one query in
-        # (the time are carefully picked to give out-of-order dict keys)
-        self._do_query('query1',4)
-        self._do_query('query1',19)
-        self._do_query('query1',44)
-        self._check_cache(
-            {('query1',1,'conn_id'): (44,'result for query1')},
-            {4: ('query1',1,'conn_id'),
-             19: ('query1',1,'conn_id'),
-             44: ('query1',1,'conn_id'),}
-            )
-        # now do another query
-        self._do_query('query2',41.1)
-        # XXX whoops, because {4:True,19:True,44:True,44.1}.keys()==[44,19,4,44.1]
-        # the cache/tcache clearing code makes a mistake:
-        # - the first time round the while loop, we remove 43 from the cache
-        #   and tcache 'cos it matches the time in the cache
-        # - the 2nd time round, 5 is "too old", so we attempt to check
-        #   if it matches the query in the cache, which blows up :-)
-        self.assertRaises(KeyError,self._do_query,'query3',41.2)
-        self._check_cache(
-            {('query2',1,'conn_id'): (41.1,'result for query2')},
-            {4.0: ('query1',1,'conn_id'),
-             41.1: ('query2',1,'conn_id'),}
-            )
-        
+
 class DummyDA:
 
     def __call__(self):



More information about the Zope-Checkins mailing list