[Zope-Checkins] CVS: Zope/lib/python/ZODB - cPickleCache.c:1.51

Toby Dickenson tdickenson@geminidataloggers.com
Wed, 3 Apr 2002 12:00:47 -0500


Update of /cvs-repository/Zope/lib/python/ZODB
In directory cvs.zope.org:/tmp/cvs-serv21155

Modified Files:
	cPickleCache.c 
Log Message:
added commentary

=== Zope/lib/python/ZODB/cPickleCache.c 1.50 => 1.51 ===
  ****************************************************************************/
 
+/*
+
+Objects are stored under three different regimes:
+
+Regime 1: Persistent Classes
+
+Persistent Classes are part of ZClasses. They are stored in the
+self->data dictionary, and are never garbage collected.
+
+The klass_items() method returns a sequence of (oid,object) tuples
+for every Persistent Class, which should make it possible to
+implement garbage collection in Python if necessary.
+
+Regime 2: Ghost Objects
+
+There is no benefit to keeping a ghost object which has no
+external references, therefore a weak reference scheme is
+used to ensure that ghost objects are removed from memory
+as soon as possible, when the last external reference is lost.
+
+Ghost objects are stored in the self->data dictionary. Normally
+a dictionary keeps a strong reference on its values, however
+this reference count is 'stolen'.
+
+This weak reference scheme leaves a dangling reference, in the
+dictionary, when the last external reference is lost. To clean up
+this dangling reference the persistent object dealloc function
+calls self->cache->_oid_unreferenced(self->oid). The cache looks
+up the oid in the dictionary, ensures it points to an object whose
+reference count is zero, then removes it from the dictionary. Before
+removing the object from the dictionary it must temporarily resurrect
+the object in much the same way that class instances are resurrected
+before their __del__ is called.
+
+Since ghost objects are stored under a different regime to
+non-ghost objects, an extra ghostify function in cPersistenceAPI
+replaces self->state=GHOST_STATE assignments that were common in
+other persistent classes (such as BTrees).
+
+Regime 3: Non-Ghost Objects
+
+Non-ghost objects are stored in two data structures. Firstly, in
+the dictionary along with everything else, with a *strong* reference.
+Secondly, they are stored in a doubly-linked-list which encodes
+the order in which these objects have been most recently used.
+
+The doubly-linked-list nodes contain next and previous pointers
+linking together the cache and all non-ghost persistent objects.
+
+The node embedded in the cache is the home position. On every
+attribute access a non-ghost object will relink itself just
+behind the home position in the ring. Objects accessed least
+recently will eventually find themselves positioned after
+the home position.
+
+Occasionally other nodes are temporarily inserted in the ring
+as position markers. The cache contains a ring_lock flag which
+must be set and unset before and after doing so. Only if the flag
+is unset can the cache assume that all nodes are either its own
+home node, or nodes from persistent objects. This assumption is
+useful during the garbage collection process.
+
+The number of non-ghost objects is counted in self->non_ghost_count.
+The garbage collection process consists of traversing the ring, and
+deactivating (that is, turning into a ghost) every object until
+self->non_ghost_count is down to the target size, or until it
+reaches the home position again.
+
+Note that objects in the sticky or changed states are still kept
+in the ring, however they can not be deactivated. The garbage
+collection process must skip such objects, rather than deactivating
+them.
+
+*/
+
 static char cPickleCache_doc_string[] =
 "Defines the PickleCache used by ZODB Connection objects.\n"
 "\n"
@@ -30,9 +105,10 @@
 
 static PyObject *py__p_oid, *py_reload, *py__p_jar, *py__p_changed;
 
-/* define this for extra debugging checks, and lousy performance */
-/* Are any of these checks necessary in production code?  How do we
-   decide when to disable it?
+/* define this for extra debugging checks, and lousy performance.
+   Not really necessary in production code... disable this before
+   release, provided no one has been reporting any of the RuntimeErrors
+   that it uses to report problems.
 */
 #define MUCH_RING_CHECKING 1
 
@@ -44,23 +120,36 @@
 #define ENGINE_NOISE(A) ((void)A)
 #endif
 
-/* This object is the pickle cache.  The layout of this struct is the same
- * as the start of ccobject_head in cPersistence.c
- * XXX Why do they need to have the same layouts?
- */
+/* This object is the pickle cache.  The CACHE_HEAD macro guarantees that
+the layout of this struct is the same as the start of ccobject_head in
+cPersistence.c */
 typedef struct {
     CACHE_HEAD
-    int klass_count;                         /* XXX for ZClass support? */
+    int klass_count;                         /* count of persistent classes */
     PyObject *data;                          /* oid -> object dict */
     PyObject *jar;                           /* Connection object */
     PyObject *setklassstate;                 /* ??? */
-    int cache_size;                          /* number of items in cache */
-    int ring_lock;                           /* ??? */
-    /* XXX Settable from Python, this appears to be a ratio controlling how
-     * the cache gets gradually smaller.  It is probably an error for this
-     * to be negative.
-     */
+    int cache_size;                          /* target number of items in cache */
+
+    /* Most of the time the ring contains only:
+       * many nodes corresponding to persistent objects
+       * one 'home' node from the cache.
+    In some cases it is handy to temporarily add other types
+    of node into the ring as placeholders. 'ring_lock' is a boolean
+    indicating that someone has already done this. Currently this
+    is only used by the garbage collection code. */
+
+    int ring_lock;
+
+    /* 'cache_drain_resistance' controls how quickly the cache size will drop
+    when it is smaller than the configured size. A value of zero means it will
+    not drop below the configured size (suitable for most caches). Otherwise,
+    it will remove cache_non_ghost_count/cache_drain_resistance items from
+    the cache every time (suitable for rarely used caches, such as those
+    associated with Zope versions). */
+
     int cache_drain_resistance;
+
 } ccobject;
 
 static int present_in_ring(ccobject *self, CPersistentRing *target);
@@ -127,6 +216,8 @@
 static int
 scan_gc_items(ccobject *self,int target)
 {
+    /* This function must only be called with the ring lock held */
+
     cPersistentObject *object;
     int error;
     CPersistentRing *here = self->ring_home.next;
@@ -174,8 +265,9 @@
             return 0;
 
         /* At this point we know that the ring only contains nodes from
-        persistent objects, plus our own home node. We can safely
-        assume this is a persistent object now we know it is not the home */
+        persistent objects, plus our own home node. We know this because
+        the ring lock is held.  We can safely assume the current ring
+        node is a persistent object now we know it is not the home */
         object = object_from_ring(self, here, "scan_gc_items");
         if (!object) 
 	    return -1;
@@ -190,6 +282,8 @@
              * so that we can follow the link after the ghosted object is
              * removed from the ring (via ghostify()).
              */
+
+            /* FIXME: This needs to be changed back to a placeholder */
             CPersistentRing *next = here->next;
 
             ENGINE_NOISE("G");
@@ -420,6 +514,8 @@
 	return NULL;
 
     if (self->ring_lock) {
+	/* When the ring lock is held, we have no way of knowing which ring nodes
+	belong to persistent objects, and which are placeholders. */
         PyErr_SetString(PyExc_ValueError,
 		".lru_items() is unavailable during garbage collection");
         return NULL;
@@ -498,6 +594,8 @@
 
 #ifdef Py_TRACE_REFS
 #error "this code path has not been tested - Toby Dickenson"
+    /* not tested, but it should still work. I would appreciate
+       reports of success */
     _Py_NewReference(v);
     /* it may be a problem that v->ob_type is still NULL? */
 #else
@@ -682,12 +780,14 @@
 			"Cache values must be persistent objects.");
 	return -1;
     }
-    /* Is this set of tests necessary? */
     class = (PyExtensionClass *)(v->ob_type);
     if (!((class->class_flags & PERSISTENT_TYPE_FLAG)
 	  && v->ob_type->tp_basicsize >= sizeof(cPersistentObject))) {
 	PyErr_SetString(PyExc_ValueError, 
 			"Cache values must be persistent objects.");
+	/* Must be either persistent classes (ie ZClasses), or instances
+	of persistent classes (ie Python classes that derive from
+	Persistence.Persistent, BTrees, etc) */
 	return -1;
     }
 
@@ -698,7 +798,8 @@
 
     if (oid == NULL)
 	return -1;
-    /* XXX key and oid should both be PyString objects */
+    /* XXX key and oid should both be PyString objects.
+       May be helpful to check this. */
     if (PyObject_Cmp(key, oid, &result) < 0) {
 	Py_DECREF(oid);
 	return -1;