[Zodb-checkins] SVN: ZODB/branches/zcZODB-3.8/src/ Merged Dieter's branch that provides object-size-based object cache
Jim Fulton
jim at zope.com
Tue Sep 16 10:49:45 EDT 2008
Log message for revision 91190:
Merged Dieter's branch that provides object-size-based object cache
control.
Changed:
U ZODB/branches/zcZODB-3.8/src/ZODB/Connection.py
U ZODB/branches/zcZODB-3.8/src/ZODB/DB.py
U ZODB/branches/zcZODB-3.8/src/ZODB/component.xml
U ZODB/branches/zcZODB-3.8/src/ZODB/config.py
U ZODB/branches/zcZODB-3.8/src/ZODB/tests/testConnection.py
U ZODB/branches/zcZODB-3.8/src/persistent/cPersistence.c
U ZODB/branches/zcZODB-3.8/src/persistent/cPersistence.h
U ZODB/branches/zcZODB-3.8/src/persistent/cPickleCache.c
U ZODB/branches/zcZODB-3.8/src/persistent/tests/persistent.txt
-=-
Modified: ZODB/branches/zcZODB-3.8/src/ZODB/Connection.py
===================================================================
--- ZODB/branches/zcZODB-3.8/src/ZODB/Connection.py 2008-09-16 10:27:04 UTC (rev 91189)
+++ ZODB/branches/zcZODB-3.8/src/ZODB/Connection.py 2008-09-16 14:49:43 UTC (rev 91190)
@@ -79,7 +79,7 @@
##########################################################################
# Connection methods, ZODB.IConnection
- def __init__(self, db, version='', cache_size=400):
+ def __init__(self, db, version='', cache_size=400, cache_size_bytes=0):
"""Create a new Connection."""
self._log = logging.getLogger('ZODB.Connection')
@@ -106,7 +106,7 @@
# Cache which can ghostify (forget the state of) objects not
# recently used. Its API is roughly that of a dict, with
# additional gc-related and invalidation-related methods.
- self._cache = PickleCache(self, cache_size)
+ self._cache = PickleCache(self, cache_size, cache_size_bytes)
# The pre-cache is used by get to avoid infinite loops when
# objects immediately load their state when they get their
@@ -637,6 +637,10 @@
else:
s = self._storage.store(oid, serial, p, self._version,
transaction)
+ self._cache.update_object_size_estimation(oid,
+ len(p)
+ )
+ obj._p_estimated_size = len(p)
self._store_count += 1
# Put the object in the cache before handling the
# response, just in case the response contains the
@@ -869,6 +873,10 @@
self._reader.setGhostState(obj, p)
obj._p_serial = serial
+ self._cache.update_object_size_estimation(obj._p_oid,
+ len(p)
+ )
+ obj._p_estimated_size = len(p)
# Blob support
if isinstance(obj, Blob):
@@ -1027,7 +1035,8 @@
self._invalidated.clear()
self._invalidatedCache = False
cache_size = self._cache.cache_size
- self._cache = cache = PickleCache(self, cache_size)
+ cache_size_bytes = self._cache.cache_size_bytes
+ self._cache = cache = PickleCache(self, cache_size, cache_size_bytes)
##########################################################################
# Python protocol
@@ -1125,6 +1134,12 @@
for oid in oids:
data, serial = src.load(oid, src)
+ obj = self._cache.get(oid, None)
+ if obj is not None:
+ self._cache.update_object_size_estimation(obj._p_oid,
+ len(data)
+ )
+ obj._p_estimated_size = len(data)
if isinstance(self._reader.getGhost(data), Blob):
blobfilename = src.loadBlob(oid, serial)
s = self._storage.storeBlob(oid, serial, data, blobfilename,
Modified: ZODB/branches/zcZODB-3.8/src/ZODB/DB.py
===================================================================
--- ZODB/branches/zcZODB-3.8/src/ZODB/DB.py 2008-09-16 10:27:04 UTC (rev 91189)
+++ ZODB/branches/zcZODB-3.8/src/ZODB/DB.py 2008-09-16 14:49:43 UTC (rev 91190)
@@ -221,6 +221,7 @@
def __init__(self, storage,
pool_size=7,
cache_size=400,
+ cache_size_bytes=0,
version_pool_size=3,
version_cache_size=100,
database_name='unnamed',
@@ -232,10 +233,15 @@
- `storage`: the storage used by the database, e.g. FileStorage
- `pool_size`: expected maximum number of open connections
- `cache_size`: target size of Connection object cache
+ - `cache_size_bytes`: target size measured in total estimated size
+ of objects in the Connection object cache.
+ "0" means unlimited.
- `version_pool_size`: expected maximum number of connections (per
version)
- `version_cache_size`: target size of Connection object cache for
version connections
+ - `historical_pool_size`: expected maximum number of total
+ historical connections
"""
# Allocate lock.
x = threading.RLock()
@@ -249,6 +255,7 @@
self._cache_size = cache_size
self._version_pool_size = version_pool_size
self._version_cache_size = version_cache_size
+ self._cache_size_bytes = cache_size_bytes
# Setup storage
self._storage=storage
@@ -515,6 +522,9 @@
def getCacheSize(self):
return self._cache_size
+ def getCacheSizeBytes(self):
+ return self._cache_size_bytes
+
def lastTransaction(self):
return self._storage.lastTransaction()
@@ -612,7 +622,7 @@
size = self._version_cache_size
else:
size = self._cache_size
- c = self.klass(self, version, size)
+ c = self.klass(self, version, size, self._cache_size_bytes)
pool.push(c)
result = pool.pop()
assert result is not None
@@ -710,14 +720,16 @@
"Versions are deprecated and will become unsupported "
"in ZODB 3.9",
DeprecationWarning, 2)
+
+ def setCacheSizeBytes(self, size):
self._a()
try:
- self._version_cache_size = size
- def setsize(c):
- c._cache.cache_size = size
- for version, pool in self._pools.items():
- if version:
- pool.map(setsize)
+ self._cache_size_bytes = size
+ pool = self._pools.get('')
+ if pool is not None:
+ def setsize(c):
+ c._cache.cache_size_bytes = size
+ pool.map(setsize)
finally:
self._r()
Modified: ZODB/branches/zcZODB-3.8/src/ZODB/component.xml
===================================================================
--- ZODB/branches/zcZODB-3.8/src/ZODB/component.xml 2008-09-16 10:27:04 UTC (rev 91189)
+++ ZODB/branches/zcZODB-3.8/src/ZODB/component.xml 2008-09-16 14:49:43 UTC (rev 91190)
@@ -181,6 +181,13 @@
Target size, in number of objects, of each connection's
object cache.
</description>
+ <key name="cache-size-bytes" datatype="byte-size" default="0">
+ <description>
+ Target size, in total estimated size for objects, of each connection's
+ object cache.
+ "0" means no limit.
+ </description>
+ </key>
<key name="pool-size" datatype="integer" default="7"/>
<description>
The expected maximum number of simultaneously open connections.
Modified: ZODB/branches/zcZODB-3.8/src/ZODB/config.py
===================================================================
--- ZODB/branches/zcZODB-3.8/src/ZODB/config.py 2008-09-16 10:27:04 UTC (rev 91189)
+++ ZODB/branches/zcZODB-3.8/src/ZODB/config.py 2008-09-16 14:49:43 UTC (rev 91190)
@@ -99,6 +99,7 @@
return ZODB.DB(storage,
pool_size=section.pool_size,
cache_size=section.cache_size,
+ cache_size_bytes=section.cache_size_bytes,
version_pool_size=section.version_pool_size,
version_cache_size=section.version_cache_size,
database_name=section.database_name,
Modified: ZODB/branches/zcZODB-3.8/src/ZODB/tests/testConnection.py
===================================================================
--- ZODB/branches/zcZODB-3.8/src/ZODB/tests/testConnection.py 2008-09-16 10:27:04 UTC (rev 91189)
+++ ZODB/branches/zcZODB-3.8/src/ZODB/tests/testConnection.py 2008-09-16 14:49:43 UTC (rev 91190)
@@ -515,6 +515,111 @@
>>> db.close()
"""
+class _PlayPersistent(Persistent):
+ def setValueWithSize(self, size=0): self.value = size*' '
+ __init__ = setValueWithSize
+
+class EstimatedSizeTests(unittest.TestCase):
+ """check that size estimations are handled correctly."""
+
+ def setUp(self):
+ self.db = db = databaseFromString("<zodb>\n<mappingstorage/>\n</zodb>")
+ self.conn = c = db.open()
+ self.obj = obj = _PlayPersistent()
+ c.root()['obj'] = obj
+ transaction.commit()
+
+ def test_size_set_on_write_commit(self):
+ obj, cache = self.obj, self.conn._cache
+ # we have just written "obj". Its size should not be zero
+ size, cache_size = obj._p_estimated_size, cache.total_estimated_size
+ self.assert_(size > 0)
+ self.assert_(cache_size > size)
+ # increase the size, write again and check that the size changed
+ obj.setValueWithSize(1000)
+ transaction.commit()
+ new_size = obj._p_estimated_size
+ self.assert_(new_size > size)
+ self.assertEqual(cache.total_estimated_size, cache_size + new_size - size)
+
+ def test_size_set_on_write_savepoint(self):
+ obj, cache = self.obj, self.conn._cache
+ # we have just written "obj". Its size should not be zero
+ size, cache_size = obj._p_estimated_size, cache.total_estimated_size
+ # increase the size, write again and check that the size changed
+ obj.setValueWithSize(1000)
+ transaction.savepoint()
+ new_size = obj._p_estimated_size
+ self.assert_(new_size > size)
+ self.assertEqual(cache.total_estimated_size, cache_size + new_size - size)
+
+ def test_size_set_on_load(self):
+ c = self.db.open() # new connection
+ obj = c.root()['obj']
+ # the object is still a ghost and '_p_estimated_size' not yet set
+ # access to unghost
+ cache = c._cache
+ cache_size = cache.total_estimated_size
+ obj.value
+ size = obj._p_estimated_size
+ self.assert_(size > 0)
+ self.assertEqual(cache.total_estimated_size, cache_size + size)
+ # we also test here that deactivation reduces the cache size
+ obj._p_deactivate()
+ self.assertEqual(cache.total_estimated_size, cache_size)
+
+
+ def test_configuration(self):
+ # verify defaults ....
+ expected = 0
+ # ... on db
+ db = self.db
+ self.assertEqual(db.getCacheSizeBytes(), expected)
+ # ... on connection
+ conn = self.conn
+ self.assertEqual(conn._cache.cache_size_bytes, expected)
+ # verify explicit setting ...
+ expected = 10000
+ # ... on db
+ db = databaseFromString("<zodb>\n"
+ " cache-size-bytes %d\n"
+ " <mappingstorage />\n"
+ "</zodb>"
+ % expected
+ )
+ self.assertEqual(db.getCacheSizeBytes(), expected)
+ # ... on connection
+ conn = db.open()
+ self.assertEqual(conn._cache.cache_size_bytes, expected)
+ # test huge (larger than 4 byte) size limit
+ db = databaseFromString("<zodb>\n"
+ " cache-size-bytes 8GB\n"
+ " <mappingstorage />\n"
+ "</zodb>"
+ )
+ self.assertEqual(db.getCacheSizeBytes(), 0x1L << 33)
+
+
+ def test_cache_garbage_collection(self):
+ db = self.db
+ # activate size based cache garbage collection
+ db.setCacheSizeBytes(1)
+ conn = self.conn
+ cache = conn._cache
+ # verify the change worked as expected
+ self.assertEqual(cache.cache_size_bytes, 1)
+ # verify our entrance assumption is fulfilled
+ self.assert_(cache.total_estimated_size > 1)
+ conn.cacheGC()
+ self.assert_(cache.total_estimated_size <= 1)
+ # sanity check
+ self.assert_(cache.total_estimated_size >= 0)
+
+
+
+
+
+
# ---- stubs
class StubObject(Persistent):
@@ -653,4 +758,5 @@
s = unittest.makeSuite(ConnectionDotAdd, 'check')
s.addTest(doctest.DocTestSuite())
s.addTest(unittest.makeSuite(TestConnectionInterface))
+ s.addTest(unittest.makeSuite(EstimatedSizeTests))
return s
Modified: ZODB/branches/zcZODB-3.8/src/persistent/cPersistence.c
===================================================================
--- ZODB/branches/zcZODB-3.8/src/persistent/cPersistence.c 2008-09-16 10:27:04 UTC (rev 91189)
+++ ZODB/branches/zcZODB-3.8/src/persistent/cPersistence.c 2008-09-16 14:49:43 UTC (rev 91190)
@@ -89,6 +89,7 @@
if (self->cache) {
/* Create a node in the ring for this unghostified object. */
self->cache->non_ghost_count++;
+ self->cache->total_estimated_size += self->estimated_size;
ring_add(&self->cache->ring_home, &self->ring);
Py_INCREF(self);
}
@@ -144,6 +145,7 @@
/* if we're ghostifying an object, we better have some non-ghosts */
assert(self->cache->non_ghost_count > 0);
self->cache->non_ghost_count--;
+ self->cache->total_estimated_size -= self->estimated_size;
ring_del(&self->ring);
}
@@ -174,6 +176,7 @@
/* If we're ghostifying an object, we better have some non-ghosts. */
assert(self->cache->non_ghost_count > 0);
self->cache->non_ghost_count--;
+ self->cache->total_estimated_size -= self->estimated_size;
ring_del(&self->ring);
self->state = cPersistent_GHOST_STATE;
dictptr = _PyObject_GetDictPtr((PyObject *)self);
@@ -1011,6 +1014,34 @@
return PyInt_FromLong(self->state);
}
+static PyObject *
+Per_get_estimated_size(cPersistentObject *self)
+{
+ return PyInt_FromLong(self->estimated_size);
+}
+
+static int
+Per_set_estimated_size(cPersistentObject *self, PyObject *v)
+{
+ if (v) {
+ if (PyInt_Check(v)) {
+ if (PyInt_AS_LONG(v) < 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "_p_estimated_size must not be negative");
+ return -1;
+ }
+ self->estimated_size = PyInt_AS_LONG(v);
+ }
+ else {
+ PyErr_SetString(PyExc_ValueError,
+ "_p_estimated_size must be an integer");
+ return -1;
+ }
+ } else
+ self->estimated_size = 0;
+ return 0;
+}
+
static PyGetSetDef Per_getsets[] = {
{"_p_changed", (getter)Per_get_changed, (setter)Per_set_changed},
{"_p_jar", (getter)Per_get_jar, (setter)Per_set_jar},
@@ -1018,6 +1049,9 @@
{"_p_oid", (getter)Per_get_oid, (setter)Per_set_oid},
{"_p_serial", (getter)Per_get_serial, (setter)Per_set_serial},
{"_p_state", (getter)Per_get_state},
+ {"_p_estimated_size",
+ (getter)Per_get_estimated_size, (setter)Per_set_estimated_size
+ },
{NULL}
};
Modified: ZODB/branches/zcZODB-3.8/src/persistent/cPersistence.h
===================================================================
--- ZODB/branches/zcZODB-3.8/src/persistent/cPersistence.h 2008-09-16 10:27:04 UTC (rev 91189)
+++ ZODB/branches/zcZODB-3.8/src/persistent/cPersistence.h 2008-09-16 14:49:43 UTC (rev 91190)
@@ -23,11 +23,12 @@
#define CACHE_HEAD \
PyObject_HEAD \
CPersistentRing ring_home; \
- int non_ghost_count;
+ int non_ghost_count; \
+ PY_LONG_LONG total_estimated_size; /* total estimated size of items in cache */
struct ccobject_head_struct;
-typedef struct ccobject_head_struct PerCache;
+typedef struct ccobject_head_struct PerCache;
/* How big is a persistent object?
@@ -38,13 +39,14 @@
8 ring struct
8 serialno
4 state + extra
+ 4 size info
- (52) so far
+ (56) so far
4 dict ptr
4 weaklist ptr
-------------------------
- 64 only need 62, but obmalloc rounds up to multiple of eight
+ 68 only need 62, but obmalloc rounds up to multiple of eight
Even a ghost requires 64 bytes. It's possible to make a persistent
instance with slots and no dict, which changes the storage needed.
@@ -59,7 +61,8 @@
CPersistentRing ring; \
char serial[8]; \
signed char state; \
- unsigned char reserved[3];
+ unsigned char reserved[3]; \
+ unsigned long estimated_size;
#define cPersistent_GHOST_STATE -1
#define cPersistent_UPTODATE_STATE 0
Modified: ZODB/branches/zcZODB-3.8/src/persistent/cPickleCache.c
===================================================================
--- ZODB/branches/zcZODB-3.8/src/persistent/cPickleCache.c 2008-09-16 10:27:04 UTC (rev 91189)
+++ ZODB/branches/zcZODB-3.8/src/persistent/cPickleCache.c 2008-09-16 14:49:43 UTC (rev 91190)
@@ -116,6 +116,7 @@
PyObject *data; /* oid -> object dict */
PyObject *jar; /* Connection object */
int cache_size; /* target number of items in cache */
+ PY_LONG_LONG cache_size_bytes; /* target total estimated size of items in cache */
/* Most of the time the ring contains only:
* many nodes corresponding to persistent objects
@@ -167,7 +168,7 @@
}
static int
-scan_gc_items(ccobject *self, int target)
+scan_gc_items(ccobject *self, int target, PY_LONG_LONG target_bytes)
{
/* This function must only be called with the ring lock held,
because it places non-object placeholders in the ring.
@@ -189,7 +190,11 @@
*/
insert_after(&before_original_home, self->ring_home.r_prev);
here = self->ring_home.r_next; /* least recently used object */
- while (here != &before_original_home && self->non_ghost_count > target) {
+ while (here != &before_original_home &&
+ (self->non_ghost_count > target
+ || (target_bytes && self->total_estimated_size > target_bytes)
+ )
+ ) {
assert(self->ring_lock);
assert(here != &self->ring_home);
@@ -244,7 +249,7 @@
}
static PyObject *
-lockgc(ccobject *self, int target_size)
+lockgc(ccobject *self, int target_size, PY_LONG_LONG target_size_bytes)
{
/* This is thread-safe because of the GIL, and there's nothing
* in between checking the ring_lock and acquiring it that calls back
@@ -256,7 +261,7 @@
}
self->ring_lock = 1;
- if (scan_gc_items(self, target_size) < 0) {
+ if (scan_gc_items(self, target_size, target_size_bytes) < 0) {
self->ring_lock = 0;
return NULL;
}
@@ -272,6 +277,7 @@
int obsolete_arg = -999;
int starting_size = self->non_ghost_count;
int target_size = self->cache_size;
+ PY_LONG_LONG target_size_bytes = self->cache_size_bytes;
if (self->cache_drain_resistance >= 1) {
/* This cache will gradually drain down to a small size. Check
@@ -294,7 +300,7 @@
< 0))
return NULL;
- return lockgc(self, target_size);
+ return lockgc(self, target_size, target_size_bytes);
}
static PyObject *
@@ -307,7 +313,7 @@
if (!PyArg_ParseTuple(args, "|i:full_sweep", &dt))
return NULL;
if (dt == -999)
- return lockgc(self, 0);
+ return lockgc(self, 0, 0);
else
return cc_incrgc(self, args);
}
@@ -327,7 +333,7 @@
< 0))
return NULL;
- return lockgc(self, 0);
+ return lockgc(self, 0, 0);
}
static int
@@ -629,6 +635,32 @@
return PyInt_FromLong(c);
}
+static PyObject *
+cc_update_object_size_estimation(ccobject *self, PyObject *args)
+{
+ PyObject *oid;
+ cPersistentObject *v;
+ unsigned int new_size;
+ if (!PyArg_ParseTuple(args, "OI:updateObjectSizeEstimation", &oid, &new_size))
+ return NULL;
+ /* Note: reference borrowed */
+ v = (cPersistentObject *)PyDict_GetItem(self->data, oid);
+ if (v) {
+ /* we know this object -- update our "total_size_estimation"
+ we must only update when the object is in the ring
+ */
+ if (v->ring.r_next) {
+ self->total_estimated_size += new_size - v->estimated_size;
+ /* we do this in "Connection" as we need it even when the
+ object is not in the cache (or not the ring)
+ */
+ /* v->estimated_size = new_size; */
+ }
+ }
+ Py_RETURN_NONE;
+ }
+
+
static struct PyMethodDef cc_methods[] = {
{"items", (PyCFunction)cc_items, METH_NOARGS,
"Return list of oid, object pairs for all items in cache."},
@@ -655,6 +687,10 @@
"ringlen() -- Returns number of non-ghost items in cache."},
{"debug_info", (PyCFunction)cc_debug_info, METH_NOARGS,
"debug_info() -- Returns debugging data about objects in the cache."},
+ {"update_object_size_estimation",
+ (PyCFunction)cc_update_object_size_estimation,
+ METH_VARARGS,
+ "update_object_size_estimation(oid, new_size) -- update the caches size estimation for *oid* (if this is known to the cache)."},
{NULL, NULL} /* sentinel */
};
@@ -662,9 +698,10 @@
cc_init(ccobject *self, PyObject *args, PyObject *kwds)
{
int cache_size = 100;
+ PY_LONG_LONG cache_size_bytes = 0;
PyObject *jar;
- if (!PyArg_ParseTuple(args, "O|i", &jar, &cache_size))
+ if (!PyArg_ParseTuple(args, "O|iL", &jar, &cache_size, &cache_size_bytes))
return -1;
self->jar = NULL;
@@ -687,7 +724,9 @@
self->jar = jar;
Py_INCREF(jar);
self->cache_size = cache_size;
+ self->cache_size_bytes = cache_size_bytes;
self->non_ghost_count = 0;
+ self->total_estimated_size = 0;
self->klass_count = 0;
self->cache_drain_resistance = 0;
self->ring_lock = 0;
@@ -1018,6 +1057,8 @@
static PyMemberDef cc_members[] = {
{"cache_size", T_INT, offsetof(ccobject, cache_size)},
+ {"cache_size_bytes", T_LONG, offsetof(ccobject, cache_size_bytes)},
+ {"total_estimated_size", T_LONG, offsetof(ccobject, total_estimated_size), RO},
{"cache_drain_resistance", T_INT,
offsetof(ccobject, cache_drain_resistance)},
{"cache_non_ghost_count", T_INT, offsetof(ccobject, non_ghost_count), RO},
Modified: ZODB/branches/zcZODB-3.8/src/persistent/tests/persistent.txt
===================================================================
--- ZODB/branches/zcZODB-3.8/src/persistent/tests/persistent.txt 2008-09-16 10:27:04 UTC (rev 91189)
+++ ZODB/branches/zcZODB-3.8/src/persistent/tests/persistent.txt 2008-09-16 14:49:43 UTC (rev 91190)
@@ -85,7 +85,25 @@
>>> p.x
2
+We can store a size estimation in ``_p_estimated_size``. Its default is 0.
+The size estimation can be used by a cache associated with the data manager
+to help in the implementation of its replacement strategy or its size bounds.
+Of course, the estimated size must not be negative.
+ >>> p._p_estimated_size
+ 0
+ >>> p._p_estimated_size = 1000
+ >>> p._p_estimated_size
+ 1000
+ >>> p._p_estimated_size = -1
+ Traceback (most recent call last):
+ ....
+ ValueError: _p_estimated_size must not be negative
+
+
+
+
+
Test Persistent with Data Manager
---------------------------------
More information about the Zodb-checkins
mailing list