Yes, that's right ... we can never know how many extfile objects share the file. But there are enough resources available to be able to find it out.
We can add reference-counting functionality to the HashDir class and then keep track of this. See my code below: I have created a ReferenceCounter and it works fine. I can now delete implicitly without worry.
But when I use it in my Zope application, '__del__' itself is still *not* being invoked :?
Please tell me what you think....
referencecounter.py:-
------------------------------------------------------------------------------------
import os
from persistent import Persistent
class ReferenceCounter(Persistent):
    """Keeps track of the number of ExtBytesProperties referring to
    each file in a HashDir.

    The counts live in memory in ``self.counts`` (digest -> count, only
    positive counts are stored) and are written to the file ``refcount``
    inside *dirpath* as ``digest,count`` lines whenever ``commit`` is
    called.
    """

    def __init__(self, dirpath):
        """Load the counts stored below *dirpath*, if there are any.

        A missing count file is not an error; it is created by the
        first ``commit``.
        """
        self.path = os.path.join(dirpath, 'refcount')
        self.counts = {}
        if not os.path.exists(self.path):
            return
        f = open(self.path)
        try:
            for line in f:
                line = line.strip()
                if not line:
                    # tolerate blank lines, e.g. a trailing newline
                    continue
                digest, count = line.split(',')
                self.counts[digest] = int(count)
        finally:
            # close the file even when a line cannot be parsed
            f.close()

    def getCount(self, digest):
        """Return the number of references to *digest* (0 if unknown)."""
        return self.counts.get(digest, 0)

    def getTotalFiles(self):
        """Return the number of digests that have at least one reference."""
        return len(self.counts)

    def addReference(self, digest):
        """Add one reference to *digest* and return the new count."""
        newCount = self.getCount(digest) + 1
        self.counts[digest] = newCount
        # ``counts`` is a plain dict, so changing it in place is not
        # noticed by the persistence machinery; flag the change.
        self._p_changed = True
        return newCount

    def removeReference(self, digest):
        """Remove one reference from *digest* and return the new count.

        When the count drops to 0 the digest is forgotten.  A negative
        return value means that *digest* was not tracked at all; the
        stored counts are left untouched in that case.
        """
        newCount = self.getCount(digest) - 1
        if newCount > 0:
            self.counts[digest] = newCount
            self._p_changed = True
        elif newCount == 0:
            del self.counts[digest]
            self._p_changed = True
        return newCount

    def commit(self):
        """Write all counts to the count file, replacing its content."""
        f = open(self.path, 'w')
        try:
            f.write('\n'.join("%s,%s" % (digest, count)
                              for digest, count in self.counts.items()))
        finally:
            f.close()
--------------------------------------------------------------------------------
hashdir.py
--------------------------------------------------------------------------------
import sha
import os
import stat
import tempfile
import shutil
from types import StringTypes, UnicodeType
import interfaces
from zope import interface
from persistent import Persistent
from zope.cachedescriptors.property import Lazy
from referencecounter import ReferenceCounter
class HashDir(Persistent):
    """a directory holding files named after their sha1 hash

    Below ``path`` three directories are used: ``tmp`` receives files
    while they are being written, ``var`` holds the committed files
    (named by their digest) and ``etc`` holds the reference counts.
    """

    interface.implements(interfaces.IHashDir)
    _path = None
    # set together with the path, see _setPath
    refCounter = None

    def __init__(self, path=None):
        """Create the hash directory; *path* may also be assigned later."""
        self.path = path

    def _setPath(self, path):
        """Set the base directory, create the sub-directories and load
        the reference counts stored there.  ``None`` is ignored."""
        if path is None:
            return
        self._path = os.path.abspath(path)
        self.tmp = os.path.join(self.path, 'tmp')
        self.var = os.path.join(self.path, 'var')
        self.etc = os.path.join(self.path, 'etc')
        self._initPaths()
        # The counter needs ``etc``, which only exists once a path has
        # been set, so it is created here and not in __init__.
        self.refCounter = ReferenceCounter(self.etc)

    def _getPath(self):
        return self._path

    path = property(_getPath, _setPath)

    def _initPaths(self):
        """Create the base directory and its sub-directories if missing."""
        for path in [self.path, self.var, self.tmp, self.etc]:
            if not os.path.exists(path):
                os.mkdir(path)

    def new(self):
        """returns a new filehandle"""
        handle, path = tempfile.mkstemp(prefix='dirty.',
                                        dir=self.tmp)
        return WriteFile(self, handle, path)

    def commit(self, f):
        """commit a file, this is called by the file

        Moves the temporary file of *f* into ``var`` (or discards it if
        the same content is already stored), counts one more reference
        to the content and returns its digest.
        """
        digest = f.sha.hexdigest()
        target = os.path.join(self.var, digest)
        if os.path.exists(target):
            # we have that content so just delete the tmp file
            os.remove(f.path)
        else:
            shutil.move(f.path, target)
            # read-only for owner and group (octal 440)
            os.chmod(target, stat.S_IRUSR | stat.S_IRGRP)
        # every commit counts as one reference, new content or not
        self.refCounter.addReference(digest)
        self.refCounter.commit()
        return digest

    def digests(self):
        """returns all digests stored"""
        return os.listdir(self.var)

    def getPath(self, digest):
        """Return the path of the file stored for *digest*.

        Raises ValueError if *digest* is not a string of 40 characters
        and KeyError if no such file is stored.
        """
        if not isinstance(digest, StringTypes) or len(digest) != 40:
            raise ValueError(repr(digest))
        if isinstance(self.var, UnicodeType):
            digest = unicode(digest)
        path = os.path.join(self.var, digest)
        if not os.path.isfile(path):
            raise KeyError(digest)
        return path

    def getSize(self, digest):
        """Return the size in bytes of the file stored for *digest*."""
        return os.path.getsize(self.getPath(digest))

    def open(self, digest):
        """Return a ReadFile for the file stored for *digest*."""
        return ReadFile(self.getPath(digest))

    def delete(self, digest):
        """Give up one reference to *digest*; the file itself is removed
        when this was the last reference.

        Raises ValueError or KeyError like ``getPath`` when *digest* is
        malformed or not stored.  A file without any recorded reference
        is kept, because it is unknown who else uses it.
        """
        # getPath only returns paths of existing files
        path = self.getPath(digest)
        # compare by value; identity of small ints is an implementation
        # detail
        if self.refCounter.removeReference(digest) == 0:
            os.remove(path)
        self.refCounter.commit()
class ReadFile(object):
    """Read-only file object which opens the underlying file only when
    it is really needed"""

    interface.implements(interfaces.IReadFile)

    def __init__(self, name, bufsize=-1):
        self.name = name
        self.bufsize = bufsize
        # the digest is the last component of the file's path
        _, tail = os.path.split(self.name)
        self.digest = str(tail)
        self._v_file = None
        self._v_len = None

    @property
    def _file(self):
        """the underlying file object, (re)opened if it is closed"""
        if self.closed:
            self._v_file = file(
                self.name, 'rb', self.bufsize)
        return self._v_file

    @Lazy
    def ctime(self):
        status = os.stat(self.name)
        return int(status[stat.ST_CTIME])

    @Lazy
    def atime(self):
        status = os.stat(self.name)
        return int(status[stat.ST_ATIME])

    def __len__(self):
        # the size is looked up once and remembered afterwards
        size = self._v_len
        if size is None:
            size = self._v_len = int(
                os.stat(self.name)[stat.ST_SIZE])
        return size

    def __repr__(self):
        return "<ReadFile named %s>" % repr(self.digest)

    @property
    def closed(self):
        """like file closed, but a file never opened counts as closed"""
        handle = self._v_file
        if handle is None:
            return True
        return handle.closed

    def seek(self, offset, whence=0):
        """see file.seek"""
        # Rewinding a closed file is a no-op: the next read opens the
        # file and therefore starts at position 0 anyway.
        rewinding = offset == 0 and whence == 0
        if rewinding and self.closed:
            return
        return self._file.seek(offset, whence)

    def tell(self):
        """see file.tell"""
        if not self.closed:
            return self._file.tell()
        return 0

    def read(self, size=-1):
        """see file.read"""
        return self._file.read(size)

    def close(self):
        """see file.close"""
        handle, self._v_file = self._v_file, None
        if handle is not None and not handle.closed:
            handle.close()

    def fileno(self):
        """see file.fileno"""
        return self._file.fileno()

    def __iter__(self):
        return iter(self._file)
class WriteFile(object):
    """Write handle for a new temporary file of a hash directory.

    Everything written is also fed into a running sha hash.  ``commit``
    closes the file and hands it over to the hash directory, ``abort``
    closes and removes it.
    """

    interface.implements(interfaces.IWriteFile)

    def __init__(self, hd, handle, path):
        """*hd* is the hash directory to commit to, *handle* the open
        file descriptor of the temporary file and *path* its path."""
        self.hd = hd
        self.handle = handle
        self.path = path
        self.sha = sha.new()
        self._pos = 0

    def write(self, s):
        """Append *s* to the file and to the running hash."""
        self.sha.update(s)
        # os.write returns the number of bytes actually written, which
        # may be less than requested; keep going until all of *s* is on
        # disk so that the content matches the hash and the position.
        remaining = s
        while remaining:
            written = os.write(self.handle, remaining)
            if not written:
                raise IOError("could not write to %s" % self.path)
            remaining = remaining[written:]
        self._pos += len(s)

    def commit(self):
        """returns the sha digest and saves the file"""
        os.close(self.handle)
        return self.hd.commit(self)

    def tell(self):
        """see file.tell"""
        return self._pos

    def abort(self):
        """abort the write and delete file"""
        os.close(self.handle)
        os.unlink(self.path)
--------------------------------------------------------------------------------------
property.py
-------------------------------------------------------------------------------------
from zope import component
import interfaces
from cStringIO import StringIO
from datamanager import getFile, _storage
# Sentinel used as the default of dict lookups, so that a missing entry
# can be told apart from any value that might be stored.
_marker = object()
# Number of bytes requested per read when a value is copied into a new
# file (128 KiB).
BLOCK_SIZE = 1024*128
class ExtBytesProperty(object):
    """a property which's values are stored as external files

    Only the digest of the content is kept in the ``__dict__`` of the
    owning instance; the content itself is handed to the IHashDir
    utility.  Each stored digest stands for one reference to its file.
    """

    def __init__(self, name):
        """*name* is the key under which the digest is stored in the
        ``__dict__`` of the owning instance."""
        self.__name = name

    @property
    def hd(self):
        """the hash directory, looked up as the IHashDir utility"""
        return component.getUtility(interfaces.IHashDir)

    def _release(self, inst):
        """Forget the digest stored on *inst*, if any, and give up the
        reference to its file."""
        digest = inst.__dict__.pop(self.__name, None)
        if digest is not None:
            self.hd.delete(digest)

    def __delete__(self, inst):
        """Remove the value from *inst*.

        The digest is removed from the instance as well, so that a
        later read does not try to open a file that may be gone.
        Deleting a value that was never set does nothing.
        """
        self._release(inst)

    def __get__(self, inst, klass):
        """Return the stored content as a file object, ``None`` if
        nothing is stored, or the property itself on class access."""
        if inst is None:
            return self
        digest = inst.__dict__.get(self.__name, _marker)
        if digest is _marker:
            return None
        return getFile(digest)

    def __set__(self, inst, value):
        """Store *value*, which may be a string, a unicode string
        (stored UTF-8 encoded) or an object with ``seek`` and ``read``.

        ``None`` removes the current value and releases its file.
        """
        if value is None:
            self._release(inst)
            return
        # Handle case when value is a string
        if isinstance(value, unicode):
            value = value.encode('UTF-8')
        if isinstance(value, str):
            value = StringIO(value)
        value.seek(0)
        f = self.hd.new()
        try:
            while True:
                chunk = value.read(BLOCK_SIZE)
                if not chunk:
                    break
                f.write(chunk)
        except Exception:
            # do not leave a half written temporary file behind
            f.abort()
            raise
        newDigest = f.commit()
        oldDigest = inst.__dict__.get(self.__name, _marker)
        if newDigest == oldDigest:
            # we have no change, so we have to seek to zero
            # because this is normal behaviour when setting a
            # new value
            if hasattr(_storage, 'dataManager'):
                if newDigest in _storage.dataManager.files:
                    cached = _storage.dataManager.files[newDigest]
                    cached.seek(0)
        else:
            inst.__dict__[self.__name] = newDigest
        # The commit above counted one more reference for the new
        # content.  The reference held for the previous value has to be
        # given up in both cases:
        # case 1: same digest -- the instance still refers to the same
        #         file, so this balances the increment done by commit.
        # case 2: different digest -- the instance no longer refers to
        #         the old file.
        if oldDigest is not _marker:
            self.hd.delete(oldDigest)
--------------------------------------------------------------------------------------
file/file.py
--------------------------------------------------------------------------------------
from persistent import Persistent
from z3c.extfile.property import ExtBytesProperty
from interfaces import IExtFile
from zope import interface
class ExtFile(Persistent):
"""A zope file implementation based on z3c.extfile"""
interface.implements(IExtFile)
data = ExtBytesProperty('data')
# Store the initial content and the content type.  ``data`` is declared
# as an ExtBytesProperty on the class, so the first assignment goes
# through that descriptor instead of storing the value on the instance.
def __init__(self, data='', contentType=''):
self.data = data
self.contentType = contentType
def __del__(self):
# Finalizer: ``del self.data`` goes through the ExtBytesProperty
# descriptor declared on the class.
# NOTE(review): Python runs __del__ when the in-memory object is
# reclaimed; for a Persistent object that is presumably not the same
# event as its removal from the database -- confirm that this is the
# intended trigger for deleting the data.
del self.data
# debugging output
print "deleted data"
def delete(self):