[Zope-dev] delete feature to z3c.extfile
Jayarajan Jn
jayarajan at musmo.com
Fri Sep 26 09:38:04 EDT 2008
On Wed, Sep 24, 2008 at 8:58 PM, Jürgen kartnaller <
juergen.kartnaller at gmail.com> wrote:
>
>
> On Wed, Sep 24, 2008 at 9:10 AM, Jayarajan Jn <jayarajan at musmo.com> wrote:
>
>> Hi Jürgen,
>>
>> First of all, thank you for your reply.
>> Hmm, you are right too... and I am comfortable with the explicit delete.
>> But in my prototype I have also added a WebDAV interface for managing
>> extfiles. When a user deletes the extfile object through the WebDAV
>> interface, the file remains there, because I don't know where to put the
>> code that calls delete() in that case. And in my project there won't be
>> more than one extfile referring to the same file, so in my case it is OK
>> to go for implicit delete! But it's not working!
>>
>
> You can never be sure if a file is only used once. If two user upload the
> same file only one copy is stored in extfile because both files have the
> same hash.
>
Yes, that's right... we can never know how many ExtFile objects share the
file. But there are enough resources available to find out.
We can add reference-counting functionality to the HashDir class and then
keep track of this... see my code below. I have created a
ReferenceCounter and it works fine. I can now delete implicitly without
worry.
But still, when I use it in my Zope application, '__del__' itself is *not*
being invoked :?
Please tell me what you think...
referencecounter.py:-
------------------------------------------------------------------------------------
import os
from persistent import Persistent
class ReferenceCounter(Persistent):
    """keeps track of the number of ExtBytesProperties referring to a
    file in a HashDir

    The counts are held in memory in ``self.counts`` (digest -> count)
    and written by ``commit()`` as ``digest,count`` lines to the file
    ``refcount`` inside ``dirpath``.
    """

    def __init__(self, dirpath):
        """load the counts stored in ``<dirpath>/refcount``, if any

        A missing file is not an error; it is created by the first
        ``commit()``.
        """
        self.path = os.path.join(dirpath, 'refcount')
        self.counts = {}
        if not os.path.exists(self.path):
            return
        f = open(self.path)
        try:
            for line in f:
                line = line.strip()
                if not line:
                    # tolerate blank lines and a trailing newline
                    continue
                digest, count = line.split(',')
                self.counts[digest] = int(count)
        finally:
            # close the handle even if a line cannot be parsed
            f.close()

    def getCount(self, digest):
        """return the number of references to digest (0 if untracked)"""
        return self.counts.get(digest, 0)

    def getTotalFiles(self):
        """return the number of digests that have at least one reference"""
        return len(self.counts)

    def addReference(self, digest):
        """add one reference to digest and return the new count"""
        newCount = self.getCount(digest) + 1
        self.counts[digest] = newCount
        return newCount

    def removeReference(self, digest):
        """remove one reference to digest and return the new count

        The entry is dropped when the count reaches zero. For a digest
        that is not tracked the result is -1 and nothing is stored.
        """
        newCount = self.getCount(digest) - 1
        if newCount > 0:
            self.counts[digest] = newCount
        elif newCount == 0:
            del self.counts[digest]
        return newCount

    def commit(self):
        """write all counts to the refcount file, replacing its content"""
        lines = ["%s,%s" % (digest, count)
                 for digest, count in self.counts.items()]
        f = open(self.path, 'w')
        try:
            f.write('\n'.join(lines))
        finally:
            f.close()
--------------------------------------------------------------------------------
hashdir.py
--------------------------------------------------------------------------------
import sha
import os
import stat
import tempfile
import shutil
from types import StringTypes, UnicodeType
import interfaces
from zope import interface
from persistent import Persistent
from zope.cachedescriptors.property import Lazy
from referencecounter import ReferenceCounter
class HashDir(Persistent):
    """a directory holding files named after their sha1 hash

    Below ``path`` three sub directories are used: ``tmp`` for files
    that are being written, ``var`` for committed files (one per digest)
    and ``etc`` which is handed to the reference counter.
    """

    interface.implements(interfaces.IHashDir)
    _path = None
    # stays None until a path has been assigned
    refCounter = None

    def __init__(self, path=None):
        # assigning the property sets up the directories and the
        # reference counter; with path=None nothing is set up yet
        self.path = path

    def _setPath(self, path):
        """set the base directory and create everything that lives in it"""
        if path is None:
            return
        self._path = os.path.abspath(path)
        self.tmp = os.path.join(self.path, 'tmp')
        self.var = os.path.join(self.path, 'var')
        self.etc = os.path.join(self.path, 'etc')
        self._initPaths()
        # created here rather than in __init__ so that HashDir() without
        # a path does not fail on the not yet existing ``etc`` attribute
        self.refCounter = ReferenceCounter(self.etc)

    def _getPath(self):
        return self._path

    path = property(_getPath, _setPath)

    def _initPaths(self):
        """create the base directory and its sub directories if missing"""
        for path in [self.path, self.var, self.tmp, self.etc]:
            if not os.path.exists(path):
                os.mkdir(path)

    def new(self):
        """returns a new filehandle"""
        handle, path = tempfile.mkstemp(prefix='dirty.',
                                        dir=self.tmp)
        return WriteFile(self, handle, path)

    def commit(self, f):
        """commit a file, this is called by the file

        Moves the temporary file into ``var`` under its digest (or drops
        it when that content is already stored), adds one reference for
        the digest and returns the digest.
        """
        digest = f.sha.hexdigest()
        target = os.path.join(self.var, digest)
        if os.path.exists(target):
            # we have that content so just delete the tmp file
            os.remove(f.path)
        else:
            shutil.move(f.path, target)
            # read-only for owner and group (octal 440)
            os.chmod(target, stat.S_IRUSR | stat.S_IRGRP)
        # every commit counts as one reference, also for known content
        self.refCounter.addReference(digest)
        self.refCounter.commit()
        return digest

    def digests(self):
        """returns all digests stored"""
        return os.listdir(self.var)

    def getPath(self, digest):
        """return the path of the file stored for digest

        Raises ValueError if digest is not a string of length 40 and
        KeyError if no file is stored for it.
        """
        if not isinstance(digest, StringTypes) or len(digest) != 40:
            raise ValueError(repr(digest))
        if isinstance(self.var, UnicodeType):
            digest = unicode(digest)
        path = os.path.join(self.var, digest)
        if not os.path.isfile(path):
            raise KeyError(digest)
        return path

    def getSize(self, digest):
        """return the size in bytes of the file stored for digest"""
        return os.path.getsize(self.getPath(digest))

    def open(self, digest):
        """return a ReadFile for the file stored for digest"""
        return ReadFile(self.getPath(digest))

    def delete(self, digest):
        """release one reference to digest

        The file itself is removed only when the count drops to exactly
        zero, so a digest the counter does not know (count below zero
        after the decrement) is never removed. Raises like getPath if
        the digest is invalid or no file is stored for it.
        """
        # getPath already guarantees that the file exists
        path = self.getPath(digest)
        # compare by value: identity of int objects is not guaranteed
        if self.refCounter.removeReference(digest) == 0:
            os.remove(path)
        self.refCounter.commit()
class ReadFile(object):
    """Read access to a stored file; the file is opened only when needed"""

    interface.implements(interfaces.IReadFile)

    def __init__(self, name, bufsize=-1):
        self._v_file = None
        self._v_len = None
        self.bufsize = bufsize
        self.name = name
        # the last path component is the digest the file is stored under
        self.digest = str(os.path.basename(name))

    @property
    def _file(self):
        """the underlying file object, (re)opened if currently closed"""
        if self.closed:
            self._v_file = file(self.name, 'rb', self.bufsize)
        return self._v_file

    @Lazy
    def ctime(self):
        info = os.stat(self.name)
        return int(info[stat.ST_CTIME])

    @Lazy
    def atime(self):
        info = os.stat(self.name)
        return int(info[stat.ST_ATIME])

    def __len__(self):
        size = self._v_len
        if size is None:
            # stat once and remember the result
            size = int(os.stat(self.name)[stat.ST_SIZE])
            self._v_len = size
        return size

    def __repr__(self):
        return "<ReadFile named %r>" % (self.digest,)

    @property
    def closed(self):
        """true while no open file object is held"""
        handle = self._v_file
        if handle is None:
            return True
        return handle.closed

    def seek(self, offset, whence=0):
        """see file.seek"""
        # seeking to the start of a closed file is a no-op: the next
        # read opens the file, which starts at position 0 anyway
        rewind = (offset == 0 and whence == 0)
        if rewind and self.closed:
            return
        return self._file.seek(offset, whence)

    def tell(self):
        """see file.tell"""
        if not self.closed:
            return self._file.tell()
        return 0

    def read(self, size=-1):
        """see file.read"""
        source = self._file
        return source.read(size)

    def close(self):
        """see file.close"""
        handle = self._v_file
        if handle is not None and not handle.closed:
            handle.close()
        self._v_file = None

    def fileno(self):
        source = self._file
        return source.fileno()

    def __iter__(self):
        return iter(self._file)
class WriteFile(object):
    """write handle for a temporary file which is hashed while written

    ``hd`` is the object whose ``commit`` is called to store the file,
    ``handle`` an open OS level file descriptor and ``path`` the path of
    the temporary file behind that descriptor.
    """

    interface.implements(interfaces.IWriteFile)

    def __init__(self, hd, handle, path):
        self.hd = hd
        self.handle = handle
        self.path = path
        self.sha = sha.new()
        self._pos = 0

    def write(self, s):
        """append s to the file and feed it to the running sha digest"""
        self.sha.update(s)
        remaining = s
        while remaining:
            # os.write may write less than requested, so continue with
            # the rest until everything is on disk
            written = os.write(self.handle, remaining)
            remaining = remaining[written:]
        self._pos += len(s)

    def commit(self):
        """returns the sha digest and saves the file"""
        os.close(self.handle)
        return self.hd.commit(self)

    def tell(self):
        """see file.tell"""
        return self._pos

    def abort(self):
        """abort the write and delete file"""
        try:
            os.close(self.handle)
        finally:
            # remove the temporary file even if closing failed
            os.unlink(self.path)
--------------------------------------------------------------------------------------
property.py
-------------------------------------------------------------------------------------
from zope import component
import interfaces
from cStringIO import StringIO
from datamanager import getFile, _storage
# sentinel used as the "no digest stored on the instance" default in lookups
_marker = object()
# number of bytes requested per read() when copying a value to a new file
BLOCK_SIZE = 1024*128
class ExtBytesProperty(object):
    """a property whose values are stored as external files

    Only the digest of the content is kept in the instance ``__dict__``
    under ``name``; the content itself is written to and read from the
    IHashDir utility.
    """

    def __init__(self, name):
        self.__name = name

    def __delete__(self, inst):
        """release the file referenced by inst, if any

        The digest is removed from the instance as well, so deleting
        twice releases the reference only once. Deleting a value that
        was never set does nothing.
        """
        digest = inst.__dict__.pop(self.__name, _marker)
        if digest is not _marker:
            self.hd.delete(digest)

    @property
    def hd(self):
        return component.getUtility(interfaces.IHashDir)

    def __get__(self, inst, klass):
        """return the stored file for inst, or None if nothing is set"""
        if inst is None:
            return self
        digest = inst.__dict__.get(self.__name, _marker)
        if digest is _marker:
            return None
        return getFile(digest)

    def __set__(self, inst, value):
        """store value (str, unicode or a seekable file-like object)

        Setting None clears the value and releases the reference to the
        file that was stored before.
        """
        if value is None:
            self.__delete__(inst)
            return
        # Handle case when value is a string
        if isinstance(value, unicode):
            value = value.encode('UTF-8')
        if isinstance(value, str):
            value = StringIO(value)
        value.seek(0)
        f = self.hd.new()
        try:
            chunk = value.read(BLOCK_SIZE)
            while chunk:
                f.write(chunk)
                chunk = value.read(BLOCK_SIZE)
        except Exception:
            # do not leave a half written temporary file behind
            f.abort()
            raise
        newDigest = f.commit()
        oldDigest = inst.__dict__.get(self.__name, _marker)
        if newDigest == oldDigest:
            # we have no change, so we have to seek to zero
            # because this is normal behaviour when setting a
            # new value
            if hasattr(_storage, 'dataManager'):
                if newDigest in _storage.dataManager.files:
                    cached = _storage.dataManager.files[newDigest]
                    cached.seek(0)
        else:
            inst.__dict__[self.__name] = newDigest
        # case 1: oldDigest == newDigest; the object still refers to the
        #         same file but f.commit() has added one more reference,
        #         so one reference is released again to balance it.
        # case 2: the object refers to a new file, so its reference to
        #         the old file is released.
        if oldDigest is not _marker:
            self.hd.delete(oldDigest)
--------------------------------------------------------------------------------------
file/file.py
--------------------------------------------------------------------------------------
from persistent import Persistent
from z3c.extfile.property import ExtBytesProperty
from interfaces import IExtFile
from zope import interface
class ExtFile(Persistent):
    """A zope file implementation based on z3c.extfile

    The external file has to be released explicitly with ``delete()``.
    A ``__del__`` finalizer is deliberately not defined: a finalizer
    runs when the in-memory object is collected, which is not the same
    as the object being removed from the database, so it could release
    a file that stored objects still refer to.

    NOTE(review): for automatic clean up, calling ``delete()`` from a
    subscriber for the container's object-removed event looks like the
    right hook -- confirm which event applies in this application.
    """

    interface.implements(IExtFile)
    data = ExtBytesProperty('data')

    def __init__(self, data='', contentType=''):
        self.data = data
        self.contentType = contentType

    def delete(self):
        """release the external file behind ``data``"""
        del self.data

    def getSize(self):
        """return the length of the data, 0 if no data is set"""
        data = self.data
        if data is None:
            return 0
        return len(data)
--------------------------------------------------------------------------------------
rgds,
jayaraj
>>
>> But from a general perspective you are correct... maybe HashDir will
>> have to evolve to add a reference-counting feature, just like Python does!
>>
>> On Wed, Sep 24, 2008 at 11:34 AM, Jürgen kartnaller <
>> juergen.kartnaller at gmail.com> wrote:
>>
>>> Hi jayaraj,
>>> it is definitely not a good idea to implicitily delete files.
>>>
>>> What if two ExtFile objects reference the same file ?
>>>
>>> Jürgen
>>>
>>> On Wed, Sep 24, 2008 at 4:37 AM, Jayarajan Jn <jayarajan at musmo.com>wrote:
>>>
>>>> Hi,
>>>> I am now building some prototypes for my projects, which will be dealing
>>>> with tons of files. After a little scouting I decided to try z3c.extfile.
>>>> Everything works fine, but I found it strange that there is no delete
>>>> feature in z3c.extfile; i.e., even though I can delete an ExtFile object,
>>>> the file in the hash directory is not deleted, and such files keep on
>>>> accumulating...
>>>>
>>>> So i thought i 'll add a delete feature... but my __del__() approach
>>>> doesn't work for me. but i added an additional delete() function too which
>>>> can be invoked explicitly to delete the file before trying to delete ExtFile
>>>> object.
>>>>
>>>> i made following changes to the source...
>>>> inside z3c.extfile.file.file.ExtFile,
>>>>
>>>>
>>>> ----------------------------------------------------------------------------------------------
>>>> class ExtFile(Persistent):
>>>>
>>>> """A zope file implementation based on z3c.extfile"""
>>>>
>>>> interface.implements(IExtFile)
>>>> data = ExtBytesProperty('data')
>>>>
>>>> def __init__(self, data='', contentType=''):
>>>> self.data = data
>>>> self.contentType = contentType
>>>>
>>>> # added the following lines#
>>>>
>>>> * def __del__(self): # <- this is not being invoked when i try to
>>>> delete an extfile object
>>>> del self.data
>>>> #print "deleted data via destructor"
>>>>
>>>> def delete(self): # <- added this to be able to manually able to
>>>> delete files
>>>> del self.data
>>>> #print "deleted data via delete()"
>>>>
>>>> # # # # # # # # # # # # # # #
>>>> *
>>>> def getSize(self):
>>>> return len(self.data)
>>>>
>>>>
>>>> ----------------------------------------------------------------------------------------------
>>>>
>>>> and the 'data' is a 'property' (ExtBytesProperty)
>>>> so i made following changes to z3c.extfile.property.ExtBytesProperty
>>>>
>>>>
>>>> ----------------------------------------------------------------------------------------------
>>>> class ExtBytesProperty(object):
>>>>
>>>> """a property which's values are stored as external files"""
>>>>
>>>> def __init__(self, name):
>>>> self.__name = name
>>>>
>>>>
>>>>
>>>> # added the following lines#
>>>>
>>>> * def __delete__(self,inst):
>>>> digest = inst.__dict__[self.__name]
>>>> self.hd.delete(digest)
>>>>
>>>> * *# # # # # # # # # # # # # # #
>>>>
>>>>
>>>>
>>>> * @property
>>>> def hd(self):
>>>> return component.getUtility(interfaces.IHashDir)
>>>>
>>>> def __get__(self, inst, klass):
>>>>
>>>> if inst is None:
>>>> return self
>>>> digest = inst.__dict__.get(self.__name, _marker)
>>>> if digest is _marker:
>>>> return None
>>>> return getFile(digest)
>>>>
>>>> def __set__(self, inst, value):
>>>> # ignore if value is None
>>>> if value is None:
>>>> if inst.__dict__.has_key(self.__name):
>>>> del inst.__dict__[self.__name]
>>>> return
>>>> # Handle case when value is a string
>>>> if isinstance(value, unicode):
>>>> value = value.encode('UTF-8')
>>>> if isinstance(value, str):
>>>> value = StringIO(value)
>>>> value.seek(0)
>>>> f = self.hd.new()
>>>> while True:
>>>> chunk = value.read(BLOCK_SIZE)
>>>> if not chunk:
>>>> newDigest = f.commit()
>>>> oldDigest = inst.__dict__.get(self.__name, _marker)
>>>> if newDigest == oldDigest:
>>>> # we have no change, so we have to seek to zero
>>>> # because this is normal behaviour when setting a
>>>> # new value
>>>> if hasattr(_storage, 'dataManager'):
>>>> if newDigest in _storage.dataManager.files:
>>>> f = _storage.dataManager.files[newDigest]
>>>> f.seek(0)
>>>> else:
>>>> inst.__dict__[self.__name] = newDigest
>>>> break
>>>> f.write(chunk)
>>>>
>>>> ----------------------------------------------------------------------------------------------------
>>>>
>>>> and at last added the real code which delete the file in hash directory
>>>> too
>>>>
>>>> i added following codes inside z3c.extfile.hashdir.HashDir class
>>>> ---------------------------------------------------
>>>> def delete(self,digest):
>>>> """delete the file"""
>>>> path=self.getPath(digest)
>>>> if os.path.exists(path):
>>>> os.remove(path)
>>>> return
>>>> ----------------------------------------------------
>>>>
>>>> Now everything works fine, except that when I try to delete an ExtFile
>>>> object in ZODB, __del__() is not being invoked!!!!!
>>>>
>>>> can anyone tell me how can i fix this???
>>>>
>>>> thanks in advance
>>>>
>>>> jayaraj
>>>>
>>>> _______________________________________________
>>>> Zope-Dev maillist - Zope-Dev at zope.org
>>>> http://mail.zope.org/mailman/listinfo/zope-dev
>>>> ** No cross posts or HTML encoding! **
>>>> (Related lists -
>>>> http://mail.zope.org/mailman/listinfo/zope-announce
>>>> http://mail.zope.org/mailman/listinfo/zope )
>>>>
>>>>
>>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mail.zope.org/pipermail/zope-dev/attachments/20080926/c1e8a949/attachment-0001.html
-------------- next part --------------
A non-text attachment was scrubbed...
Name: z3c.extfile-diffs.tar.gz
Type: application/x-gzip
Size: 1303 bytes
Desc: not available
Url : http://mail.zope.org/pipermail/zope-dev/attachments/20080926/c1e8a949/attachment-0002.gz
-------------- next part --------------
A non-text attachment was scrubbed...
Name: z3c.extfile-modified.tar.gz
Type: application/x-gzip
Size: 21492 bytes
Desc: not available
Url : http://mail.zope.org/pipermail/zope-dev/attachments/20080926/c1e8a949/attachment-0003.gz
More information about the Zope-Dev
mailing list