[Zope-dev] delete feature to z3c.extfile
Jayarajan Jn
jayarajan at musmo.com
Fri Sep 26 09:50:18 EDT 2008
hi,
For everyone's convenience, I am attaching the diffs separately.
rgds,
jayaraj
On Fri, Sep 26, 2008 at 7:08 PM, Jayarajan Jn <jayarajan at musmo.com> wrote:
> On Wed, Sep 24, 2008 at 8:58 PM, Jürgen kartnaller <
> juergen.kartnaller at gmail.com> wrote:
>
>>
>>
>> On Wed, Sep 24, 2008 at 9:10 AM, Jayarajan Jn <jayarajan at musmo.com>wrote:
>>
>>> Hi Jürgen,
>>>
>>> First of all, thank you for your reply.
>>> Hmm, you are right too, and I am comfortable with the explicit delete.
>>> But in my prototype I have also added a WebDAV interface for managing
>>> extfiles. When a user deletes the extfile object through the WebDAV
>>> interface, the file remains there, because I don't know where to put my code
>>> to call delete() in that case. And in my project there won't be more than
>>> one extfile referring to the same file, so in my case it's OK to go for implicit
>>> delete! But it's not working!
>>>
>>
>> You can never be sure that a file is only used once. If two users upload the
>> same file, only one copy is stored in extfile, because both files have the
>> same hash.
>>
>
> Yes, that's right: we will never know how many extfile objects share the
> file. But there are enough resources to be able to find that out.
>
> We can add reference-counting functionality to the HashDir class, and then
> we can keep track of this. See my code below: I have created a
> ReferenceCounter and it works fine. I can now delete implicitly without
> worry.
>
>
> But still, when I use it in my Zope application, '__del__' itself is
> *not* being invoked :?
>
> Please tell me what you think.
>
> referencecounter.py:-
>
> ------------------------------------------------------------------------------------
> import os
> from persistent import Persistent
>
> class ReferenceCounter(Persistent):
> """ keeps track number of ExtBytesProperties
> refering to the a file in HashDir """
>
> def __init__(self, dirpath):
> self.path = os.path.join(dirpath,'refcount')
> self.counts={}
> if os.path.exists(self.path): # else case? we will deal with it in
> commit!
> f= open(self.path)
> for line in f:
> digest, count=line.split(',')
> self.counts[digest]=int(count)
> f.close()
>
> def getCount(self,digest):
> return self.counts.get(digest,0)
>
> def getTotalFiles(self):
> return len(self.counts)
>
> def addReference(self,digest):
> newCount=self.getCount(digest)+1
> self.counts[digest]=newCount
> return newCount
>
> def removeReference(self,digest):
> newCount=self.getCount(digest)-1
> if newCount > 0:
> self.counts[digest]=newCount
> elif newCount == 0:
> del self.counts[digest]
> return newCount
>
> def commit(self):
> f=open(self.path,'w')
> f.write('\n'.join("%s,%s" % (digest,count)
> for digest,count in self.counts.items()))
> f.close()
>
> --------------------------------------------------------------------------------
> hashdir.py
>
> --------------------------------------------------------------------------------
> import sha
> import os
> import stat
> import tempfile
> import shutil
> from types import StringTypes, UnicodeType
> import interfaces
> from zope import interface
> from persistent import Persistent
> from zope.cachedescriptors.property import Lazy
> from referencecounter import ReferenceCounter
>
> class HashDir(Persistent):
>
> """a directory holding files named after their sha1 hash"""
>
> interface.implements(interfaces.IHashDir)
> _path = None
>
> def __init__(self, path=None):
> self.path = path
> self.refCounter = ReferenceCounter(self.etc)
>
> def _setPath(self, path):
> if path is None:
> return
> self._path = os.path.abspath(path)
> self.tmp = os.path.join(self.path, 'tmp')
> self.var = os.path.join(self.path, 'var')
> self.etc = os.path.join(self.path, 'etc')
> self._initPaths()
>
> def _getPath(self):
> return self._path
>
> path = property(_getPath,_setPath)
>
> def _initPaths(self):
> for path in [self.path,self.var,self.tmp,self.etc]:
> if not os.path.exists(path):
> os.mkdir(path)
>
> def new(self):
> """returns a new filehandle"""
> handle, path = tempfile.mkstemp(prefix='dirty.',
> dir=self.tmp)
> return WriteFile(self, handle, path)
>
> def commit(self, f):
> """commit a file, this is called by the file"""
> digest = f.sha.hexdigest()
> target = os.path.join(self.var, digest)
> if os.path.exists(target):
> # we have that content so just delete the tmp file
> os.remove(f.path)
> else:
> shutil.move(f.path, target)
> os.chmod(target, 0440)
> self.refCounter.addReference(digest)
> self.refCounter.commit()
> return digest
>
> def digests(self):
> """returns all digests stored"""
> return os.listdir(self.var)
>
> def getPath(self, digest):
> if type(digest) not in StringTypes or len(digest) != 40:
> raise ValueError, repr(digest)
> if type(self.var) is UnicodeType:
> digest = unicode(digest)
> path = os.path.join(self.var, digest)
> if not os.path.isfile(path):
> raise KeyError, digest
> return path
>
> def getSize(self, digest):
> return os.path.getsize(self.getPath(digest))
>
> def open(self, digest):
> return ReadFile(self.getPath(digest))
>
> def delete(self,digest):
> """delete the file"""
> path=self.getPath(digest)
> if os.path.exists(path):
> if self.refCounter.removeReference(digest) is 0:
>
> os.remove(path)
> self.refCounter.commit()
> return
>
>
> class ReadFile(object):
>
> """A lazy read file implementation"""
>
> interface.implements(interfaces.IReadFile)
>
> def __init__(self, name, bufsize=-1):
> self.name = name
> self.digest = str(os.path.split(self.name)[1])
> self.bufsize=bufsize
> self._v_len = None
> self._v_file = None
>
> @property
> def _file(self):
> if not self.closed:
> return self._v_file
> self._v_file = file(self.name, 'rb', self.bufsize)
> return self._v_file
>
> @Lazy
> def ctime(self):
> return int(os.stat(self.name)[stat.ST_CTIME])
>
> @Lazy
> def atime(self):
> return int(os.stat(self.name)[stat.ST_ATIME])
>
> def __len__(self):
> if self._v_len is None:
> self._v_len = int(os.stat(self.name)[stat.ST_SIZE])
> return self._v_len
>
> def __repr__(self):
> return "<ReadFile named %s>" % repr(self.digest)
>
> @property
> def closed(self):
> """like file closed, but lazy"""
> return self._v_file is None or self._v_file.closed
>
> def seek(self, offset, whence=0):
> """see file.seek"""
> # we optimize when we have 0, 0 then we do not need to open
> # the file if it is closed, because on the next read we are at
> # 0
> if offset==0 and whence==0 and self.closed:
> return
> return self._file.seek(offset, whence)
>
> def tell(self):
> """see file.tell"""
> if self.closed:
> return 0
> return self._file.tell()
>
> def read(self, size=-1):
> """see file.read"""
> return self._file.read(size)
>
> def close(self):
> """see file.close"""
> if not self.closed:
> self._v_file.close()
> self._v_file = None
>
> def fileno(self):
> return self._file.fileno()
>
> def __iter__(self):
> return self._file.__iter__()
>
>
> class WriteFile(object):
>
> interface.implements(interfaces.IWriteFile)
>
> def __init__(self, hd, handle, path):
> self.hd = hd
> self.handle = handle
> self.path = path
> self.sha = sha.new()
> self._pos = 0
>
> def write(self, s):
> self.sha.update(s)
> os.write(self.handle, s)
> self._pos += len(s)
>
> def commit(self):
> """returns the sha digest and saves the file"""
> os.close(self.handle)
> return self.hd.commit(self)
>
> def tell(self):
> """see file.tell"""
> return self._pos
>
> def abort(self):
> """abort the write and delete file"""
> os.close(self.handle)
> os.unlink(self.path)
>
> --------------------------------------------------------------------------------------
> property.py
>
> -------------------------------------------------------------------------------------
> from zope import component
> import interfaces
> from cStringIO import StringIO
>
> from datamanager import getFile, _storage
>
> _marker = object()
>
> BLOCK_SIZE = 1024*128
>
> class ExtBytesProperty(object):
>
> """a property which's values are stored as external files"""
>
> def __init__(self, name):
> self.__name = name
>
> def __delete__(self,inst):
> digest = inst.__dict__[self.__name]
> self.hd.delete(digest)
>
> @property
> def hd(self):
> return component.getUtility(interfaces.IHashDir)
>
> def __get__(self, inst, klass):
>
> if inst is None:
> return self
> digest = inst.__dict__.get(self.__name, _marker)
> if digest is _marker:
> return None
> return getFile(digest)
>
> def __set__(self, inst, value):
> # ignore if value is None
> if value is None:
> if inst.__dict__.has_key(self.__name):
> del inst.__dict__[self.__name]
> return
> # Handle case when value is a string
> if isinstance(value, unicode):
> value = value.encode('UTF-8')
> if isinstance(value, str):
> value = StringIO(value)
> value.seek(0)
> f = self.hd.new()
> while True:
> chunk = value.read(BLOCK_SIZE)
> if not chunk:
> newDigest = f.commit()
> oldDigest = inst.__dict__.get(self.__name, _marker)
> if newDigest == oldDigest:
> # we have no change, so we have to seek to zero
> # because this is normal behaviour when setting a
> # new value
> if hasattr(_storage, 'dataManager'):
> if newDigest in _storage.dataManager.files:
> f = _storage.dataManager.files[newDigest]
> f.seek(0)
> else:
> inst.__dict__[self.__name] = newDigest
> """
> case 1: oldDigest=newDigets though the object is still
> refering
> to same file f.comit would have incremented the
> reference by one.
> So to balance it oldDigest must be deleted.
> case 2: object referencing a new file. So old file must
> be deleted
> """
> if oldDigest is not _marker:
> self.hd.delete(oldDigest)
>
> break
> f.write(chunk)
>
>
> --------------------------------------------------------------------------------------
> file/file.py
>
> --------------------------------------------------------------------------------------
> from persistent import Persistent
> from z3c.extfile.property import ExtBytesProperty
> from interfaces import IExtFile
> from zope import interface
>
> class ExtFile(Persistent):
>
> """A zope file implementation based on z3c.extfile"""
>
> interface.implements(IExtFile)
> data = ExtBytesProperty('data')
>
> def __init__(self, data='', contentType=''):
> self.data = data
> self.contentType = contentType
>
> def __del__(self):
> del self.data
> print "deleted data"
>
>
> def delete(self):
>
> del self.data
> print "deleted data via delete()"
>
> def getSize(self):
> return len(self.data)
>
> --------------------------------------------------------------------------------------
>
> rgds,
>
> jayaraj
>
>
>
>>>
>>> But from a general perspective you are correct. Maybe HashDir will
>>> have to evolve to add a reference-counting feature, just like Python does!
>>>
>>> On Wed, Sep 24, 2008 at 11:34 AM, Jürgen kartnaller <
>>> juergen.kartnaller at gmail.com> wrote:
>>>
>>>> Hi jayaraj,
>>>> it is definitely not a good idea to implicitly delete files.
>>>>
>>>> What if two ExtFile objects reference the same file ?
>>>>
>>>> Jürgen
>>>>
>>>> On Wed, Sep 24, 2008 at 4:37 AM, Jayarajan Jn <jayarajan at musmo.com>wrote:
>>>>
>>>>> Hi,
>>>>> I am now doing some prototypes for my projects, which will be dealing
>>>>> with tons of files. After a little scouting I decided to try z3c.extfile.
>>>>> Everything works fine, but I found it strange that there is no delete
>>>>> feature in z3c.extfile, i.e. even if I delete an ExtFile object, the file
>>>>> in the hash directory is not deleted, and the files keep on
>>>>> accumulating...
>>>>>
>>>>> So I thought I would add a delete feature, but my __del__() approach
>>>>> doesn't work for me. I also added an additional delete() function, which
>>>>> can be invoked explicitly to delete the file before deleting the ExtFile
>>>>> object.
>>>>>
>>>>> i made following changes to the source...
>>>>> inside z3c.extfile.file.file.ExtFile,
>>>>>
>>>>>
>>>>> ----------------------------------------------------------------------------------------------
>>>>> class ExtFile(Persistent):
>>>>>
>>>>> """A zope file implementation based on z3c.extfile"""
>>>>>
>>>>> interface.implements(IExtFile)
>>>>> data = ExtBytesProperty('data')
>>>>>
>>>>> def __init__(self, data='', contentType=''):
>>>>> self.data = data
>>>>> self.contentType = contentType
>>>>>
>>>>> # added the following lines#
>>>>>
>>>>> * def __del__(self): # <- this is not being invoked when i try to
>>>>> delete an extfile object
>>>>> del self.data
>>>>> #print "deleted data via destructor"
>>>>>
>>>>> def delete(self): # <- added this to be able to manually able
>>>>> to delete files
>>>>> del self.data
>>>>> #print "deleted data via delete()"
>>>>>
>>>>> # # # # # # # # # # # # # # #
>>>>> *
>>>>> def getSize(self):
>>>>> return len(self.data)
>>>>>
>>>>>
>>>>> ----------------------------------------------------------------------------------------------
>>>>>
>>>>> and the 'data' is a 'property' (ExtBytesProperty)
>>>>> so i made following changes to z3c.extfile.property.ExtBytesProperty
>>>>>
>>>>>
>>>>> ----------------------------------------------------------------------------------------------
>>>>> class ExtBytesProperty(object):
>>>>>
>>>>> """a property which's values are stored as external files"""
>>>>>
>>>>> def __init__(self, name):
>>>>> self.__name = name
>>>>>
>>>>>
>>>>>
>>>>> # added the following lines#
>>>>>
>>>>> * def __delete__(self,inst):
>>>>> digest = inst.__dict__[self.__name]
>>>>> self.hd.delete(digest)
>>>>>
>>>>> * *# # # # # # # # # # # # # # #
>>>>>
>>>>>
>>>>>
>>>>> * @property
>>>>> def hd(self):
>>>>> return component.getUtility(interfaces.IHashDir)
>>>>>
>>>>> def __get__(self, inst, klass):
>>>>>
>>>>> if inst is None:
>>>>> return self
>>>>> digest = inst.__dict__.get(self.__name, _marker)
>>>>> if digest is _marker:
>>>>> return None
>>>>> return getFile(digest)
>>>>>
>>>>> def __set__(self, inst, value):
>>>>> # ignore if value is None
>>>>> if value is None:
>>>>> if inst.__dict__.has_key(self.__name):
>>>>> del inst.__dict__[self.__name]
>>>>> return
>>>>> # Handle case when value is a string
>>>>> if isinstance(value, unicode):
>>>>> value = value.encode('UTF-8')
>>>>> if isinstance(value, str):
>>>>> value = StringIO(value)
>>>>> value.seek(0)
>>>>> f = self.hd.new()
>>>>> while True:
>>>>> chunk = value.read(BLOCK_SIZE)
>>>>> if not chunk:
>>>>> newDigest = f.commit()
>>>>> oldDigest = inst.__dict__.get(self.__name, _marker)
>>>>> if newDigest == oldDigest:
>>>>> # we have no change, so we have to seek to zero
>>>>> # because this is normal behaviour when setting a
>>>>> # new value
>>>>> if hasattr(_storage, 'dataManager'):
>>>>> if newDigest in _storage.dataManager.files:
>>>>> f = _storage.dataManager.files[newDigest]
>>>>> f.seek(0)
>>>>> else:
>>>>> inst.__dict__[self.__name] = newDigest
>>>>> break
>>>>> f.write(chunk)
>>>>>
>>>>> ----------------------------------------------------------------------------------------------------
>>>>>
>>>>> And finally I added the real code, which deletes the file in the hash
>>>>> directory too.
>>>>>
>>>>> I added the following code inside the z3c.extfile.hashdir.HashDir class:
>>>>> ---------------------------------------------------
>>>>> def delete(self,digest):
>>>>> """delete the file"""
>>>>> path=self.getPath(digest)
>>>>> if os.path.exists(path):
>>>>> os.remove(path)
>>>>> return
>>>>> ----------------------------------------------------
>>>>>
>>>>> Now everything works fine, except that when I try to delete an ExtFile
>>>>> object in the ZODB, __del__() is not being invoked!
>>>>>
>>>>> Can anyone tell me how I can fix this?
>>>>>
>>>>> thanks in advance
>>>>>
>>>>> jayaraj
>>>>>
>>>>> _______________________________________________
>>>>> Zope-Dev maillist - Zope-Dev at zope.org
>>>>> http://mail.zope.org/mailman/listinfo/zope-dev
>>>>> ** No cross posts or HTML encoding! **
>>>>> (Related lists -
>>>>> http://mail.zope.org/mailman/listinfo/zope-announce
>>>>> http://mail.zope.org/mailman/listinfo/zope )
>>>>>
>>>>>
>>>>
>>>
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mail.zope.org/pipermail/zope-dev/attachments/20080926/b0ab197d/attachment-0001.html
-------------- next part --------------
A non-text attachment was scrubbed...
Name: hashdir.diff
Type: text/x-patch
Size: 855 bytes
Desc: not available
Url : http://mail.zope.org/pipermail/zope-dev/attachments/20080926/b0ab197d/attachment-0003.bin
-------------- next part --------------
A non-text attachment was scrubbed...
Name: property.diff
Type: text/x-patch
Size: 808 bytes
Desc: not available
Url : http://mail.zope.org/pipermail/zope-dev/attachments/20080926/b0ab197d/attachment-0004.bin
-------------- next part --------------
A non-text attachment was scrubbed...
Name: file.diff
Type: text/x-patch
Size: 218 bytes
Desc: not available
Url : http://mail.zope.org/pipermail/zope-dev/attachments/20080926/b0ab197d/attachment-0005.bin
More information about the Zope-Dev
mailing list