On Wed, Sep 24, 2008 at 8:58 PM, Jürgen kartnaller < juergen.kartnaller@gmail.com> wrote:
On Wed, Sep 24, 2008 at 9:10 AM, Jayarajan Jn jayarajan@musmo.com wrote:
Hi Jürgen,
First of all thank you for your reply.... hmm you are right too... And i am comfortable with the explicit delete. But in my prototype for i have also added webDAV interface for managing extfile.... Then when a user deletes the extfile object through webDAV interface, the file remains there.... coz i don't know where to put my codes to call delete() in that case... And in my project there wont be more than one extfiles refering to same file! so in my case its ok to go for implicit delete! but its now working!
You can never be sure if a file is only used once. If two user upload the same file only one copy is stored in extfile because both files have the same hash.
Yea thats right ... we will never know how many extfile objects share the file. But there is enough resources to be able to know it.
We can add a reference counting functionality to HashDir class. And then we can keep track of this.... see my code bellow.... i have created a ReferenceCounter and it works fine. I can now implicitly delete with out worry.
But still when i use it in my zope application the '__del__' itself is *not* being invoked :?
plz tell me what you think....
referencecounter.py:- ------------------------------------------------------------------------------------ import os from persistent import Persistent
class ReferenceCounter(Persistent): """ keeps track number of ExtBytesProperties refering to the a file in HashDir """
def __init__(self, dirpath): self.path = os.path.join(dirpath,'refcount') self.counts={} if os.path.exists(self.path): # else case? we will deal with it in commit! f= open(self.path) for line in f: digest, count=line.split(',') self.counts[digest]=int(count) f.close()
def getCount(self,digest): return self.counts.get(digest,0)
def getTotalFiles(self): return len(self.counts)
def addReference(self,digest): newCount=self.getCount(digest)+1 self.counts[digest]=newCount return newCount
def removeReference(self,digest): newCount=self.getCount(digest)-1 if newCount > 0: self.counts[digest]=newCount elif newCount == 0: del self.counts[digest] return newCount
def commit(self): f=open(self.path,'w') f.write('\n'.join("%s,%s" % (digest,count) for digest,count in self.counts.items())) f.close() -------------------------------------------------------------------------------- hashdir.py -------------------------------------------------------------------------------- import sha import os import stat import tempfile import shutil from types import StringTypes, UnicodeType import interfaces from zope import interface from persistent import Persistent from zope.cachedescriptors.property import Lazy from referencecounter import ReferenceCounter
class HashDir(Persistent):
"""a directory holding files named after their sha1 hash"""
interface.implements(interfaces.IHashDir) _path = None
def __init__(self, path=None): self.path = path self.refCounter = ReferenceCounter(self.etc)
def _setPath(self, path): if path is None: return self._path = os.path.abspath(path) self.tmp = os.path.join(self.path, 'tmp') self.var = os.path.join(self.path, 'var') self.etc = os.path.join(self.path, 'etc') self._initPaths()
def _getPath(self): return self._path
path = property(_getPath,_setPath)
def _initPaths(self): for path in [self.path,self.var,self.tmp,self.etc]: if not os.path.exists(path): os.mkdir(path)
def new(self): """returns a new filehandle""" handle, path = tempfile.mkstemp(prefix='dirty.', dir=self.tmp) return WriteFile(self, handle, path)
def commit(self, f): """commit a file, this is called by the file""" digest = f.sha.hexdigest() target = os.path.join(self.var, digest) if os.path.exists(target): # we have that content so just delete the tmp file os.remove(f.path) else: shutil.move(f.path, target) os.chmod(target, 0440) self.refCounter.addReference(digest) self.refCounter.commit() return digest
def digests(self): """returns all digests stored""" return os.listdir(self.var)
def getPath(self, digest): if type(digest) not in StringTypes or len(digest) != 40: raise ValueError, repr(digest) if type(self.var) is UnicodeType: digest = unicode(digest) path = os.path.join(self.var, digest) if not os.path.isfile(path): raise KeyError, digest return path
def getSize(self, digest): return os.path.getsize(self.getPath(digest))
def open(self, digest): return ReadFile(self.getPath(digest))
def delete(self,digest): """delete the file""" path=self.getPath(digest) if os.path.exists(path): if self.refCounter.removeReference(digest) is 0: os.remove(path) self.refCounter.commit() return
class ReadFile(object):
"""A lazy read file implementation"""
interface.implements(interfaces.IReadFile)
def __init__(self, name, bufsize=-1): self.name = name self.digest = str(os.path.split(self.name)[1]) self.bufsize=bufsize self._v_len = None self._v_file = None
@property def _file(self): if not self.closed: return self._v_file self._v_file = file(self.name, 'rb', self.bufsize) return self._v_file
@Lazy def ctime(self): return int(os.stat(self.name)[stat.ST_CTIME])
@Lazy def atime(self): return int(os.stat(self.name)[stat.ST_ATIME])
def __len__(self): if self._v_len is None: self._v_len = int(os.stat(self.name)[stat.ST_SIZE]) return self._v_len
def __repr__(self): return "<ReadFile named %s>" % repr(self.digest)
@property def closed(self): """like file closed, but lazy""" return self._v_file is None or self._v_file.closed
def seek(self, offset, whence=0): """see file.seek""" # we optimize when we have 0, 0 then we do not need to open # the file if it is closed, because on the next read we are at # 0 if offset==0 and whence==0 and self.closed: return return self._file.seek(offset, whence)
def tell(self): """see file.tell""" if self.closed: return 0 return self._file.tell()
def read(self, size=-1): """see file.read""" return self._file.read(size)
def close(self): """see file.close""" if not self.closed: self._v_file.close() self._v_file = None
def fileno(self): return self._file.fileno()
def __iter__(self): return self._file.__iter__()
class WriteFile(object):
interface.implements(interfaces.IWriteFile)
def __init__(self, hd, handle, path): self.hd = hd self.handle = handle self.path = path self.sha = sha.new() self._pos = 0
def write(self, s): self.sha.update(s) os.write(self.handle, s) self._pos += len(s)
def commit(self): """returns the sha digest and saves the file""" os.close(self.handle) return self.hd.commit(self)
def tell(self): """see file.tell""" return self._pos
def abort(self): """abort the write and delete file""" os.close(self.handle) os.unlink(self.path) -------------------------------------------------------------------------------------- property.py ------------------------------------------------------------------------------------- from zope import component import interfaces from cStringIO import StringIO
from datamanager import getFile, _storage
_marker = object()
BLOCK_SIZE = 1024*128
class ExtBytesProperty(object):
"""a property which's values are stored as external files"""
def __init__(self, name): self.__name = name
def __delete__(self,inst): digest = inst.__dict__[self.__name] self.hd.delete(digest)
@property def hd(self): return component.getUtility(interfaces.IHashDir)
def __get__(self, inst, klass):
if inst is None: return self digest = inst.__dict__.get(self.__name, _marker) if digest is _marker: return None return getFile(digest)
def __set__(self, inst, value): # ignore if value is None if value is None: if inst.__dict__.has_key(self.__name): del inst.__dict__[self.__name] return # Handle case when value is a string if isinstance(value, unicode): value = value.encode('UTF-8') if isinstance(value, str): value = StringIO(value) value.seek(0) f = self.hd.new() while True: chunk = value.read(BLOCK_SIZE) if not chunk: newDigest = f.commit() oldDigest = inst.__dict__.get(self.__name, _marker) if newDigest == oldDigest: # we have no change, so we have to seek to zero # because this is normal behaviour when setting a # new value if hasattr(_storage, 'dataManager'): if newDigest in _storage.dataManager.files: f = _storage.dataManager.files[newDigest] f.seek(0) else: inst.__dict__[self.__name] = newDigest """ case 1: oldDigest=newDigets though the object is still refering to same file f.comit would have incremented the reference by one. So to balance it oldDigest must be deleted. case 2: object referencing a new file. So old file must be deleted """ if oldDigest is not _marker: self.hd.delete(oldDigest)
break f.write(chunk)
-------------------------------------------------------------------------------------- file/file.py -------------------------------------------------------------------------------------- from persistent import Persistent from z3c.extfile.property import ExtBytesProperty from interfaces import IExtFile from zope import interface
class ExtFile(Persistent):
"""A zope file implementation based on z3c.extfile"""
interface.implements(IExtFile) data = ExtBytesProperty('data')
def __init__(self, data='', contentType=''): self.data = data self.contentType = contentType
def __del__(self): del self.data print "deleted data"
def delete(self):
del self.data print "deleted data via delete()"
def getSize(self): return len(self.data) --------------------------------------------------------------------------------------
rgds,
jayaraj
But form a general perspective you are correct.... may be HashDir will have to evolve to add reference counting feture just like python do!
On Wed, Sep 24, 2008 at 11:34 AM, Jürgen kartnaller < juergen.kartnaller@gmail.com> wrote:
Hi jayaraj, it is definitely not a good idea to implicitily delete files.
What if two ExtFile objects reference the same file ?
Jürgen
On Wed, Sep 24, 2008 at 4:37 AM, Jayarajan Jn jayarajan@musmo.comwrote:
Hi, i am now doing some prototypes for my projects which will be dealing with tones of files. After a little scouting i decided to try z3c.extfile. Every thing works fine. But i found it strange that there is no delete feature in z3c.extfile. ie, even if i can delete a ExtFile object, the file in the hash directory is not getting deleted! and it keeps on accumulating...
So i thought i 'll add a delete feature... but my __del__() approach doesn't work for me. but i added an additional delete() function too which can be invoked explicitly to delete the file before trying to delete ExtFile object.
i made following changes to the source... inside z3c.extfile.file.file.ExtFile,
class ExtFile(Persistent):
"""A zope file implementation based on z3c.extfile""" interface.implements(IExtFile) data = ExtBytesProperty('data') def __init__(self, data='', contentType=''): self.data = data self.contentType = contentType # added the following lines#
- def __del__(self): # <- this is not being invoked when i try to
delete an extfile object del self.data #print "deleted data via destructor"
def delete(self): # <- added this to be able to manually able to
delete files del self.data #print "deleted data via delete()"
# # # # # # # # # # # # # # #
- def getSize(self): return len(self.data)
and the 'data' is a 'property' (ExtBytesProperty) so i made following changes to z3c.extfile.property.ExtBytesProperty
class ExtBytesProperty(object):
"""a property which's values are stored as external files""" def __init__(self, name): self.__name = name # added the following lines#
def __delete__(self,inst): digest = inst.__dict__[self.__name] self.hd.delete(digest)
*# # # # # # # # # # # # # # #
@property def hd(self): return component.getUtility(interfaces.IHashDir)
def __get__(self, inst, klass):
if inst is None: return self digest = inst.__dict__.get(self.__name, _marker) if digest is _marker: return None return getFile(digest)
def __set__(self, inst, value): # ignore if value is None if value is None: if inst.__dict__.has_key(self.__name): del inst.__dict__[self.__name] return # Handle case when value is a string if isinstance(value, unicode): value = value.encode('UTF-8') if isinstance(value, str): value = StringIO(value) value.seek(0) f = self.hd.new() while True: chunk = value.read(BLOCK_SIZE) if not chunk: newDigest = f.commit() oldDigest = inst.__dict__.get(self.__name, _marker) if newDigest == oldDigest: # we have no change, so we have to seek to zero # because this is normal behaviour when setting a # new value if hasattr(_storage, 'dataManager'): if newDigest in _storage.dataManager.files: f = _storage.dataManager.files[newDigest] f.seek(0) else: inst.__dict__[self.__name] = newDigest break f.write(chunk)
and at last added the real code which delete the file in hash directory too
i added following codes inside z3c.extfile.hashdir.HashDir class
def delete(self,digest): """delete the file""" path=self.getPath(digest) if os.path.exists(path): os.remove(path) return
Now, everything works fine when i try to delete an ExtFile object in ZODB, __del__() is not being invoked!!!!!
can anyone tell me how can i fix this???
thanks in advance
jayaraj
Zope-Dev maillist - Zope-Dev@zope.org http://mail.zope.org/mailman/listinfo/zope-dev ** No cross posts or HTML encoding! ** (Related lists - http://mail.zope.org/mailman/listinfo/zope-announce http://mail.zope.org/mailman/listinfo/zope )
hi, for the convenience of all separately attaching the diffs
rgds, jayaraj
On Fri, Sep 26, 2008 at 7:08 PM, Jayarajan Jn jayarajan@musmo.com wrote:
On Wed, Sep 24, 2008 at 8:58 PM, Jürgen kartnaller < juergen.kartnaller@gmail.com> wrote:
On Wed, Sep 24, 2008 at 9:10 AM, Jayarajan Jn jayarajan@musmo.comwrote:
Hi Jürgen,
First of all thank you for your reply.... hmm you are right too... And i am comfortable with the explicit delete. But in my prototype for i have also added webDAV interface for managing extfile.... Then when a user deletes the extfile object through webDAV interface, the file remains there.... coz i don't know where to put my codes to call delete() in that case... And in my project there wont be more than one extfiles refering to same file! so in my case its ok to go for implicit delete! but its now working!
You can never be sure if a file is only used once. If two user upload the same file only one copy is stored in extfile because both files have the same hash.
Yea thats right ... we will never know how many extfile objects share the file. But there is enough resources to be able to know it.
We can add a reference counting functionality to HashDir class. And then we can keep track of this.... see my code bellow.... i have created a ReferenceCounter and it works fine. I can now implicitly delete with out worry.
But still when i use it in my zope application the '__del__' itself is *not* being invoked :?
plz tell me what you think....
referencecounter.py:-
import os from persistent import Persistent
class ReferenceCounter(Persistent): """ keeps track number of ExtBytesProperties refering to the a file in HashDir """
def __init__(self, dirpath): self.path = os.path.join(dirpath,'refcount') self.counts={} if os.path.exists(self.path): # else case? we will deal with it in
commit! f= open(self.path) for line in f: digest, count=line.split(',') self.counts[digest]=int(count) f.close()
def getCount(self,digest): return self.counts.get(digest,0) def getTotalFiles(self): return len(self.counts) def addReference(self,digest): newCount=self.getCount(digest)+1 self.counts[digest]=newCount return newCount def removeReference(self,digest): newCount=self.getCount(digest)-1 if newCount > 0: self.counts[digest]=newCount elif newCount == 0: del self.counts[digest] return newCount def commit(self): f=open(self.path,'w') f.write('\n'.join("%s,%s" % (digest,count) for digest,count in self.counts.items())) f.close()
hashdir.py
import sha import os import stat import tempfile import shutil from types import StringTypes, UnicodeType import interfaces from zope import interface from persistent import Persistent from zope.cachedescriptors.property import Lazy from referencecounter import ReferenceCounter
class HashDir(Persistent):
"""a directory holding files named after their sha1 hash""" interface.implements(interfaces.IHashDir) _path = None def __init__(self, path=None): self.path = path self.refCounter = ReferenceCounter(self.etc) def _setPath(self, path): if path is None: return self._path = os.path.abspath(path) self.tmp = os.path.join(self.path, 'tmp') self.var = os.path.join(self.path, 'var') self.etc = os.path.join(self.path, 'etc') self._initPaths() def _getPath(self): return self._path path = property(_getPath,_setPath) def _initPaths(self): for path in [self.path,self.var,self.tmp,self.etc]: if not os.path.exists(path): os.mkdir(path) def new(self): """returns a new filehandle""" handle, path = tempfile.mkstemp(prefix='dirty.', dir=self.tmp) return WriteFile(self, handle, path) def commit(self, f): """commit a file, this is called by the file""" digest = f.sha.hexdigest() target = os.path.join(self.var, digest) if os.path.exists(target): # we have that content so just delete the tmp file os.remove(f.path) else: shutil.move(f.path, target) os.chmod(target, 0440) self.refCounter.addReference(digest) self.refCounter.commit() return digest def digests(self): """returns all digests stored""" return os.listdir(self.var) def getPath(self, digest): if type(digest) not in StringTypes or len(digest) != 40: raise ValueError, repr(digest) if type(self.var) is UnicodeType: digest = unicode(digest) path = os.path.join(self.var, digest) if not os.path.isfile(path): raise KeyError, digest return path def getSize(self, digest): return os.path.getsize(self.getPath(digest)) def open(self, digest): return ReadFile(self.getPath(digest)) def delete(self,digest): """delete the file""" path=self.getPath(digest) if os.path.exists(path): if self.refCounter.removeReference(digest) is 0: os.remove(path) self.refCounter.commit() return
class ReadFile(object):
"""A lazy read file implementation""" interface.implements(interfaces.IReadFile) def __init__(self, name, bufsize=-1): self.name = name self.digest = str(os.path.split(self.name)[1]) self.bufsize=bufsize self._v_len = None self._v_file = None @property def _file(self): if not self.closed: return self._v_file self._v_file = file(self.name, 'rb', self.bufsize) return self._v_file @Lazy def ctime(self): return int(os.stat(self.name)[stat.ST_CTIME]) @Lazy def atime(self): return int(os.stat(self.name)[stat.ST_ATIME]) def __len__(self): if self._v_len is None: self._v_len = int(os.stat(self.name)[stat.ST_SIZE]) return self._v_len def __repr__(self): return "<ReadFile named %s>" % repr(self.digest) @property def closed(self): """like file closed, but lazy""" return self._v_file is None or self._v_file.closed def seek(self, offset, whence=0): """see file.seek""" # we optimize when we have 0, 0 then we do not need to open # the file if it is closed, because on the next read we are at # 0 if offset==0 and whence==0 and self.closed: return return self._file.seek(offset, whence) def tell(self): """see file.tell""" if self.closed: return 0 return self._file.tell() def read(self, size=-1): """see file.read""" return self._file.read(size) def close(self): """see file.close""" if not self.closed: self._v_file.close() self._v_file = None def fileno(self): return self._file.fileno() def __iter__(self): return self._file.__iter__()
class WriteFile(object):
interface.implements(interfaces.IWriteFile) def __init__(self, hd, handle, path): self.hd = hd self.handle = handle self.path = path self.sha = sha.new() self._pos = 0 def write(self, s): self.sha.update(s) os.write(self.handle, s) self._pos += len(s) def commit(self): """returns the sha digest and saves the file""" os.close(self.handle) return self.hd.commit(self) def tell(self): """see file.tell""" return self._pos def abort(self): """abort the write and delete file""" os.close(self.handle) os.unlink(self.path)
property.py
from zope import component import interfaces from cStringIO import StringIO
from datamanager import getFile, _storage
_marker = object()
BLOCK_SIZE = 1024*128
class ExtBytesProperty(object):
"""a property which's values are stored as external files""" def __init__(self, name): self.__name = name def __delete__(self,inst): digest = inst.__dict__[self.__name] self.hd.delete(digest) @property def hd(self): return component.getUtility(interfaces.IHashDir) def __get__(self, inst, klass): if inst is None: return self digest = inst.__dict__.get(self.__name, _marker) if digest is _marker: return None return getFile(digest) def __set__(self, inst, value): # ignore if value is None if value is None: if inst.__dict__.has_key(self.__name): del inst.__dict__[self.__name] return # Handle case when value is a string if isinstance(value, unicode): value = value.encode('UTF-8') if isinstance(value, str): value = StringIO(value) value.seek(0) f = self.hd.new() while True: chunk = value.read(BLOCK_SIZE) if not chunk: newDigest = f.commit() oldDigest = inst.__dict__.get(self.__name, _marker) if newDigest == oldDigest: # we have no change, so we have to seek to zero # because this is normal behaviour when setting a # new value if hasattr(_storage, 'dataManager'): if newDigest in _storage.dataManager.files: f = _storage.dataManager.files[newDigest] f.seek(0) else: inst.__dict__[self.__name] = newDigest """ case 1: oldDigest=newDigets though the object is still
refering to same file f.comit would have incremented the reference by one. So to balance it oldDigest must be deleted. case 2: object referencing a new file. So old file must be deleted """ if oldDigest is not _marker: self.hd.delete(oldDigest)
break f.write(chunk)
file/file.py
from persistent import Persistent from z3c.extfile.property import ExtBytesProperty from interfaces import IExtFile from zope import interface
class ExtFile(Persistent):
"""A zope file implementation based on z3c.extfile""" interface.implements(IExtFile) data = ExtBytesProperty('data') def __init__(self, data='', contentType=''): self.data = data self.contentType = contentType def __del__(self): del self.data print "deleted data" def delete(self): del self.data print "deleted data via delete()" def getSize(self): return len(self.data)
rgds,
jayaraj
But form a general perspective you are correct.... may be HashDir will have to evolve to add reference counting feture just like python do!
On Wed, Sep 24, 2008 at 11:34 AM, Jürgen kartnaller < juergen.kartnaller@gmail.com> wrote:
Hi jayaraj, it is definitely not a good idea to implicitily delete files.
What if two ExtFile objects reference the same file ?
Jürgen
On Wed, Sep 24, 2008 at 4:37 AM, Jayarajan Jn jayarajan@musmo.comwrote:
Hi, i am now doing some prototypes for my projects which will be dealing with tones of files. After a little scouting i decided to try z3c.extfile. Every thing works fine. But i found it strange that there is no delete feature in z3c.extfile. ie, even if i can delete a ExtFile object, the file in the hash directory is not getting deleted! and it keeps on accumulating...
So i thought i 'll add a delete feature... but my __del__() approach doesn't work for me. but i added an additional delete() function too which can be invoked explicitly to delete the file before trying to delete ExtFile object.
i made following changes to the source... inside z3c.extfile.file.file.ExtFile,
class ExtFile(Persistent):
"""A zope file implementation based on z3c.extfile""" interface.implements(IExtFile) data = ExtBytesProperty('data') def __init__(self, data='', contentType=''): self.data = data self.contentType = contentType # added the following lines#
- def __del__(self): # <- this is not being invoked when i try to
delete an extfile object del self.data #print "deleted data via destructor"
def delete(self): # <- added this to be able to manually able
to delete files del self.data #print "deleted data via delete()"
# # # # # # # # # # # # # # #
- def getSize(self): return len(self.data)
and the 'data' is a 'property' (ExtBytesProperty) so i made following changes to z3c.extfile.property.ExtBytesProperty
class ExtBytesProperty(object):
"""a property which's values are stored as external files""" def __init__(self, name): self.__name = name # added the following lines#
def __delete__(self,inst): digest = inst.__dict__[self.__name] self.hd.delete(digest)
*# # # # # # # # # # # # # # #
@property def hd(self): return component.getUtility(interfaces.IHashDir)
def __get__(self, inst, klass):
if inst is None: return self digest = inst.__dict__.get(self.__name, _marker) if digest is _marker: return None return getFile(digest)
def __set__(self, inst, value): # ignore if value is None if value is None: if inst.__dict__.has_key(self.__name): del inst.__dict__[self.__name] return # Handle case when value is a string if isinstance(value, unicode): value = value.encode('UTF-8') if isinstance(value, str): value = StringIO(value) value.seek(0) f = self.hd.new() while True: chunk = value.read(BLOCK_SIZE) if not chunk: newDigest = f.commit() oldDigest = inst.__dict__.get(self.__name, _marker) if newDigest == oldDigest: # we have no change, so we have to seek to zero # because this is normal behaviour when setting a # new value if hasattr(_storage, 'dataManager'): if newDigest in _storage.dataManager.files: f = _storage.dataManager.files[newDigest] f.seek(0) else: inst.__dict__[self.__name] = newDigest break f.write(chunk)
and at last added the real code which delete the file in hash directory too
i added following codes inside z3c.extfile.hashdir.HashDir class
def delete(self,digest): """delete the file""" path=self.getPath(digest) if os.path.exists(path): os.remove(path) return
Now, everything works fine when i try to delete an ExtFile object in ZODB, __del__() is not being invoked!!!!!
can anyone tell me how can i fix this???
thanks in advance
jayaraj
Zope-Dev maillist - Zope-Dev@zope.org http://mail.zope.org/mailman/listinfo/zope-dev ** No cross posts or HTML encoding! ** (Related lists - http://mail.zope.org/mailman/listinfo/zope-announce http://mail.zope.org/mailman/listinfo/zope )