###################################################################################
## ZExternalNews.py
## (c)2001, Olivier DECKMYN
###################################################################################
"""This module provides classes to handle the management (load, parse, browse,
update etc...) of a syndicated news channel respecting RDF or RSS format
(XML based).

A background thread (ExternalNewsManagerUpdater) periodically refreshes every
channel owned by a ZExternalNewsManager.
"""

import os
import time
import string
import sys
from threading import Lock, Thread

try:
    from xml.sax import saxlib, saxexts
except ImportError:
    # String exceptions (raise "ImportError", "...") are not real exceptions;
    # raise the proper class so callers can catch it.
    raise ImportError(
        "PyXML is not installed. Install it from http://pyxml.sourceforge.net")

## Zope Products machinery imports
from Globals import HTMLFile
from Globals import MessageDialog
from Globals import Persistent
from Globals import default__class_init__
from zLOG import LOG, WARNING, INFO, ERROR, DEBUG
import Acquisition
import AccessControl.Role
import OFS.Folder
import OFS.SimpleItem


###################################################################################
## Utility functions
###################################################################################

def clean_string(s):
    """Collapse every run of whitespace (spaces, tabs, CR/LF) in s into a single
    space and strip leading/trailing whitespace."""
    return string.join(string.split(s))


def FormatDate(s):
    """Transform a YYYYMMDDhhmmss timestamp string into an international
    string: YYYY/MM/DD hh:mm:ss"""
    (YYYY, MM, DD, hh, mm, ss) = (s[0:4], s[4:6], s[6:8], s[8:10], s[10:12], s[12:14])
    return "%s/%s/%s %s:%s:%s" % (YYYY, MM, DD, hh, mm, ss)


###################################################################################
## Technical classes
###################################################################################

class MyRDFParserHandler(saxlib.HandlerBase):
    """SAX document handler that fills a channel (and its news items) while the
    XML/RSS/RDF parser walks the document."""

    def __init__(self, channel):
        self.channel = channel          # Channel we are working on
        self._current_element = ''      # 'item' , 'channel' or 'image'
        self._current_property = ''     # Current property for current element
        self._current_item = None       # Current news item, if current element is an item

    def startElement(self, ele, attr):
        """Method called by the parser when an element starts"""
        if ele in ['image', 'channel', 'item']:
            self._current_element = ele
            self._current_property = ''
            if ele == 'item':
                # Starting a new item
                self._current_item = self.channel._new_item()
        elif self._current_element == "item" and (ele in ZExternalNewsItem.__Properties__):
            # Working on a sub element of item (a property of item)
            self._current_property = ele
        elif self._current_element == "image" and ele in ["url"]:
            # Working on url of image tag
            self._current_property = ele
        elif self._current_element == "channel" and ele in ["description", "title", "link"]:
            # Working on a property of the channel itself
            self._current_property = ele
        elif self._current_element != '':
            # Unknown sub element of a known element: ignore its text
            self._current_property = ""
        else:
            # No property handler for this element
            pass

    def endElement(self, ele):
        """Method called by the parser when an element ends"""
        if self._current_element == "item" and (ele in ZExternalNewsItem.__Properties__):
            self._current_property = ""
        elif ele in ['image', 'channel', 'item']:
            self._current_element = ''
            if ele == 'item':
                self._current_item = None   # end of current item

    def characters(self, ch, start, length):
        """Method called by the parser when reading text value (between element
        start and end tags). The text is whitespace-normalised and appended to
        the attribute matching the current element/property."""
        if self._current_property != '':
            s = ch[start:start + length]
            s = clean_string(s)
            if self._current_element == "image" and self._current_property == "url":
                self.channel.image_url = self.channel.image_url + s
            elif self._current_element == "channel" and self._current_property == "title":
                self.channel.title = self.channel.title + s
            elif self._current_element == "channel" and self._current_property == "description":
                self.channel.description = self.channel.description + s
            elif self._current_element == "channel" and self._current_property == "link":
                self.channel.link = self.channel.link + s
            elif self._current_element == 'item' and self._current_property != '':
                setattr(self._current_item,
                        self._current_property,
                        getattr(self._current_item, self._current_property) + s)


###################################################################################
## Classes
###################################################################################

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Item Handling Class - Non Persistent (Transient)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class ZExternalNewsItem:
    # One news item of a channel: plain holder for title/description/link.

    __allow_access_to_unprotected_subobjects__ = 1  # So that everything is accessible from Zope

    # Just to keep a list of "public" properties, to ease enumeration
    __Properties__ = ['title', 'description', 'link']

    def __init__(self):
        self.title = ''
        self.description = ''
        self.link = ''

    def __str__(self):
        # __Properties__ is a class attribute: it must be reached through self,
        # the bare name is not visible from inside a method.
        result = []
        for key in self.__Properties__:
            result.append(key + "=" + getattr(self, key))
        return string.join(result, ", ")


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Channel Handling Class  # Persistent
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
manage_addZExternalNewsChannelForm = HTMLFile('www/ZExternalNewsChannelAdd', globals())
manage_editZExternalNewsChannelForm = HTMLFile('www/ZExternalNewsChannelEdit', globals())
manage_viewZExternalNewsChannel = HTMLFile('www/ZExternalNewsChannelView', globals())


def manage_addZExternalNewsChannel(self, id, url, REQUEST=None):
    "Add an instance of the ZExternalNewsChannel class"
    self._setObject(id, ZExternalNewsChannel(id, url, parse_now=0))
    if REQUEST is not None:
        return self.manage_main(self, REQUEST)


class ZExternalNewsChannel(
    OFS.SimpleItem.Item,
    Persistent,
    Acquisition.Implicit,
    AccessControl.Role.RoleManager):
    """
    ZExternalNewsChannel class.
    """

    meta_type = 'ZExternalNewsChannel'
    icon = 'misc_/ZExternalNews/ZExternalNewsChannelIcon'

    _properties = (
        {'id': 'url', 'type': 'string'},
    )

    manage_options = (
        {'label': 'View', 'action': 'manage_view'},
        {'label': 'Edit', 'action': 'manage_editForm'},
        {'label': 'Refresh', 'action': 'manage_refresh'},
    ) + OFS.SimpleItem.SimpleItem.manage_options

    __ac_permissions__ = (
        ('View management screens', ('manage_tabs', 'manage_main')),
        ('Change Permissions', ('manage_access',)),
        ('View ZExternalNewsChannels', ('',), ('Anonymous', 'Manager')),
    )

    manage_view = manage_viewZExternalNewsChannel
    manage_editForm = manage_editZExternalNewsChannelForm

    def __p_deactivate__(self):
        LOG('ZExternalNews', INFO, "NewsChannel[deactivate],", str(self.id))
        # NOTE(review): "__p_deactive__" looks like a typo, and the persistence
        # hook is presumably Persistent._p_deactivate -- confirm against the
        # ZODB version in use before changing. Left untouched on purpose.
        Persistent.__p_deactive__(self)

    def __init__(self, id, url, name="", min_delay=10, parse_now=1):
        """Channel Constructor.

        id        -- Zope id of the channel
        url       -- URL of the RDF/RSS document
        name      -- display name used in management messages
        min_delay -- minimum number of minutes between two non-forced refreshes
        parse_now -- when 1, download and parse the channel immediately
        """
        self.id = id
        self.url = url
        self.name = name
        # Protected variables
        self._min_delay = min_delay
        self._timestamp = 0
        # Volatile list of news items. It must exist even when parse_now is 0,
        # otherwise getItems()/_new_item() fail on a freshly added channel.
        self._v_items = []
        # Properties
        self.image_url = ''
        self.title = ''
        self.description = ''
        self.link = ''
        if parse_now == 1:
            self.download_and_parse()

    def __setstate__(self, state):
        LOG('ZExternalNews', INFO, "NewsChannel[setstate],", str(self.id))
        Persistent.__setstate__(self, state)
        self._v_items = []  # list of news items, recreated here as it is volatile

    def manage_refresh(self, REQUEST=None):
        """Refresh and return a message dialog"""
        self.refresh(force=1)
        if REQUEST is not None:
            return MessageDialog(
                title='Updated',
                message="Channel %s has been refreshed (forced)." % self.name,
                action="./manage_main"
            )

    def refresh(self, force=0):
        """If our channel is out of date (time spent >= min_delay minutes) or
        force is 1, the channel is destroyed, loaded and parsed again"""
        if (force == 1) or (time.time() - self._timestamp >= self._min_delay * 60):
            self.download_and_parse()

    def download_and_parse(self):
        """Destroy, download and parse the channel"""
        self.image_url = ''
        self.title = ''
        self.description = ''
        self.link = ''
        xmlp = saxexts.make_parser()    # Prepare parser
        dh = MyRDFParserHandler(self)   # Prepare Document Handler
        xmlp.setDocumentHandler(dh)
        self.clear_items()              # Clear actual items list
        xmlp.parse(self.url)            # Make the job (launches parsing + document handling)
        self._timestamp = time.time()   # Store TimeStamp for refresh

    def _new_item(self):
        """Make and return a new item. This news item is owned by the channel"""
        n = ZExternalNewsItem()
        self.getItems().append(n)
        return n

    def clear_items(self):
        """Removes all items of the channel"""
        self._v_items = []

    def asDirtyHTML(self):
        """Renders a channel in a dirty html way. Made to ease debug only."""
        # NOTE(review): the HTML literals of this method were lost (empty format
        # strings applied to arguments raise TypeError, and the item loop was
        # missing); the markup below is a reconstruction -- confirm against the
        # original product if it is available.
        # NOTE(review): feed text comes from a remote site and is inserted
        # unescaped; consider escaping it if this output is shown to users.
        parts = []
        # First, a small table to display both image and title
        parts.append('<table border="0"><tr>')
        if string.strip(self.image_url) != '':
            parts.append('<td><a href="%s"><img src="%s" border="0"></a></td>'
                         % (self.link, self.image_url))
        parts.append('<td><b>%s</b><br>' % (self.title))
        parts.append('%s</td>' % (self.description))
        parts.append('</tr></table>')
        # Then, the news items, simply listed with a link to external provider site.
        parts.append('<ul>')
        for item in self.getItems():
            parts.append('<li><a href="%s">%s</a></li>' % (item.link, item.title))
        parts.append('</ul>')
        parts.append('Last updated %s' % (FormatDate(self.getLastUpdateDateTime())))
        return string.join(parts, '\n')

    def __call__(self):
        """Used when rendering object directly with dtml-var in Zope"""
        return self.asDirtyHTML()

    def getLastUpdateDateTime(self):
        """Returns last update date time as a string using YYYYMMDDHHMMSS"""
        t = time.localtime(self._timestamp)
        return time.strftime("%Y%m%d%H%M%S", t)

    def getItems(self):
        """As _v_items is volatile, one needs to protect the use of this
        variable through this method. It makes sure the volatile attribute
        exists before returning it (it is missing on instances that were
        neither parsed nor reloaded through __setstate__)."""
        if not hasattr(self, '_v_items'):
            self._v_items = []
        return self._v_items

    def index_html(self):
        """Used when viewing the object through its url, directly."""
        return self.asDirtyHTML()

    def manage_edit(self, url, REQUEST=None):
        "Change properties for the class instance."
        self.url = url
        if REQUEST is not None:
            return MessageDialog(
                title='Edited',
                message="Properties for %s has been changed." % self.id,
                action="./manage_main"
            )


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Channels Manager Class - Persistent
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
manage_addZExternalNewsManagerForm = HTMLFile('www/ZExternalNewsManagerAdd', globals())


def manage_addZExternalNewsManager(self, id, title, delay, REQUEST=None):
    "Add an instance of the ZExternalNewsManager class"
    self._setObject(id, ZExternalNewsManager(id, title, delay))
    if REQUEST is not None:
        return self.manage_main(self, REQUEST)


class ZExternalNewsManager(
    OFS.Folder.Folder,
    Persistent,
    Acquisition.Implicit,
    AccessControl.Role.RoleManager):
    """This class handles the management of a list of channels.
    It is responsible for automatically updating them."""

    meta_type = 'ZExternalNewsManager'
    icon = 'misc_/ZExternalNews/ZExternalNewsManagerIcon'

    _properties = (
        {'id': 'title', 'type': 'string'},
        {'id': 'delay', 'type': 'int'},
    )

    meta_types = (
        # These are the types of objects that can be contained inside the
        # container (in addition to std ones)
        {'name': 'ZExternalNewsChannel', 'action': 'manage_addZExternalNewsChannelForm'},
    )

    manage_addZExternalNewsChannelForm = manage_addZExternalNewsChannelForm
    manage_addZExternalNewsChannel = manage_addZExternalNewsChannel

    manage_options = OFS.Folder.Folder.manage_options + (
        {'label': 'Restart Auto Updater', 'action': 'manage_restart'},
        {'label': 'Refresh All Channels', 'action': 'manage_refresh'},
    )

    __ac_permissions__ = (
        ('View management screens', ('manage_tabs', 'manage_main')),
        ('Change Permissions', ('manage_access',)),
        ('Add ZExternalNewsChannel', ('manage_addZExternalNewsChannel', 'manage_addZExternalNewsChannelForm')),
        ('View ZExternalNewsManagers', ('',), ('Anonymous', 'Manager')),
    )

    def __init__(self, id, title, delay):
        # delay is expressed in minutes (it is multiplied by 60 in auto_update)
        self.id = id
        self.title = title
        self.delay = delay

    def __p_deactivate__(self):
        LOG('ZExternalNews', INFO, "NewsManager[deactivate],", str(self.id))
        # NOTE(review): "__p_deactive__" looks like a typo, and the persistence
        # hook is presumably Persistent._p_deactivate -- confirm against the
        # ZODB version in use before changing. Left untouched on purpose.
        Persistent.__p_deactive__(self)

    def __setstate__(self, state):
        LOG('ZExternalNews', DEBUG, "NewsManager[setstate],", str(self.id))
        Persistent.__setstate__(self, state)
        # NOTE(review): this starts a new updater thread every time the object
        # state is loaded -- confirm this cannot pile up threads.
        self.auto_update()  # Auto start the auto-update thread

    def manage_restart(self, REQUEST=None):
        """Restart the autoupdate if needed"""
        if not hasattr(self, '_v_updater'):
            self.auto_update()
            message = "AutoUpdate is restarted."
        else:
            message = "AutoUpdate was already running : it was NOT restarted."
        if REQUEST is not None:
            return MessageDialog(
                title='Restarted',
                message=message,
                action="./manage_main"
            )

    def manage_refresh(self, REQUEST=None):
        """Refresh all the channels"""
        self.Refresh()
        if REQUEST is not None:
            return MessageDialog(
                title='Refresh',
                message="All channels were refreshed (errors, if any, are ignored here).",
                action="./manage_main"
            )

    def auto_update(self):
        # Deliberately no docstring: a docstring would make this method
        # publishable through the web in Zope.
        if hasattr(self, '_v_updater'):
            LOG('ZExternalNews', WARNING,
                "Restarting ZExternalNewsManager AutoUpdate,even if object STILL exist,",
                str(self.id))
        self._v_updater = ExternalNewsManagerUpdater(self, self.delay * 60)
        self._v_updater.setDaemon(1)    # So that this thread will die when this instance will die, too.
        self._v_updater.start()         # Let's rock
        LOG('ZExternalNews', DEBUG,
            "ZExternalNewsManager AutoUpdate started, will awake every %d minutes." % self.delay)

    def getChannelCount(self):
        """Returns number of managed channels"""
        return len(self.getChannels())

    def getChannels(self):
        """Returns the list of all channels"""
        # ZODB : List all ZExternalNewsChannel objects owned by the manager
        return self.objectValues(ZExternalNewsChannel.meta_type)

    def getChannel(self, name):
        """Return channel object given its name"""
        # NOTE(review): getItem is not defined in this file; verify that the
        # folder base class (or acquisition) really provides it.
        return self.getItem(name)

    def Refresh(self):
        """Refreshes all channels. In case of error on update (invalid url for
        example), the show must go on."""
        LOG('ZExternalNews', DEBUG, "Refreshing all", self.id)
        for channel in self.getChannels():
            try:
                if channel.meta_type == ZExternalNewsChannel.meta_type:
                    # This is to avoid strange ZODB behaviour at Zope startup
                    channel.refresh()
            except:
                # Deliberate catch-all: one broken channel must not stop the
                # refresh of the others; the failure is logged, not hidden.
                LOG('ZExternalNews', ERROR,
                    "Problem updating ZExternalNewsChannel '%s' (%s: %s). Ignored."
                    % (channel.id, sys.exc_info()[0], sys.exc_info()[1]))

    def __str__(self):
        # NOTE(review): the original format string was lost (an empty string
        # formatted with one argument raises TypeError); this text is a
        # reconstruction.
        return "<ZExternalNewsManager with %d channel(s)>" % self.getChannelCount()


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Manager Updater Class - Threaded - Non Persistent
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class ExternalNewsManagerUpdater(Thread):
    """This class is a THREAD responsible for updating in the background all
    the channels of a given channels manager every 'delay' seconds.
    This is not a user-level class.
    """

    def __init__(self, manager, delay):
        """Initialize thread. Delay is in seconds. Manager is the attached
        ZExternalNewsManager instance"""
        Thread.__init__(self)
        self._manager = manager
        self._delay = delay

    def run(self):
        # Running Update: refresh every channel, sleep, start again, forever.
        while 1:
            self._manager.Refresh()
            time.sleep(self._delay)


default__class_init__(ZExternalNewsChannel)
default__class_init__(ZExternalNewsManager)