[Zope] using ZODB/Catalog outside of Zope - What am I doing wrong?
Roman Milner
roman@speeder.org
11 May 2000 01:01:40 -0500
Hello. I'm trying to use the ZODB and Catalog outside of Zope. I
have three fields in my class: two FieldIndexes and one TextIndex.
Searching on the FieldIndexes works fine but searching on the
TextIndex returns strange results. Sometimes I get nothing on
something that should hit and other times the wrong result for things
that do hit.
The two scripts are below. If you try running the search script and
pass it "Second" you will see what I mean. It will return "First
Test" instead of "Second Test".
Thanks for any help,
^Roman
#############crawler.py#############################
#! /usr/bin/env python
import sys
sys.path.append('/usr/local/Zope/lib/python')
import ZODB
import Persistence
from Persistence import Persistent
from ZODB.FileStorage import FileStorage
from Products.ZCatalog.Catalog import Catalog
class CURL(Persistent):
def __init__(self, url, ctype, the_data):
self.url = url
self.ctype = ctype
self.the_data = the_data
def identify(self):
print "URL: " + self.url + " Text: " + self.the_data
class URLStorer:
def __init__(self, file='./db/db.fs'):
self.file = file
self.db = ZODB.DB(FileStorage(file))
self.co = self.db.open()
self.root = self.co.root()
if self.root.has_key('cat'):
self.cat = self.root['cat']
else:
self.cat = Catalog()
self.cat.aq_parent= self.root
self.root['cat']= self.cat
self.cat.addIndex('url','FieldIndex')
self.cat.addIndex('ctype' ,'FieldIndex')
self.cat.addIndex('the_data','TextIndex')
get_transaction().commit()
def storeURL(self, curl):
uid = id(curl)
print "stored as " + str(uid)
self.root[uid] = curl
self.cat.catalogObject(curl, uid)
get_transaction().commit()
def searchURL(self, **kw):
r = self.cat.searchResults(kw)
for blah in r:
paths = self.cat.paths
root = self.root
k = []
for i in r:
id = i.data_record_id_
k.append(root[paths[id]])
return k
if __name__ == "__main__":
    # Seed the database with a few sample records (same data, same order
    # as before -- just driven from a table instead of repeated calls).
    storer = URLStorer()
    samples = [
        ('http://foo', 'text/html', 'First Test'),
        ('http://safbar', 'text/sdfhtml', 'Second Test'),
        ('http://asfasf', 'afasfs', 'Third test'),
        ('http://asfasf', 'afasfs', 'Fourth test'),
    ]
    for url, ctype, text in samples:
        storer.storeURL(CURL(url, ctype, text))
########################search.py#################
#! /usr/bin/env python
# Query the crawler database: print every stored record whose the_data
# text index matches the first command-line argument.
from crawler import CURL, URLStorer
import sys

storer = URLStorer()
for record in storer.searchURL(the_data=sys.argv[1]):
    record.identify()