[Zope-CVS] CVS: Products/ZCTextIndex/tests - mhindex.py:1.11
Guido van Rossum
guido@python.org
Thu, 23 May 2002 10:49:26 -0400
Update of /cvs-repository/Products/ZCTextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv12803
Modified Files:
mhindex.py
Log Message:
Dump word frequencies as well.
=== Products/ZCTextIndex/tests/mhindex.py 1.10 => 1.11 ===
 def main():
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "bd:hm:n:Op:t:uwW")
+        opts, args = getopt.getopt(sys.argv[1:], "bd:fhm:n:Op:t:uwW")
     except getopt.error, msg:
         print msg
         print "use -h for help"
@@ -77,12 +77,14 @@
     datafs = os.path.expanduser(DATAFS)
     pack = 0
     trans = 20000
-    dumpwords = dumpwids = 0
+    dumpwords = dumpwids = dumpfreqs = 0
     for o, a in opts:
         if o == "-b":
             bulk = 1
         if o == "-d":
             datafs = a
+        if o == "-f":
+            dumpfreqs = 1
         if o == "-h":
             print __doc__
             return
@@ -103,11 +105,13 @@
         if o == "-W":
             dumpwids = 1
     ix = Indexer(datafs, writable=update or bulk, trans=trans, pack=pack)
+    if dumpfreqs:
+        ix.dumpfreqs()
     if dumpwords:
         ix.dumpwords()
     if dumpwids:
         ix.dumpwids()
-    if dumpwords or dumpwids:
+    if dumpwords or dumpwids or dumpfreqs:
         return
     if bulk:
         if optimize:
@@ -172,15 +176,41 @@
         print len(self.path2docid), "Pathnames"
         print self.index.lexicon.length(), "Words"
+    def dumpfreqs(self):
+        lexicon = self.index.lexicon
+        index = self.index.index
+        assert isinstance(index, OkapiIndex)
+        L = []
+        for wid in lexicon.wids():
+            freq = 0
+            for f in index._wordinfo.get(wid, {}).values():
+                freq += f
+            L.append((freq, wid, lexicon.get_word(wid)))
+        L.sort()
+        L.reverse()
+        for freq, wid, word in L:
+            print "%10d %10d %s" % (wid, freq, word)
+
     def dumpwids(self):
         lexicon = self.index.lexicon
+        index = self.index.index
+        assert isinstance(index, OkapiIndex)
         for wid in lexicon.wids():
-            print "%10d %s" % (wid, lexicon.get_word(wid))
+            freq = 0
+            for f in index._wordinfo.get(wid, {}).values():
+                freq += f
+            print "%10d %10d %s" % (wid, freq, lexicon.get_word(wid))
     def dumpwords(self):
         lexicon = self.index.lexicon
+        index = self.index.index
+        assert isinstance(index, OkapiIndex)
         for word in lexicon.words():
-            print "%10d %s" % (lexicon.get_wid(word), word)
+            wid = lexicon.get_wid(word)
+            freq = 0
+            for f in index._wordinfo.get(wid, {}).values():
+                freq += f
+            print "%10d %10d %s" % (wid, freq, word)
     def close(self):
         self.root = None